diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 14edae0ec0..7db20ceff9 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -195,6 +195,7 @@ add_library( src/HashJni.cpp src/HistogramJni.cpp src/HostTableJni.cpp + src/HLLPPJni.cpp src/JSONUtilsJni.cpp src/NativeParquetJni.cpp src/ParseURIJni.cpp @@ -203,6 +204,7 @@ add_library( src/SparkResourceAdaptorJni.cpp src/SubStringIndexJni.cpp src/ZOrderJni.cpp + src/HLLPP.cu src/bloom_filter.cu src/case_when.cu src/cast_decimal_to_string.cu diff --git a/src/main/cpp/src/HLLPP.cu b/src/main/cpp/src/HLLPP.cu new file mode 100644 index 0000000000..75734d749a --- /dev/null +++ b/src/main/cpp/src/HLLPP.cu @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "HLLPP.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace spark_rapids_jni { + +namespace { + +// The number of bits required by register value. Register value stores num of zeros. +// XXHash64 value is 64 bits, it's safe to use 6 bits to store a register value. +constexpr int REGISTER_VALUE_BITS = 6; + +// MASK binary 6 bits: 111111 +constexpr uint64_t MASK = (1L << REGISTER_VALUE_BITS) - 1L; + +// One long stores 10 register values +constexpr int REGISTERS_PER_LONG = 64 / REGISTER_VALUE_BITS; + +__device__ inline int get_register_value(int64_t const long_10_registers, int reg_idx) +{ + int64_t shift_mask = MASK << (REGISTER_VALUE_BITS * reg_idx); + int64_t v = (long_10_registers & shift_mask) >> (REGISTER_VALUE_BITS * reg_idx); + return static_cast(v); +} + +struct estimate_fn { + cudf::device_span sketch_longs; + int const precision; + int64_t* const out; + + __device__ void operator()(cudf::size_type const idx) const + { + auto const num_regs = 1ull << precision; + double sum = 0; + int zeroes = 0; + + for (auto reg_idx = 0; reg_idx < num_regs; ++reg_idx) { + // each long contains 10 register values + int long_col_idx = reg_idx / REGISTERS_PER_LONG; + int reg_idx_in_long = reg_idx % REGISTERS_PER_LONG; + int reg = get_register_value(sketch_longs[long_col_idx][idx], reg_idx_in_long); + sum += double{1} / static_cast(1ull << reg); + zeroes += reg == 0; + } + + auto const finalize = cuco::hyperloglog_ns::detail::finalizer(precision); + out[idx] = finalize(sum, zeroes); + } +}; + +} // end anonymous namespace + +std::unique_ptr estimate_from_hll_sketches(cudf::column_view const& input, + int precision, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_EXPECTS(pricision >= 4 && pricision <= 18, "HLL++ requires pricision in range: [4, 18]"); + auto const input_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int i) { return input.child(i).begin(); }); + auto input_cols = std::vector(input_iter, input_iter + input.num_children()); + auto d_inputs = cudf::detail::make_device_uvector_async(input_cols, stream, mr); + auto result = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT64}, input.size(), cudf::mask_state::ALL_VALID, stream); + // evaluate from struct + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + input.size(), + estimate_fn{d_inputs, precision, result->mutable_view().data()}); + return result; +} + +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/HLLPP.hpp b/src/main/cpp/src/HLLPP.hpp new file mode 100644 index 0000000000..69e0b237e5 --- /dev/null +++ b/src/main/cpp/src/HLLPP.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +namespace spark_rapids_jni { + +std::unique_ptr estimate_from_hll_sketches( + cudf::column_view const& input, + int precision, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/HLLPPJni.cpp b/src/main/cpp/src/HLLPPJni.cpp new file mode 100644 index 0000000000..581af90a90 --- /dev/null +++ b/src/main/cpp/src/HLLPPJni.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "HLLPP.hpp" +#include "cudf_jni_apis.hpp" + +extern "C" { + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_HLLPP_estimateDistinctValueFromSketches( + JNIEnv* env, jclass, jlong sketches, jint precision) +{ + JNI_NULL_CHECK(env, sketches, "Sketch column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const sketch_view = reinterpret_cast(sketches); + return cudf::jni::ptr_as_jlong( + spark_rapids_jni::estimate_from_hll_sketches(*sketch_view, precision).release()); + } + CATCH_STD(env, 0); +} +} diff --git a/src/main/java/com/nvidia/spark/rapids/jni/HLLPP.java b/src/main/java/com/nvidia/spark/rapids/jni/HLLPP.java new file mode 100644 index 0000000000..1be2c80512 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/HLLPP.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.ColumnView; +import ai.rapids.cudf.NativeDepsLoader; + +/** + * HyperLogLogPlusPlus + */ +public class HLLPP { + static { + NativeDepsLoader.loadNativeDeps(); + } + + /** + * Compute the approximate count distinct value from sketch values. + *

+ * The input sketch values must be given in the format `LIST`. + * + * @param input The sketch column which constains `LIST values. + * @param precision The num of bits for addressing. + * @return A INT64 column with each value indicates the approximate count distinct value. + */ + public static ColumnVector estimateDistinctValueFromSketches(ColumnView input, int precision) { + return new ColumnVector(estimateDistinctValueFromSketches(input.getNativeView(), precision)); + } + + private static native long estimateDistinctValueFromSketches(long inputHandle, int precision); +} diff --git a/src/test/java/com/nvidia/spark/rapids/jni/HLLPPTest.java b/src/test/java/com/nvidia/spark/rapids/jni/HLLPPTest.java new file mode 100644 index 0000000000..c14b565313 --- /dev/null +++ b/src/test/java/com/nvidia/spark/rapids/jni/HLLPPTest.java @@ -0,0 +1,37 @@ +/* +* Copyright (c) 2024, NVIDIA CORPORATION. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package com.nvidia.spark.rapids.jni; + +import ai.rapids.cudf.GroupByAggregation; +import ai.rapids.cudf.Table; + +import org.junit.jupiter.api.Test; + + +public class HLLPPTest { + + @Test + void testGroupByHLL() { + // A trivial test: + try (Table input = new Table.TestBuilder().column(1, 2, 3, 1, 2, 2, 1, 3, 3, 2) + .column(0, 1, -2, 3, -4, -5, -6, 7, -8, 9) + .build()){ + input.groupBy(0).aggregate(GroupByAggregation.HLLPP(0) + .onColumn(1)); + } + } +}