Adding format_float kernel (#1572)

* wip Signed-off-by: Haoyang Li <[email protected]> * wip Signed-off-by: Haoyang Li <[email protected]> * Add float to string kernel Signed-off-by: Haoyang Li <[email protected]> * Update src/main/cpp/src/cast_float_to_string.cu Co-authored-by: Mike Wilson <[email protected]> * Update src/main/cpp/src/cast_float_to_string.cu Co-authored-by: Mike Wilson <[email protected]> * address comments and use different precision for float Signed-off-by: Haoyang Li <[email protected]> * a runnable format_number demo Signed-off-by: Haoyang Li <[email protected]> * rewrite the solution with ryu Signed-off-by: Haoyang Li <[email protected]> * update license Signed-off-by: Haoyang Li <[email protected]> * clean up Signed-off-by: Haoyang Li <[email protected]> * Split ftos_converter out Signed-off-by: Haoyang Li <[email protected]> * clean up Signed-off-by: Haoyang Li <[email protected]> * resolve cudf conflicts Signed-off-by: Haoyang Li <[email protected]> * resolve cudf conflicts Signed-off-by: Haoyang Li <[email protected]> * resolve cudf conflicts Signed-off-by: Haoyang Li <[email protected]> * resolve cudf conflicts Signed-off-by: Haoyang Li <[email protected]> * remove cudf changes Signed-off-by: Haoyang Li <[email protected]> * remove cudf changes Signed-off-by: Haoyang Li <[email protected]> * add ryu Signed-off-by: Haoyang Li <[email protected]> * Add copyright and notice Signed-off-by: Haoyang Li <[email protected]> * Fix copyrights and license Signed-off-by: Haoyang Li <[email protected]> * cudf conflict resolve Signed-off-by: Haoyang Li <[email protected]> * Add format_float kernel Signed-off-by: Haoyang Li <[email protected]> * clean up Signed-off-by: Haoyang Li <[email protected]> * Fixed two bugs Signed-off-by: Haoyang Li <[email protected]> * Added a failed case back Signed-off-by: Haoyang Li <[email protected]> * Refactor Signed-off-by: Haoyang Li <[email protected]> * Handle d=0 case Signed-off-by: Haoyang Li <[email protected]> * Add nv apache license to 
ftos_converter Signed-off-by: Haoyang Li <[email protected]> * Add nv apache license to ftos_converter Signed-off-by: Haoyang Li <[email protected]> * Fix an rounding bug Signed-off-by: Haoyang Li <[email protected]> * Update src/main/cpp/src/ftos_converter.cu Co-authored-by: Jason Lowe <[email protected]> * address some comments Signed-off-by: Haoyang Li <[email protected]> * cudf conflict Signed-off-by: Haoyang Li <[email protected]> * Update src/main/cpp/src/cast_float_to_string.cu Co-authored-by: Nghia Truong <[email protected]> * Make it runable again Signed-off-by: Haoyang Li <[email protected]> * address some comments Signed-off-by: Haoyang Li <[email protected]> * addressed comments Signed-off-by: Haoyang Li <[email protected]> * Address comments Signed-off-by: Haoyang Li <[email protected]> * clang format Signed-off-by: Haoyang Li <[email protected]> * Address comments Signed-off-by: Haoyang Li <[email protected]> * Address comments Signed-off-by: Haoyang Li <[email protected]> * address comments Signed-off-by: Haoyang Li <[email protected]> * fix build after upmerge Signed-off-by: Haoyang Li <[email protected]> * move inf/nan replacement to kernel Signed-off-by: Haoyang Li <[email protected]> * Apply suggestions from code review Co-authored-by: Mike Wilson <[email protected]> Co-authored-by: Nghia Truong <[email protected]> * address comments Signed-off-by: Haoyang Li <[email protected]> * Apply suggestions from code review Co-authored-by: Nghia Truong <[email protected]> * address comments Signed-off-by: Haoyang Li <[email protected]> * cudf Signed-off-by: Haoyang Li <[email protected]> * cudf Signed-off-by: Haoyang Li <[email protected]> * format Signed-off-by: Haoyang Li <[email protected]> * cudf reset Signed-off-by: Haoyang Li <[email protected]> * Apply suggestions from code review Co-authored-by: Mike Wilson <[email protected]> --------- Signed-off-by: Haoyang Li <[email protected]> Co-authored-by: Mike Wilson <[email protected]> Co-authored-by: 
Jason Lowe <[email protected]> Co-authored-by: Nghia Truong <[email protected]>
NVIDIA · Dec 18, 2023 · 48d2736 · 48d2736
1 parent 6bdc68b
commit 48d2736
Show file tree

Hide file tree

Showing 10 changed files with 677 additions and 38 deletions.
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
@@ -164,6 +164,7 @@ add_library(
   src/ZOrderJni.cpp
   src/bloom_filter.cu
   src/cast_decimal_to_string.cu
+  src/format_float.cu
   src/cast_float_to_string.cu
   src/cast_string.cu
   src/cast_string_to_float.cu

diff --git a/src/main/cpp/src/CastStringJni.cpp b/src/main/cpp/src/CastStringJni.cpp
@@ -125,6 +125,21 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(J
   CATCH_CAST_EXCEPTION(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloatWithFormat(
+  JNIEnv* env, jclass, jlong input_column, jint digits)  // JNI entry point: formats a float column as strings
+{
+  JNI_NULL_CHECK(env, input_column, "input column is null", 0);  // NOTE(review): presumably bails out with 0 on a zero handle - confirm macro
+
+  try {
+    cudf::jni::auto_set_device(env);
+
+    auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);  // the jlong is treated as a native column_view address
+    return cudf::jni::release_as_jlong(  // the resulting column goes back to the caller as a jlong
+      spark_rapids_jni::format_float(cv, digits, cudf::get_default_stream()));  // mr is left at its declared default
+  }
+  CATCH_CAST_EXCEPTION(env, 0);  // NOTE(review): presumably maps C++ exceptions to Java ones and returns 0 - confirm macro
+}
+
 JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
                                                                                  jclass,
                                                                                  jlong input_column)

diff --git a/src/main/cpp/src/cast_string.hpp b/src/main/cpp/src/cast_string.hpp
@@ -115,6 +115,12 @@ std::unique_ptr<cudf::column> string_to_float(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+std::unique_ptr<cudf::column> format_float(  // formats a floating-point column as a strings column
+  cudf::column_view const& input,  // column to format
+  int const digits,  // NOTE(review): presumably the number of decimal places to emit - confirm in ftos_converter
+  rmm::cuda_stream_view stream,  // CUDA stream the work is issued on
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());  // defaults to the current device resource
+
 std::unique_ptr<cudf::column> float_to_string(
   cudf::column_view const& input,
   rmm::cuda_stream_view stream,

diff --git a/src/main/cpp/src/format_float.cu b/src/main/cpp/src/format_float.cu
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cast_string.hpp"
+#include "ftos_converter.cuh"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+
+namespace spark_rapids_jni {
+
+namespace detail {
+namespace {
+
+template <typename FloatType>  // FloatType: element type read from d_floats
+struct format_float_fn {  // per-row device functor handed to make_strings_children by the dispatcher
+  cudf::column_device_view d_floats;  // input column (device view)
+  int digits;  // forwarded unchanged to the ftos_converter helpers
+  cudf::size_type* d_offsets;  // row byte counts are stored here when d_chars is null; otherwise read as each row's position in d_chars
+  char* d_chars;  // output buffer; nullptr selects the size-computing branch of operator()
+
+  __device__ cudf::size_type compute_output_size(FloatType const value) const  // byte count of the formatted value
+  {
+    bool constexpr is_float = std::is_same_v<FloatType, float>;  // true only when FloatType is float
+    return static_cast<cudf::size_type>(  // value is widened to double; is_float tells the converter the original type
+      ftos_converter::compute_format_float_size(static_cast<double>(value), digits, is_float));
+  }
+
+  __device__ void format_float(cudf::size_type const idx) const  // writes row idx's characters into d_chars
+  {
+    auto const value        = d_floats.element<FloatType>(idx);
+    bool constexpr is_float = std::is_same_v<FloatType, float>;
+    auto const output       = d_chars + d_offsets[idx];  // start of this row's slot in the output buffer
+    ftos_converter::format_float(static_cast<double>(value), digits, is_float, output);
+  }
+
+  __device__ void operator()(cudf::size_type const idx) const  // NOTE(review): presumably invoked once per row in each of two passes - confirm in make_strings_children
+  {
+    if (d_floats.is_null(idx)) {
+      if (d_chars == nullptr) { d_offsets[idx] = 0; }  // null rows are recorded as zero bytes
+      return;  // nothing is written into d_chars for a null row
+    }
+    if (d_chars != nullptr) {
+      format_float(idx);  // buffer available: emit the characters
+    } else {
+      d_offsets[idx] = compute_output_size(d_floats.element<FloatType>(idx));  // no buffer yet: record the size only
+    }
+  }
+};
+
+/**
+ * @brief This dispatch method is for converting floats into strings.
+ *
+ * The template function declaration ensures only float types are allowed.
+ *
+ * Two call operators are provided and selected with CUDF_ENABLE_IF:
+ *  - floating-point FloatType: builds and returns the strings column;
+ *  - any other type: fails with CUDF_FAIL.
+ *
+ * The floating-point overload returns an empty STRING column when the input
+ * has zero rows. Otherwise it creates a device view of the input, lets
+ * make_strings_children drive format_float_fn to produce the offsets and
+ * chars children, and assembles the result with the input's null count and
+ * a copy of its null mask.
+ *
+ * @param floats Column to format.
+ * @param digits Stored in format_float_fn and passed on to ftos_converter.
+ * @param stream Stream used for the device view, the children and the mask copy.
+ * @param mr Memory resource forwarded to make_strings_children and copy_bitmask.
+ * @return The formatted strings column.
+ */
+struct dispatch_format_float_fn {
+  template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
+                                           int const digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr) const
+  {
+    auto const strings_count = floats.size();  // one output string per input row
+    if (strings_count == 0) { return cudf::make_empty_column(cudf::type_id::STRING); }
+
+    auto const input_ptr = cudf::column_device_view::create(floats, stream);
+
+    auto [offsets, chars] = cudf::strings::detail::make_strings_children(
+      format_float_fn<FloatType>{*input_ptr, digits}, strings_count, stream, mr);  // d_offsets and d_chars are not set here
+
+    return cudf::make_strings_column(strings_count,
+                                     std::move(offsets),
+                                     std::move(chars),
+                                     floats.null_count(),  // nulls carry over from the input
+                                     cudf::detail::copy_bitmask(floats, stream, mr));
+  }
+
+  // non-float types throw an exception
+  template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
+                                           int const,
+                                           rmm::cuda_stream_view,
+                                           rmm::mr::device_memory_resource*) const
+  {
+    CUDF_FAIL("Values for format_float function must be a float type.");
+  }
+};
+
+}  // namespace
+
+/**
+ * @brief Converts a float column of any floating-point type into a strings column.
+ *
+ * Dispatches on floats.type() to dispatch_format_float_fn and forwards the
+ * remaining arguments unchanged. Column types that are not floating point end
+ * up in that functor's CUDF_FAIL overload.
+ *
+ * @param floats Column to format.
+ * @param digits Forwarded to the dispatch functor.
+ * @param stream Forwarded to the dispatch functor.
+ * @param mr Forwarded to the dispatch functor.
+ * @return The strings column produced by the dispatch functor.
+ */
+std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
+                                           int const digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  return type_dispatcher(floats.type(), dispatch_format_float_fn{}, floats, digits, stream, mr);
+}
+
+}  // namespace detail
+
+/**
+ * @brief External API: formats a floating-point column as a strings column.
+ *
+ * Thin wrapper that forwards every argument to detail::format_float.
+ *
+ * @param floats Column to format.
+ * @param digits Forwarded to detail::format_float.
+ * @param stream Forwarded to detail::format_float.
+ * @param mr Forwarded to detail::format_float.
+ * @return The column returned by detail::format_float.
+ */
+std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
+                                           int const digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();  // NOTE(review): presumably opens an NVTX profiling range for this call - confirm in nvtx/ranges.hpp
+  return detail::format_float(floats, digits, stream, mr);
+}
+
+}  // namespace spark_rapids_jni