Skip to content

Commit

Permalink
Adding format_float kernel (#1572)
Browse files Browse the repository at this point in the history
* wip

Signed-off-by: Haoyang Li <[email protected]>

* wip

Signed-off-by: Haoyang Li <[email protected]>

* Add float to string kernel

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Mike Wilson <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Mike Wilson <[email protected]>

* address comments and use different precision for float

Signed-off-by: Haoyang Li <[email protected]>

* a runnable format_number demo

Signed-off-by: Haoyang Li <[email protected]>

* rewrite the solution with ryu

Signed-off-by: Haoyang Li <[email protected]>

* update license

Signed-off-by: Haoyang Li <[email protected]>

* clean up

Signed-off-by: Haoyang Li <[email protected]>

* Split ftos_converter out

Signed-off-by: Haoyang Li <[email protected]>

* clean up

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* remove cudf changes

Signed-off-by: Haoyang Li <[email protected]>

* remove cudf changes

Signed-off-by: Haoyang Li <[email protected]>

* add ryu

Signed-off-by: Haoyang Li <[email protected]>

* Add copyright and notice

Signed-off-by: Haoyang Li <[email protected]>

* Fix copyrights and license

Signed-off-by: Haoyang Li <[email protected]>

* cudf conflict resolve

Signed-off-by: Haoyang Li <[email protected]>

* Add format_float kernel

Signed-off-by: Haoyang Li <[email protected]>

* clean up

Signed-off-by: Haoyang Li <[email protected]>

* Fixed two bugs

Signed-off-by: Haoyang Li <[email protected]>

* Added a failed case back

Signed-off-by: Haoyang Li <[email protected]>

* Refactor

Signed-off-by: Haoyang Li <[email protected]>

* Handle d=0 case

Signed-off-by: Haoyang Li <[email protected]>

* Add nv apache license to ftos_converter

Signed-off-by: Haoyang Li <[email protected]>

* Add nv apache license to ftos_converter

Signed-off-by: Haoyang Li <[email protected]>

* Fix a rounding bug

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/ftos_converter.cu

Co-authored-by: Jason Lowe <[email protected]>

* address some comments

Signed-off-by: Haoyang Li <[email protected]>

* cudf conflict

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Nghia Truong <[email protected]>

* Make it runnable again

Signed-off-by: Haoyang Li <[email protected]>

* address some comments

Signed-off-by: Haoyang Li <[email protected]>

* addressed comments

Signed-off-by: Haoyang Li <[email protected]>

* Address comments

Signed-off-by: Haoyang Li <[email protected]>

* clang format

Signed-off-by: Haoyang Li <[email protected]>

* Address comments

Signed-off-by: Haoyang Li <[email protected]>

* Address comments

Signed-off-by: Haoyang Li <[email protected]>

* address comments

Signed-off-by: Haoyang Li <[email protected]>

* fix build after upmerge

Signed-off-by: Haoyang Li <[email protected]>

* move inf/nan replacement to kernel

Signed-off-by: Haoyang Li <[email protected]>

* Apply suggestions from code review

Co-authored-by: Mike Wilson <[email protected]>
Co-authored-by: Nghia Truong <[email protected]>

* address comments

Signed-off-by: Haoyang Li <[email protected]>

* Apply suggestions from code review

Co-authored-by: Nghia Truong <[email protected]>

* address comments

Signed-off-by: Haoyang Li <[email protected]>

* cudf

Signed-off-by: Haoyang Li <[email protected]>

* cudf

Signed-off-by: Haoyang Li <[email protected]>

* format

Signed-off-by: Haoyang Li <[email protected]>

* cudf reset

Signed-off-by: Haoyang Li <[email protected]>

* Apply suggestions from code review

Co-authored-by: Mike Wilson <[email protected]>

---------

Signed-off-by: Haoyang Li <[email protected]>
Co-authored-by: Mike Wilson <[email protected]>
Co-authored-by: Jason Lowe <[email protected]>
Co-authored-by: Nghia Truong <[email protected]>
  • Loading branch information
4 people authored Dec 18, 2023
1 parent 6bdc68b commit 48d2736
Show file tree
Hide file tree
Showing 10 changed files with 677 additions and 38 deletions.
1 change: 1 addition & 0 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ add_library(
src/ZOrderJni.cpp
src/bloom_filter.cu
src/cast_decimal_to_string.cu
src/format_float.cu
src/cast_float_to_string.cu
src/cast_string.cu
src/cast_string_to_float.cu
Expand Down
15 changes: 15 additions & 0 deletions src/main/cpp/src/CastStringJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,21 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(J
CATCH_CAST_EXCEPTION(env, 0);
}

/**
 * JNI entry point backing CastStrings.fromFloatWithFormat.
 *
 * Reinterprets `input_column` as a pointer to a native cudf::column_view, formats it via
 * spark_rapids_jni::format_float on the default cudf stream, and hands the resulting column
 * back to the caller as a jlong handle.
 *
 * `0` is the value supplied to JNI_NULL_CHECK and CATCH_CAST_EXCEPTION as the failure return.
 */
JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloatWithFormat(
  JNIEnv* env, jclass, jlong input_column, jint digits)
{
  JNI_NULL_CHECK(env, input_column, "input column is null", 0);

  try {
    cudf::jni::auto_set_device(env);

    // The Java side passes the address of a column_view as a long.
    auto const* view_ptr = reinterpret_cast<cudf::column_view const*>(input_column);
    auto const stream    = cudf::get_default_stream();

    // Ownership of the freshly built column is released into the returned handle.
    return cudf::jni::release_as_jlong(spark_rapids_jni::format_float(*view_ptr, digits, stream));
  }
  CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
jclass,
jlong input_column)
Expand Down
6 changes: 6 additions & 0 deletions src/main/cpp/src/cast_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ std::unique_ptr<cudf::column> string_to_float(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Formats a column of floating-point values into a strings column.
 *
 * @param input Column whose elements are formatted.
 * @param digits Formatting parameter forwarded to the float-to-string converter.
 *        NOTE(review): presumably the number of fractional digits to emit — confirm
 *        against ftos_converter.
 * @param stream CUDA stream handed to the implementation.
 * @param mr Device memory resource handed to the implementation; defaults to the
 *        current device resource.
 * @return The newly created column, owned by the caller.
 */
std::unique_ptr<cudf::column> format_float(
cudf::column_view const& input,
int const digits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

std::unique_ptr<cudf::column> float_to_string(
cudf::column_view const& input,
rmm::cuda_stream_view stream,
Expand Down
131 changes: 131 additions & 0 deletions src/main/cpp/src/format_float.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cast_string.hpp"
#include "ftos_converter.cuh"

#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace spark_rapids_jni {

namespace detail {
namespace {

/**
 * @brief Per-row functor that either sizes or writes the formatted text of one element.
 *
 * The mode is selected by `d_chars`: while it is null the functor stores each row's output
 * length in `d_offsets[idx]`; once it is non-null the functor writes the row's characters
 * starting at `d_chars + d_offsets[idx]`. Null rows get size 0 and are never written.
 *
 * @tparam FloatType Element type read from `d_floats`.
 */
template <typename FloatType>
struct format_float_fn {
  cudf::column_device_view d_floats;  // input values
  int digits;                         // forwarded unchanged to ftos_converter
  cudf::size_type* d_offsets;         // receives sizes when sizing; read as offsets when writing
  char* d_chars;                      // output buffer; nullptr selects the sizing mode

  // Length of the formatted representation of `value`, as reported by ftos_converter.
  __device__ cudf::size_type compute_output_size(FloatType const value) const
  {
    constexpr bool is_float = std::is_same_v<FloatType, float>;
    auto const as_double    = static_cast<double>(value);
    auto const num_bytes = ftos_converter::compute_format_float_size(as_double, digits, is_float);
    return static_cast<cudf::size_type>(num_bytes);
  }

  // Writes the formatted text of row `idx` at that row's offset inside d_chars.
  __device__ void format_float(cudf::size_type const idx) const
  {
    constexpr bool is_float = std::is_same_v<FloatType, float>;
    char* const dest        = d_chars + d_offsets[idx];
    auto const as_double    = static_cast<double>(d_floats.element<FloatType>(idx));
    ftos_converter::format_float(as_double, digits, is_float, dest);
  }

  __device__ void operator()(cudf::size_type const idx) const
  {
    bool const sizing = (d_chars == nullptr);

    // Null rows contribute no characters; only the sizing mode records anything for them.
    if (d_floats.is_null(idx)) {
      if (sizing) { d_offsets[idx] = 0; }
      return;
    }

    if (sizing) {
      d_offsets[idx] = compute_output_size(d_floats.element<FloatType>(idx));
      return;
    }

    format_float(idx);
  }
};

/**
 * @brief Type-dispatch functor that builds the formatted strings column.
 *
 * The SFINAE constraints route floating-point element types to the real implementation;
 * every other element type selects the overload that fails with an error.
 */
struct dispatch_format_float_fn {
  // Floating-point path: format each row and assemble the resulting strings column.
  template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
  std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
                                           int const digits,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr) const
  {
    auto const row_count = floats.size();

    // Nothing to format: hand back an empty STRING column.
    if (row_count == 0) { return cudf::make_empty_column(cudf::type_id::STRING); }

    auto const d_column = cudf::column_device_view::create(floats, stream);

    // d_offsets / d_chars of the functor are left value-initialized here.
    auto [offsets_col, chars_col] = cudf::strings::detail::make_strings_children(
      format_float_fn<FloatType>{*d_column, digits}, row_count, stream, mr);

    // The output carries the same validity mask as the input.
    auto null_mask = cudf::detail::copy_bitmask(floats, stream, mr);

    return cudf::make_strings_column(row_count,
                                     std::move(offsets_col),
                                     std::move(chars_col),
                                     floats.null_count(),
                                     std::move(null_mask));
  }

  // Any non-floating-point element type is rejected.
  template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
  std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
                                           int const,
                                           rmm::cuda_stream_view,
                                           rmm::mr::device_memory_resource*) const
  {
    CUDF_FAIL("Values for format_float function must be a float type.");
  }
};

} // namespace

// Selects the formatter matching the column's element type and returns the resulting
// strings column; element types that are not floating point fail inside the dispatch functor.
std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
                                           int const digits,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
{
  auto const element_type = floats.type();
  return cudf::type_dispatcher(
    element_type, dispatch_format_float_fn{}, floats, digits, stream, mr);
}

} // namespace detail

// Public entry point: opens a profiling range for this call, then forwards every argument
// unchanged to detail::format_float.
std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
                                           int const digits,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto formatted = detail::format_float(floats, digits, stream, mr);
  return formatted;
}

} // namespace spark_rapids_jni
Loading

0 comments on commit 48d2736

Please sign in to comment.