diff --git a/src/main/cpp/src/cast_string.cu b/src/main/cpp/src/cast_string.cu index 88f1f0323f..c491668f92 100644 --- a/src/main/cpp/src/cast_string.cu +++ b/src/main/cpp/src/cast_string.cu @@ -408,12 +408,7 @@ CUDF_KERNEL void string_to_decimal_kernel(T* out, auto const row_start = offsets[row]; auto const len = offsets[row + 1] - row_start; bool const valid_entry = incoming_null_mask == nullptr || bit_is_set(incoming_null_mask, row); - - auto ret = validate_and_exponent(&chars[row_start], len, strip); - bool valid = ret.has_value(); - bool positive; - int decimal_location; - int first_digit; + bool valid = valid_entry && len > 0; // first_digit is distance into the string array for the first digit to process. This skips +, -, // whitespace, etc. decimal_location is the index into the string where the decimal point should @@ -439,8 +434,15 @@ CUDF_KERNEL void string_to_decimal_kernel(T* out, return count; }; + auto const validated = + valid ? validate_and_exponent(&chars[row_start], len, strip) : cuda::std::nullopt; + valid = validated.has_value(); + if (valid) { - thrust::tie(positive, decimal_location, first_digit) = *ret; + bool positive; + int decimal_location; + int first_digit; + thrust::tie(positive, decimal_location, first_digit) = *validated; auto const max_digits_before_decimal = precision + scale; auto const significant_digits_before_decimal_in_string = count_significant_digits( diff --git a/src/main/cpp/tests/cast_string.cpp b/src/main/cpp/tests/cast_string.cpp index efad547869..862469b0af 100644 --- a/src/main/cpp/tests/cast_string.cpp +++ b/src/main/cpp/tests/cast_string.cpp @@ -43,13 +43,13 @@ TYPED_TEST_SUITE(StringToFloatTests, cudf::test::FloatingPointTypes); TYPED_TEST(StringToIntegerTests, Simple) { - auto const strings = test::strings_column_wrapper{"1", "0", "42"}; + auto const strings = test::strings_column_wrapper{"1", "0", "42", "null"}; strings_column_view scv{strings}; auto const result = spark_rapids_jni::string_to_integer( data_type{type_to_id()}, scv, false, true, cudf::get_default_stream()); - test::fixed_width_column_wrapper expected({1, 0, 42}, {1, 1, 1}); + test::fixed_width_column_wrapper expected({1, 0, 42, 0}, {1, 1, 1, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); } @@ -252,6 +252,24 @@ TYPED_TEST(StringToIntegerTests, Empty) EXPECT_EQ(result->type().id(), type_to_id()); } +TYPED_TEST(StringToIntegerTests, NonEmptyNulls) +{ + auto const strings = test::strings_column_wrapper{"123", "123", "123", "123"}.release(); + auto const valids = std::vector{true, false, true, false}; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids.begin(), valids.end()); + strings->set_null_mask(null_mask, null_count); + + auto const scv = strings_column_view{*strings}; + EXPECT_EQ(scv.chars_size(cudf::get_default_stream()), 12); // make sure it has non-empty null. + + auto const result = spark_rapids_jni::string_to_integer( + data_type{type_to_id()}, scv, false, true, cudf::get_default_stream()); + + test::fixed_width_column_wrapper expected({123, 0, 123, 0}, {1, 0, 1, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); +} + TEST_F(StringToDecimalTests, Simple) { auto const strings = test::strings_column_wrapper({"1", "0", "-1"}); @@ -552,6 +570,26 @@ TEST_F(StringToDecimalTests, Empty) EXPECT_EQ(result->type().scale(), 2); } +TEST_F(StringToDecimalTests, NonEmptyNulls) +{ + auto const strings = + test::strings_column_wrapper{"1.23456", "1.23456", "1.23456", "1.23456"}.release(); + auto const valids = std::vector{true, false, true, false}; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids.begin(), valids.end()); + strings->set_null_mask(null_mask, null_count); + + auto const scv = strings_column_view{*strings}; + EXPECT_EQ(scv.chars_size(cudf::get_default_stream()), 28); // make sure it has non-empty null. + + auto const result = + spark_rapids_jni::string_to_decimal(6, -5, scv, false, true, cudf::get_default_stream()); + + test::fixed_point_column_wrapper expected( + {123456, 0, 123456, 0}, {1, 0, 1, 0}, numeric::scale_type{-5}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); +} + TYPED_TEST(StringToFloatTests, Simple) { cudf::test::strings_column_wrapper in{"-1.8946e-10", @@ -707,3 +745,21 @@ TYPED_TEST(StringToFloatTests, Empty) EXPECT_EQ(result->size(), 0); } + +TYPED_TEST(StringToFloatTests, NonEmptyNulls) +{ + auto const strings = test::strings_column_wrapper{"1.23", "1.23", "1.23", "1.23"}.release(); + auto const valids = std::vector{true, false, true, false}; + auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids.begin(), valids.end()); + strings->set_null_mask(null_mask, null_count); + + auto const scv = strings_column_view{*strings}; + EXPECT_EQ(scv.chars_size(cudf::get_default_stream()), 16); // make sure it has non-empty null. + + auto const result = spark_rapids_jni::string_to_float( + data_type{type_to_id()}, scv, false, cudf::get_default_stream()); + + test::fixed_width_column_wrapper expected({1.23, 0.0, 1.23, 0.0}, {1, 0, 1, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); +}