From 0f1bae882e4d17329164891c15036128329eb28d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:52:59 -0500 Subject: [PATCH 1/6] Fix groupby argmin/max gather of sorted-order indices (#17591) Fixes the gather logic in `groupby_argmin.cu` and `groupby_argmax.cu` that gathers the sorted-order indices from the results of the groupby reduction functions. The resulting indices must be remapped to the sorted-order indices before returning. The `gather` call has been fixed to use an output vector since the [gather documentation indicates the map and result iterators must not overlap](https://nvidia.github.io/cccl/thrust/api/function_group__gathering_1ga6fdb1fe3ff0d9ce01f41a72fa94c56df.html). Also, the `gather_if` is not needed since the groupby reduction does not use the min/max sentinel values in its logic. Closes #16542 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17591 --- cpp/src/groupby/sort/aggregate.cpp | 10 ++------- cpp/src/groupby/sort/group_argmax.cu | 31 +++++++++++++-------------- cpp/src/groupby/sort/group_argmin.cu | 32 ++++++++++++++-------------- 3 files changed, 33 insertions(+), 40 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index e9f885a5917..6480070e85a 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -209,10 +209,7 @@ void aggregate_result_functor::operator()(aggregation const& a operator()(*argmin_agg); column_view const argmin_result = cache.get_result(values, *argmin_agg); - // We make a view of ARGMIN result without a null mask and gather using - // this mask. The values in data buffer of ARGMIN result corresponding - // to null values was initialized to ARGMIN_SENTINEL which is an out of - // bounds index value and causes the gathered value to be null. + // Compute the ARGMIN result without the null mask in the gather map. column_view const null_removed_map( data_type(type_to_id()), argmin_result.size(), @@ -251,10 +248,7 @@ void aggregate_result_functor::operator()(aggregation const& a operator()(*argmax_agg); column_view const argmax_result = cache.get_result(values, *argmax_agg); - // We make a view of ARGMAX result without a null mask and gather using - // this mask. The values in data buffer of ARGMAX result corresponding - // to null values was initialized to ARGMAX_SENTINEL which is an out of - // bounds index value and causes the gathered value to be null. + // Compute the ARGMAX result without the null mask in the gather map. column_view const null_removed_map( data_type(type_to_id()), argmax_result.size(), diff --git a/cpp/src/groupby/sort/group_argmax.cu b/cpp/src/groupby/sort/group_argmax.cu index 7dce341130e..329c7c4eb32 100644 --- a/cpp/src/groupby/sort/group_argmax.cu +++ b/cpp/src/groupby/sort/group_argmax.cu @@ -42,22 +42,21 @@ std::unique_ptr group_argmax(column_view const& values, stream, mr); - // The functor returns the index of maximum in the sorted values. - // We need the index of maximum in the original unsorted values. - // So use indices to gather the sort order used to sort `values`. - // Gather map cannot be null so we make a view with the mask removed. - // The values in data buffer of indices corresponding to null values was - // initialized to ARGMAX_SENTINEL. Using gather_if. - // This can't use gather because nulls in gathered column will not store ARGMAX_SENTINEL. - auto indices_view = indices->mutable_view(); - thrust::gather_if(rmm::exec_policy(stream), - indices_view.begin(), // map first - indices_view.end(), // map last - indices_view.begin(), // stencil - key_sort_order.begin(), // input - indices_view.begin(), // result - [] __device__(auto i) { return (i != cudf::detail::ARGMAX_SENTINEL); }); - return indices; + // The functor returns the indices of maximums based on the sorted keys. + // We need the indices of maximums from the original unsorted keys + // so we use these indices and the key_sort_order to map to the correct indices. + // We do not use cudf::gather since we can move the null-mask separately. + auto indices_view = indices->view(); + auto output = rmm::device_uvector(indices_view.size(), stream, mr); + thrust::gather(rmm::exec_policy_nosync(stream), + indices_view.begin(), // map first + indices_view.end(), // map last + key_sort_order.begin(), // input + output.data() // result (must not overlap map) + ); + auto null_count = indices_view.null_count(); + auto null_mask = indices->release().null_mask.release(); + return std::make_unique(std::move(output), std::move(*null_mask), null_count); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_argmin.cu b/cpp/src/groupby/sort/group_argmin.cu index c4bed330b9f..dbfc375fc20 100644 --- a/cpp/src/groupby/sort/group_argmin.cu +++ b/cpp/src/groupby/sort/group_argmin.cu @@ -21,6 +21,7 @@ #include #include +#include #include @@ -42,22 +43,21 @@ std::unique_ptr group_argmin(column_view const& values, stream, mr); - // The functor returns the index of minimum in the sorted values. - // We need the index of minimum in the original unsorted values. - // So use indices to gather the sort order used to sort `values`. - // The values in data buffer of indices corresponding to null values was - // initialized to ARGMIN_SENTINEL. Using gather_if. - // This can't use gather because nulls in gathered column will not store ARGMIN_SENTINEL. - auto indices_view = indices->mutable_view(); - thrust::gather_if(rmm::exec_policy(stream), - indices_view.begin(), // map first - indices_view.end(), // map last - indices_view.begin(), // stencil - key_sort_order.begin(), // input - indices_view.begin(), // result - [] __device__(auto i) { return (i != cudf::detail::ARGMIN_SENTINEL); }); - - return indices; + // The functor returns the indices of minimums based on the sorted keys. + // We need the indices of minimums from the original unsorted keys + // so we use these and the key_sort_order to map to the correct indices. + // We do not use cudf::gather since we can move the null-mask separately. + auto indices_view = indices->view(); + auto output = rmm::device_uvector(indices_view.size(), stream, mr); + thrust::gather(rmm::exec_policy_nosync(stream), + indices_view.begin(), // map first + indices_view.end(), // map last + key_sort_order.begin(), // input + output.data() // result (must not overlap map) + ); + auto null_count = indices_view.null_count(); + auto null_mask = indices->release().null_mask.release(); + return std::make_unique(std::move(output), std::move(*null_mask), null_count); } } // namespace detail From f9f5f7d7ff42e035582fd325bd6128e0e589286d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:54:14 -0500 Subject: [PATCH 2/6] Fix possible int overflow in compute_mixed_join_output_size (#17633) Fixes possible integer overflow condition when the number of rows is near max int32 in `compute_mixed_join_output_size` kernel function. Reference #10368 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17633 --- cpp/src/join/mixed_join_size_kernel.cuh | 6 +++--- cpp/src/json/json_path.cu | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 4049ccf35e1..98170ed719a 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -62,8 +62,8 @@ CUDF_KERNEL void __launch_bounds__(block_size) intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); std::size_t thread_counter{0}; - cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; - cudf::size_type const stride = block_size * gridDim.x; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); cudf::size_type const left_num_rows = left_table.num_rows(); cudf::size_type const right_num_rows = right_table.num_rows(); auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); @@ -80,7 +80,7 @@ CUDF_KERNEL void __launch_bounds__(block_size) auto count_equality = pair_expression_equality{ evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + for (auto outer_row_index = start_idx; outer_row_index < outer_num_rows; outer_row_index += stride) { auto query_pair = pair_func(outer_row_index); if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index fd8629ed6f3..e6e01b9c9fe 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -928,7 +928,7 @@ __launch_bounds__(block_size) CUDF_KERNEL get_json_object_options options) { auto tid = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::thread_index_type{blockDim.x} * cudf::thread_index_type{gridDim.x}; + auto const stride = cudf::detail::grid_1d::grid_stride(); size_type warp_valid_count{0}; From fb62d0ea99d6761c081442bd660b8ba2309fb438 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 20 Dec 2024 09:06:30 -0800 Subject: [PATCH 3/6] Clean up namespaces and improve compression-related headers (#17621) Moved compression-related stuff that was under `cudf::io` to `cudf::io::detail`. Moved the nvcomp adapter from `cudf::io::nvcomp` to `cudf::io::detail::nvcomp`. Extract common compression constants to appropriate headers, and updated the files to include what they use. Changes are made in preparation for adding higher-level compression API that abstracts nvcomp use and simplifies caller code. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17621 --- cpp/include/cudf/io/nvcomp_adapter.hpp | 4 +- cpp/src/io/comp/common.hpp | 37 +++++++++++++++ cpp/src/io/comp/comp.cpp | 5 +- cpp/src/io/comp/comp.hpp | 22 ++++++++- cpp/src/io/comp/debrotli.cu | 8 ++-- cpp/src/io/comp/gpuinflate.cu | 7 +-- cpp/src/io/comp/gpuinflate.hpp | 41 ++-------------- cpp/src/io/comp/io_uncomp.hpp | 6 +-- cpp/src/io/comp/nvcomp_adapter.cpp | 4 +- cpp/src/io/comp/nvcomp_adapter.cu | 6 +-- cpp/src/io/comp/nvcomp_adapter.cuh | 6 +-- cpp/src/io/comp/nvcomp_adapter.hpp | 6 +-- cpp/src/io/comp/snap.cu | 7 +-- cpp/src/io/comp/statistics.cu | 4 +- cpp/src/io/comp/unsnap.cu | 13 +---- cpp/src/io/orc/orc_gpu.hpp | 20 ++++---- cpp/src/io/orc/reader_impl_decode.cu | 2 +- cpp/src/io/orc/stripe_enc.cu | 9 +++- cpp/src/io/orc/writer_impl.cu | 6 ++- cpp/src/io/parquet/page_enc.cu | 3 ++ cpp/src/io/parquet/parquet_gpu.hpp | 18 +++---- cpp/src/io/parquet/reader_impl_chunking.cu | 50 ++++++++++++-------- cpp/src/io/parquet/reader_impl_preprocess.cu | 8 ++-- cpp/src/io/parquet/writer_impl.cu | 1 + cpp/src/io/parquet/writer_impl_helpers.cpp | 2 + cpp/src/io/parquet/writer_impl_helpers.hpp | 4 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 24 ++++++---- cpp/tests/io/comp/decomp_test.cpp | 47 +++++++++--------- cpp/tests/io/orc_test.cpp | 8 ++-- 29 files changed, 206 insertions(+), 172 deletions(-) create mode 100644 cpp/src/io/comp/common.hpp diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp index 0d74a4158ad..4ad760d278f 100644 --- a/cpp/include/cudf/io/nvcomp_adapter.hpp +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -22,7 +22,7 @@ #include namespace CUDF_EXPORT cudf { -namespace io::nvcomp { +namespace io::detail::nvcomp { enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP }; @@ -88,5 +88,5 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para [[nodiscard]] std::optional is_decompression_disabled( compression_type compression, feature_status_parameters params = feature_status_parameters()); -} // namespace io::nvcomp +} // namespace io::detail::nvcomp } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/comp/common.hpp b/cpp/src/io/comp/common.hpp new file mode 100644 index 00000000000..a81ac60e03a --- /dev/null +++ b/cpp/src/io/comp/common.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::io::detail { + +/** + * @brief The size used for padding a data buffer's size to a multiple of the padding. + * + * Padding is necessary for input/output buffers of several compression/decompression kernels + * (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require + * padding to the buffers so that the pointers can shift along the address space to satisfy their + * alignment requirement. + * + * In the meantime, it is not entirely clear why such padding is needed. We need to further + * investigate and implement a better fix rather than just padding the buffer. + * See https://github.com/rapidsai/cudf/issues/13605. + */ +constexpr std::size_t BUFFER_PADDING_MULTIPLE{8}; + +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/comp.cpp b/cpp/src/io/comp/comp.cpp index 2dda2287e09..26535bed43b 100644 --- a/cpp/src/io/comp/comp.cpp +++ b/cpp/src/io/comp/comp.cpp @@ -87,15 +87,14 @@ std::vector compress_snappy(host_span src, outputs[0] = d_dst; outputs.host_to_device_async(stream); - cudf::detail::hostdevice_vector hd_status(1, stream); + cudf::detail::hostdevice_vector hd_status(1, stream); hd_status[0] = {}; hd_status.host_to_device_async(stream); nvcomp::batched_compress(nvcomp::compression_type::SNAPPY, inputs, outputs, hd_status, stream); hd_status.device_to_host_sync(stream); - CUDF_EXPECTS(hd_status[0].status == cudf::io::compression_status::SUCCESS, - "snappy compression failed"); + CUDF_EXPECTS(hd_status[0].status == compression_status::SUCCESS, "snappy compression failed"); return cudf::detail::make_std_vector_sync(d_dst, stream); } diff --git a/cpp/src/io/comp/comp.hpp b/cpp/src/io/comp/comp.hpp index 652abbbeda6..e16f26e1f06 100644 --- a/cpp/src/io/comp/comp.hpp +++ b/cpp/src/io/comp/comp.hpp @@ -16,16 +16,34 @@ #pragma once +#include "common.hpp" + #include #include -#include -#include #include namespace CUDF_EXPORT cudf { namespace io::detail { +/** + * @brief Status of a compression/decompression operation. + */ +enum class compression_status : uint8_t { + SUCCESS, ///< Successful, output is valid + FAILURE, ///< Failed, output is invalid (e.g. input is unsupported in some way) + SKIPPED, ///< Operation skipped (if conversion, uncompressed data can be used) + OUTPUT_OVERFLOW, ///< Output buffer is too small; operation can succeed with larger output +}; + +/** + * @brief Descriptor of compression/decompression result. + */ +struct compression_result { + uint64_t bytes_written; + compression_status status; +}; + /** * @brief Compresses a system memory buffer. * diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 72649dbe427..151f72d262e 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -63,8 +63,8 @@ THE SOFTWARE. #include -namespace cudf { -namespace io { +namespace cudf::io::detail { + constexpr uint32_t huffman_lookup_table_width = 8; constexpr int8_t brotli_code_length_codes = 18; constexpr uint32_t brotli_num_distance_short_codes = 16; @@ -2020,7 +2020,6 @@ CUDF_KERNEL void __launch_bounds__(block_size, 2) results[block_id].status = (s->error == 0) ? compression_status::SUCCESS : compression_status::FAILURE; // Return ext heap used by last block (statistics) - results[block_id].reserved = s->fb_size; } } @@ -2115,5 +2114,4 @@ void gpu_debrotli(device_span const> inputs, #endif } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 090ea1430b5..6e5ce4ce6c3 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -49,8 +49,7 @@ Mark Adler madler@alumni.caltech.edu #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int max_bits = 15; // maximum bits in a code constexpr int max_l_codes = 286; // maximum number of literal/length codes @@ -1139,7 +1138,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) default: return compression_status::FAILURE; } }(); - results[z].reserved = (int)(state->end - state->cur); // Here mainly for debug purposes } } @@ -1224,5 +1222,4 @@ void gpu_copy_uncompressed_blocks(device_span const> } } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/gpuinflate.hpp b/cpp/src/io/comp/gpuinflate.hpp index 8bfca2b30df..4b09bd5a84c 100644 --- a/cpp/src/io/comp/gpuinflate.hpp +++ b/cpp/src/io/comp/gpuinflate.hpp @@ -16,6 +16,8 @@ #pragma once +#include "io/comp/comp.hpp" + #include #include #include @@ -24,44 +26,10 @@ #include -namespace cudf { -namespace io { - -/** - * @brief Status of a compression/decompression operation. - */ -enum class compression_status : uint8_t { - SUCCESS, ///< Successful, output is valid - FAILURE, ///< Failed, output is invalid (e.g. input is unsupported in some way) - SKIPPED, ///< Operation skipped (if conversion, uncompressed data can be used) - OUTPUT_OVERFLOW, ///< Output buffer is too small; operation can succeed with larger output -}; - -/** - * @brief Descriptor of compression/decompression result. - */ -struct compression_result { - uint64_t bytes_written; - compression_status status; - uint32_t reserved; -}; +namespace cudf::io::detail { enum class gzip_header_included { NO, YES }; -/** - * @brief The value used for padding a data buffer such that its size will be multiple of it. - * - * Padding is necessary for input/output buffers of several compression/decompression kernels - * (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require - * padding to the buffers so that the pointers can shift along the address space to satisfy their - * alignment requirement. - * - * In the meantime, it is not entirely clear why such padding is needed. We need to further - * investigate and implement a better fix rather than just padding the buffer. - * See https://github.com/rapidsai/cudf/issues/13605. - */ -constexpr std::size_t BUFFER_PADDING_MULTIPLE{8}; - /** * @brief Interface for decompressing GZIP-compressed data * @@ -169,5 +137,4 @@ void gpu_snap(device_span const> inputs, device_span results, rmm::cuda_stream_view stream); -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/io_uncomp.hpp b/cpp/src/io/comp/io_uncomp.hpp index ca722a9b7ee..711a1c3274f 100644 --- a/cpp/src/io/comp/io_uncomp.hpp +++ b/cpp/src/io/comp/io_uncomp.hpp @@ -16,15 +16,13 @@ #pragma once +#include "common.hpp" + #include #include -#include -#include #include -using cudf::host_span; - namespace CUDF_EXPORT cudf { namespace io::detail { diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index d45c02f374f..3a4e315348c 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -30,7 +30,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { namespace { // Dispatcher for nvcompBatchedDecompressGetTempSizeEx @@ -478,4 +478,4 @@ std::optional compress_max_allowed_chunk_size(compression_type compressi } } -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.cu b/cpp/src/io/comp/nvcomp_adapter.cu index 794d452ebf2..cf5996dfd93 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cu +++ b/cpp/src/io/comp/nvcomp_adapter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #include #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { batched_args create_batched_nvcomp_args(device_span const> inputs, device_span const> outputs, @@ -127,4 +127,4 @@ std::pair max_chunk_and_total_input_size(device_span @@ -27,7 +27,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { struct batched_args { rmm::device_uvector input_data_ptrs; @@ -76,4 +76,4 @@ void skip_unsupported_inputs(device_span input_sizes, std::pair max_chunk_and_total_input_size(device_span input_sizes, rmm::cuda_stream_view stream); -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 2e1cda2d6b7..5c402523168 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -16,7 +16,7 @@ #pragma once -#include "gpuinflate.hpp" +#include "io/comp/comp.hpp" #include #include @@ -25,7 +25,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { /** * @brief Device batch decompression of given type. * @@ -103,4 +103,4 @@ void batched_compress(compression_type compression, device_span results, rmm::cuda_stream_view stream); -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 7d4dcffa713..1443bfd38a2 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -19,8 +19,7 @@ #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int hash_bits = 12; // TBD: Tentatively limits to 2-byte codes to prevent long copy search followed by long literal @@ -329,7 +328,6 @@ CUDF_KERNEL void __launch_bounds__(128) results[blockIdx.x].bytes_written = s->dst - s->dst_base; results[blockIdx.x].status = (s->dst > s->end) ? compression_status::FAILURE : compression_status::SUCCESS; - results[blockIdx.x].reserved = 0; } } @@ -345,5 +343,4 @@ void gpu_snap(device_span const> inputs, } } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/statistics.cu b/cpp/src/io/comp/statistics.cu index faf967041bc..caee9145d2c 100644 --- a/cpp/src/io/comp/statistics.cu +++ b/cpp/src/io/comp/statistics.cu @@ -21,7 +21,7 @@ #include #include -namespace cudf::io { +namespace cudf::io::detail { writer_compression_statistics collect_compression_statistics( device_span const> inputs, @@ -61,4 +61,4 @@ writer_compression_statistics collect_compression_statistics( output_size_successful}; } -} // namespace cudf::io +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 9b01272ac70..cf841c435a3 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -21,12 +21,10 @@ #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int32_t batch_size = (1 << 5); constexpr int32_t batch_count = (1 << 2); constexpr int32_t prefetch_size = (1 << 9); // 512B, in 32B chunks -constexpr bool log_cyclecount = false; void __device__ busy_wait(size_t cycles) { @@ -647,7 +645,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) auto cur = s->src.begin(); auto const end = s->src.end(); s->error = 0; - if (log_cyclecount) { s->tstart = clock(); } if (cur < end) { // Read uncompressed size (varint), limited to 32-bit uint32_t uncompressed_size = *cur++; @@ -705,11 +702,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) results[strm_id].bytes_written = s->uncompressed_size - s->bytes_left; results[strm_id].status = (s->error == 0) ? compression_status::SUCCESS : compression_status::FAILURE; - if (log_cyclecount) { - results[strm_id].reserved = clock() - s->tstart; - } else { - results[strm_id].reserved = 0; - } } } @@ -724,5 +716,4 @@ void gpu_unsnap(device_span const> inputs, unsnap_kernel<128><<>>(inputs, outputs, results); } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 654ee1e012c..f4e75f78dec 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -16,7 +16,7 @@ #pragma once -#include "io/comp/gpuinflate.hpp" +#include "io/comp/comp.hpp" #include "io/statistics/statistics.cuh" #include "io/utilities/column_buffer.hpp" #include "orc.hpp" @@ -73,14 +73,14 @@ struct CompressedStreamInfo { uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data uint8_t* uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size{}; // [in] compressed data size for this stream - device_span* dec_in_ctl{}; // [in] input buffer to decompress - device_span* dec_out_ctl{}; // [in] output buffer to decompress into - device_span dec_res{}; // [in] results of decompression - device_span* copy_in_ctl{}; // [out] input buffer to copy - device_span* copy_out_ctl{}; // [out] output buffer to copy to - uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of - // compressed blocks(out) + size_t compressed_data_size{}; // [in] compressed data size for this stream + device_span* dec_in_ctl{}; // [in] input buffer to decompress + device_span* dec_out_ctl{}; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl{}; // [out] input buffer to copy + device_span* copy_out_ctl{}; // [out] output buffer to copy to + uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of // uncompressed blocks(out) uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream @@ -414,7 +414,7 @@ std::optional CompressOrcDataStreams( bool collect_statistics, device_2dspan strm_desc, device_2dspan enc_streams, - device_span comp_res, + device_span comp_res, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 0081ed30d17..b661bb4ff90 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -269,7 +269,7 @@ rmm::device_buffer decompress_stripe_data( num_uncompressed_blocks}; device_span> copy_out_view{inflate_out.data() + num_compressed_blocks, num_uncompressed_blocks}; - gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); + cudf::io::detail::gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); } // Copy without stream sync, thus need to wait for stream sync below to access. diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 07172b6b7f7..79ecca0ca99 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/block_utils.cuh" #include "io/utilities/time_utils.cuh" @@ -44,7 +45,11 @@ namespace io { namespace orc { namespace gpu { +namespace nvcomp = cudf::io::detail::nvcomp; + using cudf::detail::device_2dspan; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; constexpr int scratch_buffer_size = 512 * 4; constexpr int compact_streams_block_size = 1024; @@ -1385,7 +1390,7 @@ std::optional CompressOrcDataStreams( if (compression == SNAPPY) { try { if (nvcomp::is_compression_disabled(nvcomp::compression_type::SNAPPY)) { - gpu_snap(comp_in, comp_out, comp_res, stream); + cudf::io::detail::gpu_snap(comp_in, comp_out, comp_res, stream); } else { nvcomp::batched_compress( nvcomp::compression_type::SNAPPY, comp_in, comp_out, comp_res, stream); @@ -1429,7 +1434,7 @@ std::optional CompressOrcDataStreams( strm_desc, comp_in, comp_out, comp_res, compressed_data, comp_blk_size, max_comp_blk_size); if (collect_statistics) { - return cudf::io::collect_compression_statistics(comp_in, comp_res, stream); + return cudf::io::detail::collect_compression_statistics(comp_in, comp_res, stream); } else { return std::nullopt; } diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 6b9c19368dc..ce868b83c04 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -71,6 +71,8 @@ namespace cudf::io::orc::detail { +namespace nvcomp = cudf::io::detail::nvcomp; + template [[nodiscard]] constexpr int varint_size(T val) { @@ -2023,8 +2025,8 @@ size_t max_compression_output_size(CompressionKind compression_kind, uint32_t co { if (compression_kind == NONE) return 0; - return compress_max_output_chunk_size(to_nvcomp_compression_type(compression_kind), - compression_blocksize); + return nvcomp::compress_max_output_chunk_size(to_nvcomp_compression_type(compression_kind), + compression_blocksize); } std::unique_ptr make_table_meta(table_view const& input) diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index e9558735929..a1edd21f8a2 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -51,6 +51,9 @@ namespace { using ::cudf::detail::device_2dspan; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; + constexpr int encode_block_size = 128; constexpr int rle_buffer_size = 2 * encode_block_size; constexpr int num_encode_warps = encode_block_size / cudf::detail::warp_size; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ce9d48693ec..b2563ab5065 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -17,7 +17,7 @@ #pragma once #include "error.hpp" -#include "io/comp/gpuinflate.hpp" +#include "io/comp/comp.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_common.hpp" #include "io/statistics/statistics.cuh" @@ -599,12 +599,12 @@ struct EncColumnChunk { */ struct EncPage { // all pointers at the top to keep things properly aligned - uint8_t* page_data; //!< Ptr to uncompressed page - uint8_t* compressed_data; //!< Ptr to compressed page - EncColumnChunk* chunk; //!< Chunk that this page belongs to - compression_result* comp_res; //!< Ptr to compression result - uint32_t* def_histogram; //!< Histogram of counts for each definition level - uint32_t* rep_histogram; //!< Histogram of counts for each repetition level + uint8_t* page_data; //!< Ptr to uncompressed page + uint8_t* compressed_data; //!< Ptr to compressed page + EncColumnChunk* chunk; //!< Chunk that this page belongs to + cudf::io::detail::compression_result* comp_res; //!< Ptr to compression result + uint32_t* def_histogram; //!< Histogram of counts for each definition level + uint32_t* rep_histogram; //!< Histogram of counts for each repetition level // put this here in case it's ever made 64-bit encode_kernel_mask kernel_mask; //!< Mask used to control which encoding kernels to run // the rest can be 4 byte aligned @@ -1023,7 +1023,7 @@ void EncodePages(device_span pages, bool write_v2_headers, device_span> comp_in, device_span> comp_out, - device_span comp_res, + device_span comp_res, rmm::cuda_stream_view stream); /** @@ -1046,7 +1046,7 @@ void DecideCompression(device_span chunks, rmm::cuda_stream_view * @param[in] stream CUDA stream to use */ void EncodePageHeaders(device_span pages, - device_span comp_res, + device_span comp_res, device_span page_stats, statistics_chunk const* chunk_stats, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 27312a4da89..933be889b1a 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -15,6 +15,8 @@ */ #include "compact_protocol_reader.hpp" +#include "io/comp/comp.hpp" +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/time_utils.cuh" #include "reader_impl.hpp" @@ -44,6 +46,10 @@ namespace cudf::io::parquet::detail { namespace { +namespace nvcomp = cudf::io::detail::nvcomp; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; + struct split_info { row_range rows; int64_t split_pos; @@ -795,14 +801,16 @@ std::vector compute_page_splits_by_row(device_span 0) { - debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), stream); + debrotli_scratch.resize(cudf::io::detail::get_gpu_debrotli_scratch_size(codec.num_pages), + stream); } } // Dispatch batches of pages to decompress for each codec. // Buffer needs to be padded, required by `gpuDecodePageData`. rmm::device_buffer decomp_pages( - cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); + cudf::util::round_up_safe(total_decomp_size, cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); auto comp_in = cudf::detail::make_empty_host_vector>(num_comp_pages, stream); @@ -874,8 +882,11 @@ std::vector compute_page_splits_by_row(device_span compute_page_splits_by_row(device_span @@ -251,8 +252,8 @@ void generate_depth_remappings( if (source->is_device_read_preferred(io_size)) { // Buffer needs to be padded. // Required by `gpuDecodePageData`. - page_data[chunk] = - rmm::device_buffer(cudf::util::round_up_safe(io_size, BUFFER_PADDING_MULTIPLE), stream); + page_data[chunk] = rmm::device_buffer( + cudf::util::round_up_safe(io_size, cudf::io::detail::BUFFER_PADDING_MULTIPLE), stream); auto fut_read_size = source->device_read_async( io_offset, io_size, static_cast(page_data[chunk].data()), stream); read_tasks.emplace_back(std::move(fut_read_size)); @@ -261,7 +262,8 @@ void generate_depth_remappings( // Buffer needs to be padded. // Required by `gpuDecodePageData`. page_data[chunk] = rmm::device_buffer( - cudf::util::round_up_safe(read_buffer->size(), BUFFER_PADDING_MULTIPLE), stream); + cudf::util::round_up_safe(read_buffer->size(), cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); CUDF_CUDA_TRY(cudaMemcpyAsync(page_data[chunk].data(), read_buffer->data(), read_buffer->size(), diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 6db92462498..6b1a20701f9 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -23,6 +23,7 @@ #include "compact_protocol_reader.hpp" #include "compact_protocol_writer.hpp" #include "interop/decimal_conversion_utilities.cuh" +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_gpu.hpp" diff --git a/cpp/src/io/parquet/writer_impl_helpers.cpp b/cpp/src/io/parquet/writer_impl_helpers.cpp index 396d44c0763..f15ea1f3c37 100644 --- a/cpp/src/io/parquet/writer_impl_helpers.cpp +++ b/cpp/src/io/parquet/writer_impl_helpers.cpp @@ -21,6 +21,8 @@ #include "writer_impl_helpers.hpp" +#include "io/comp/nvcomp_adapter.hpp" + #include #include #include diff --git a/cpp/src/io/parquet/writer_impl_helpers.hpp b/cpp/src/io/parquet/writer_impl_helpers.hpp index a85411594e9..14a9a0ed5b7 100644 --- a/cpp/src/io/parquet/writer_impl_helpers.hpp +++ b/cpp/src/io/parquet/writer_impl_helpers.hpp @@ -20,11 +20,11 @@ */ #pragma once -#include "io/comp/nvcomp_adapter.hpp" #include "parquet_common.hpp" #include #include +#include namespace cudf::io::parquet::detail { @@ -42,7 +42,7 @@ Compression to_parquet_compression(compression_type compression); * @param codec Compression codec * @return Translated nvcomp compression type */ -nvcomp::compression_type to_nvcomp_compression_type(Compression codec); +cudf::io::detail::nvcomp::compression_type to_nvcomp_compression_type(Compression codec); /** * @brief Function that computes input alignment requirements for the given compression type. diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 06069630685..162da62ef03 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" @@ -41,6 +42,8 @@ namespace cudf::io::text { namespace { +namespace nvcomp = cudf::io::detail::nvcomp; + /** * @brief Transforms offset tuples of the form [compressed_begin, compressed_end, * decompressed_begin, decompressed_end] into span tuples of the form [compressed_device_span, @@ -73,7 +76,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader { { // Buffer needs to be padded. // Required by `inflate_kernel`. - device.resize(cudf::util::round_up_safe(host.size(), BUFFER_PADDING_MULTIPLE), stream); + device.resize(cudf::util::round_up_safe(host.size(), cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); cudf::detail::cuda_memcpy_async( device_span{device}.subspan(0, host.size()), host, stream); } @@ -94,7 +98,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader { rmm::device_uvector d_decompressed_offsets; rmm::device_uvector> d_compressed_spans; rmm::device_uvector> d_decompressed_spans; - rmm::device_uvector d_decompression_results; + rmm::device_uvector d_decompression_results; std::size_t compressed_size_with_headers{}; std::size_t max_decompressed_size{}; // this is usually equal to decompressed_size() @@ -152,16 +156,16 @@ class bgzip_data_chunk_reader : public data_chunk_reader { gpuinflate(d_compressed_spans, d_decompressed_spans, d_decompression_results, - gzip_header_included::NO, + cudf::io::detail::gzip_header_included::NO, stream); } else { - cudf::io::nvcomp::batched_decompress(cudf::io::nvcomp::compression_type::DEFLATE, - d_compressed_spans, - d_decompressed_spans, - d_decompression_results, - max_decompressed_size, - decompressed_size(), - stream); + nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE, + d_compressed_spans, + d_decompressed_spans, + d_decompression_results, + max_decompressed_size, + decompressed_size(), + stream); } } is_decompressed = true; diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 54262dc3b44..5bbe8b63c47 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -30,6 +30,9 @@ #include using cudf::device_span; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; +namespace nvcomp = cudf::io::detail::nvcomp; /** * @brief Base test fixture for decompression @@ -61,7 +64,7 @@ struct DecompressTest : public cudf::test::BaseFixture { inf_out[0] = dst; inf_out.host_to_device_async(stream); - cudf::detail::hostdevice_vector inf_stat(1, stream); + cudf::detail::hostdevice_vector inf_stat(1, stream); inf_stat[0] = {}; inf_stat.host_to_device_async(stream); @@ -69,7 +72,7 @@ struct DecompressTest : public cudf::test::BaseFixture { CUDF_CUDA_TRY(cudaMemcpyAsync( decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); inf_stat.device_to_host_sync(stream); - ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS); + ASSERT_EQ(inf_stat[0].status, compression_status::SUCCESS); } }; @@ -79,13 +82,13 @@ struct DecompressTest : public cudf::test::BaseFixture { struct GzipDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - cudf::io::gpuinflate(d_inf_in, - d_inf_out, - d_inf_stat, - cudf::io::gzip_header_included::YES, - cudf::get_default_stream()); + cudf::io::detail::gpuinflate(d_inf_in, + d_inf_out, + d_inf_stat, + cudf::io::detail::gzip_header_included::YES, + cudf::get_default_stream()); } }; @@ -95,9 +98,9 @@ struct GzipDecompressTest : public DecompressTest { struct SnappyDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream()); + cudf::io::detail::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream()); } }; @@ -107,17 +110,17 @@ struct SnappyDecompressTest : public DecompressTest { struct BrotliDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), + rmm::device_buffer d_scratch{cudf::io::detail::get_gpu_debrotli_scratch_size(1), cudf::get_default_stream()}; - cudf::io::gpu_debrotli(d_inf_in, - d_inf_out, - d_inf_stat, - d_scratch.data(), - d_scratch.size(), - cudf::get_default_stream()); + cudf::io::detail::gpu_debrotli(d_inf_in, + d_inf_out, + d_inf_stat, + d_scratch.data(), + d_scratch.size(), + cudf::get_default_stream()); } }; @@ -181,8 +184,8 @@ TEST_F(BrotliDecompressTest, HelloWorld) TEST_F(NvcompConfigTest, Compression) { - using cudf::io::nvcomp::compression_type; - auto const& comp_disabled = cudf::io::nvcomp::is_compression_disabled; + using nvcomp::compression_type; + auto const& comp_disabled = nvcomp::is_compression_disabled; EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required @@ -201,8 +204,8 @@ TEST_F(NvcompConfigTest, Compression) TEST_F(NvcompConfigTest, Decompression) { - using cudf::io::nvcomp::compression_type; - auto const& decomp_disabled = cudf::io::nvcomp::is_decompression_disabled; + using nvcomp::compression_type; + auto const& decomp_disabled = nvcomp::is_decompression_disabled; EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index fce99187516..2209a30149d 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -40,6 +40,8 @@ #include #include +namespace nvcomp = cudf::io::detail::nvcomp; + template using column_wrapper = std::conditional_t, @@ -1135,7 +1137,7 @@ TEST_F(OrcReaderTest, SingleInputs) TEST_F(OrcReaderTest, zstdCompressionRegression) { - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + if (nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD)) { GTEST_SKIP() << "Newer nvCOMP version is required"; } @@ -1700,8 +1702,8 @@ TEST_F(OrcMetadataReaderTest, TestNested) TEST_F(OrcReaderTest, ZstdMaxCompressionRate) { - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD) or - cudf::io::nvcomp::is_compression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + if (nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD) or + nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD)) { GTEST_SKIP() << "Newer nvCOMP version is required"; } From 69d62cbb7943adfab32832202a84565c673aa0d0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:59:41 -0800 Subject: [PATCH 4/6] Remove cudf._lib.utils in favor of python APIs (#17625) Contributes to https://github.com/rapidsai/cudf/issues/17317 Dependent on https://github.com/rapidsai/cudf/pull/17582 Did a search across RAPIDS and Morpheus and didn't find usage of these methods. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17625 --- python/cudf/cudf/_lib/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/__init__.pxd | 0 python/cudf/cudf/_lib/utils.pxd | 6 -- python/cudf/cudf/_lib/utils.pyx | 94 ---------------------------- python/cudf/cudf/core/frame.py | 4 +- python/cudf/cudf/io/avro.py | 13 +++- python/cudf/cudf/io/csv.py | 16 +++-- python/cudf/cudf/io/json.py | 26 ++++---- python/cudf/cudf/io/orc.py | 37 +++++++++-- python/cudf/cudf/io/parquet.py | 29 +++++---- 10 files changed, 85 insertions(+), 142 deletions(-) delete mode 100644 python/cudf/cudf/_lib/__init__.pxd delete mode 100644 python/cudf/cudf/_lib/utils.pxd delete mode 100644 python/cudf/cudf/_lib/utils.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index da4faabf189..ff6fba1c3e8 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx) +set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/cudf/cudf/_lib/__init__.pxd b/python/cudf/cudf/_lib/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd deleted file mode 100644 index 900be721c9a..00000000000 --- a/python/cudf/cudf/_lib/utils.pxd +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -cpdef data_from_pylibcudf_table(tbl, column_names, index_names=*) -cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) -cpdef columns_from_pylibcudf_table(tbl) -cpdef _data_from_columns(columns, column_names, index_names=*) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx deleted file mode 100644 index 975c9eb741c..00000000000 --- a/python/cudf/cudf/_lib/utils.pyx +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -import cudf - -from cudf._lib.column cimport Column - - -cpdef columns_from_pylibcudf_table(tbl): - """Convert a pylibcudf table into list of columns. - - Parameters - ---------- - tbl : pylibcudf.Table - The pylibcudf table whose columns will be extracted - - Returns - ------- - list[Column] - A list of columns. - """ - return [Column.from_pylibcudf(plc) for plc in tbl.columns()] - - -cpdef _data_from_columns(columns, column_names, index_names=None): - """Convert a list of columns into a dict with an index. - - This method is intended to provide the bridge between the columns returned - from calls to libcudf or pylibcudf APIs and the cuDF Python Frame objects, which - require named columns and a separate index. - - Since cuDF Python has an independent representation of a table as a - collection of columns, this function simply returns a dict of columns - suitable for conversion into data to be passed to cuDF constructors. - This method returns the columns of the table in the order they are - stored in libcudf, but calling code is responsible for partitioning and - labeling them as needed. - - Parameters - ---------- - columns : list[Column] - The columns to be extracted - column_names : iterable - The keys associated with the columns in the output data. - index_names : iterable, optional - If provided, an iterable of strings that will be used to label the - corresponding first set of columns into a (Multi)Index. If this - argument is omitted, all columns are assumed to be part of the output - table and no index is constructed. - """ - # First construct the index, if any - index = ( - # TODO: For performance, the _from_data methods of Frame types assume - # that the passed index object is already an Index because cudf.Index - # and cudf.as_index are expensive. As a result, this function is - # currently somewhat inconsistent in returning a dict of columns for - # the data while actually constructing the Index object here (instead - # of just returning a dict for that as well). As we clean up the - # Frame factories we may want to look for a less dissonant approach - # that does not impose performance penalties. - cudf.core.index._index_from_data( - { - name: columns[i] - for i, name in enumerate(index_names) - } - ) - if index_names is not None - else None - ) - n_index_columns = len(index_names) if index_names is not None else 0 - data = { - name: columns[i + n_index_columns] - for i, name in enumerate(column_names) - } - return data, index - - -cpdef data_from_pylibcudf_table(tbl, column_names, index_names=None): - return _data_from_columns( - columns_from_pylibcudf_table(tbl), - column_names, - index_names - ) - -cpdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None): - """ - Unpacks the TableWithMetadata from libcudf I/O - into a dict of columns and an Index (cuDF format) - """ - if column_names is None: - column_names = tbl_with_meta.column_names(include_children=False) - return _data_from_columns( - columns=[Column.from_pylibcudf(plc) for plc in tbl_with_meta.columns], - column_names=column_names, - index_names=index_names - ) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 9aadbf8f47a..8f45c6f0115 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -861,7 +861,9 @@ def _quantile_table( column_order, null_precedence, ) - columns = libcudf.utils.columns_from_pylibcudf_table(plc_table) + columns = [ + ColumnBase.from_pylibcudf(col) for col in plc_table.columns() + ] return self._from_columns_like_self( columns, column_names=self._column_names, diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py index 4966cdb86e1..dcbdd4423fc 100644 --- a/python/cudf/cudf/io/avro.py +++ b/python/cudf/cudf/io/avro.py @@ -3,7 +3,7 @@ import pylibcudf as plc import cudf -from cudf._lib.utils import data_from_pylibcudf_io +from cudf._lib.column import Column from cudf.utils import ioutils @@ -46,5 +46,12 @@ def read_avro( options.set_columns(columns) plc_result = plc.io.avro.read_avro(options) - - return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result)) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + plc_result.column_names(include_children=False), + plc_result.columns, + strict=True, + ) + } + return cudf.DataFrame._from_data(data) diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index da9a66f3874..6d617cbf38e 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -15,8 +15,8 @@ import pylibcudf as plc import cudf +from cudf._lib.column import Column from cudf._lib.types import dtype_to_pylibcudf_type -from cudf._lib.utils import data_from_pylibcudf_io from cudf.api.types import is_hashable, is_scalar from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils @@ -251,9 +251,17 @@ def read_csv( if na_values is not None: options.set_na_values([str(val) for val in na_values]) - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(plc.io.csv.read_csv(options)) - ) + table_w_meta = plc.io.csv.read_csv(options) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + table_w_meta.column_names(include_children=False), + table_w_meta.columns, + strict=True, + ) + } + + df = cudf.DataFrame._from_data(data) if isinstance(dtype, abc.Mapping): for k, v in dtype.items(): diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 4be556e1d67..ff326e09315 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -15,7 +15,6 @@ import cudf from cudf._lib.column import Column from cudf._lib.types import dtype_to_pylibcudf_type -from cudf._lib.utils import _data_from_columns, data_from_pylibcudf_io from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type @@ -178,13 +177,11 @@ def read_json( ) ) ) - df = cudf.DataFrame._from_data( - *_data_from_columns( - columns=[Column.from_pylibcudf(col) for col in res_cols], - column_names=res_col_names, - index_names=None, - ) - ) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip(res_col_names, res_cols, strict=True) + } + df = cudf.DataFrame._from_data(data) ioutils._add_df_col_struct_names(df, res_child_names) return df else: @@ -207,10 +204,15 @@ def read_json( extra_parameters=kwargs, ) ) - - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(table_w_meta) - ) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + table_w_meta.column_names(include_children=False), + table_w_meta.columns, + strict=True, + ) + } + df = cudf.DataFrame._from_data(data) # Post-processing to add in struct column names ioutils._add_df_col_struct_names(df, table_w_meta.child_names) diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 5103137bc77..f3124552fd1 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -10,10 +10,11 @@ import pylibcudf as plc import cudf +from cudf._lib.column import Column from cudf._lib.types import dtype_to_pylibcudf_type -from cudf._lib.utils import data_from_pylibcudf_io from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock +from cudf.core.index import _index_from_data from cudf.utils import ioutils try: @@ -323,11 +324,35 @@ def read_orc( actual_index_names = list(index_col_names.values()) col_names = names[len(actual_index_names) :] - data, index = data_from_pylibcudf_io( - tbl_w_meta, - col_names if columns is None else names, - actual_index_names, - ) + result_col_names = col_names if columns is None else names + if actual_index_names is None: + index = None + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + result_col_names, tbl_w_meta.columns, strict=True + ) + } + else: + result_columns = [ + Column.from_pylibcudf(col) for col in tbl_w_meta.columns + ] + index = _index_from_data( + dict( + zip( + actual_index_names, + result_columns[: len(actual_index_names)], + strict=True, + ) + ) + ) + data = dict( + zip( + result_col_names, + result_columns[len(actual_index_names) :], + strict=True, + ) + ) if is_range_index: index = range_idx diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index c13489630a3..feb6e12da8c 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -23,10 +23,6 @@ import cudf from cudf._lib.column import Column -from cudf._lib.utils import ( - _data_from_columns, - data_from_pylibcudf_io, -) from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock from cudf.core.column import as_column, column_empty @@ -1238,16 +1234,11 @@ def _read_parquet( # Drop residual columns to save memory tbl._columns[i] = None - df = cudf.DataFrame._from_data( - *_data_from_columns( - columns=[ - Column.from_pylibcudf(plc) - for plc in concatenated_columns - ], - column_names=column_names, - index_names=None, - ) - ) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip(column_names, concatenated_columns) + } + df = cudf.DataFrame._from_data(data) df = _process_metadata( df, column_names, @@ -1287,8 +1278,16 @@ def _read_parquet( options.set_filter(filters) tbl_w_meta = plc.io.parquet.read_parquet(options) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + tbl_w_meta.column_names(include_children=False), + tbl_w_meta.columns, + strict=True, + ) + } - df = cudf.DataFrame._from_data(*data_from_pylibcudf_io(tbl_w_meta)) + df = cudf.DataFrame._from_data(data) df = _process_metadata( df, From 3add496e12b71c76cd8353c2c92c7cd64eb37fdb Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 20 Dec 2024 14:20:31 -0600 Subject: [PATCH 5/6] Use cuda-python `cuda.bindings` import names. (#17585) This PR updates cuDF to use the new cuda-python `cuda.bindings` layout. See https://github.com/rapidsai/build-planning/issues/117. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/17585 --- python/cudf/cudf/core/udf/utils.py | 6 +++--- python/cudf/cudf/utils/gpu_utils.py | 2 +- python/cudf/pyproject.toml | 2 -- python/cudf_kafka/pyproject.toml | 2 -- python/cudf_polars/pyproject.toml | 2 -- python/custreamz/pyproject.toml | 2 -- python/dask_cudf/pyproject.toml | 2 -- python/pylibcudf/pylibcudf/utils.pyx | 10 +++++----- python/pylibcudf/pyproject.toml | 2 -- 9 files changed, 9 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index bfe716f0afc..4bd5a1e7040 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -9,7 +9,7 @@ import cupy as cp import llvmlite.binding as ll import numpy as np -from cuda import cudart +from cuda.bindings import runtime from numba import cuda, typeof from numba.core.datamodel import default_manager, models from numba.core.errors import TypingError @@ -356,8 +356,8 @@ def set_malloc_heap_size(size=None): if size is None: size = _STRINGS_UDF_DEFAULT_HEAP_SIZE if size != _heap_size: - (ret,) = cudart.cudaDeviceSetLimit( - cudart.cudaLimit.cudaLimitMallocHeapSize, size + (ret,) = runtime.cudaDeviceSetLimit( + runtime.cudaLimit.cudaLimitMallocHeapSize, size ) if ret.value != 0: raise RuntimeError("Unable to set cudaMalloc heap size") diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index b5387ddeb5f..10ab3f6bb1e 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -15,7 +15,7 @@ def validate_setup(): import warnings - from cuda.cudart import cudaDeviceAttr, cudaError_t + from cuda.bindings.runtime import cudaDeviceAttr, cudaError_t from rmm._cuda.gpu import ( CUDARuntimeError, diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 21c18ef0174..2fdf6b34b8f 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -98,8 +98,6 @@ filterwarnings = [ "error", "ignore:::.*xdist.*", "ignore:::.*pytest.*", - # https://github.com/rapidsai/build-planning/issues/116 - "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore", # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 29fcd161444..a9d937435e9 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -60,8 +60,6 @@ addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", - # https://github.com/rapidsai/build-planning/issues/116 - "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index b781b13ec10..5904942aea2 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -66,8 +66,6 @@ addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", - # https://github.com/rapidsai/build-planning/issues/116 - "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index ed43ab83d53..7820157d89b 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -93,8 +93,6 @@ addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", - # https://github.com/rapidsai/build-planning/issues/116 - "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", "ignore:unclosed Date: Fri, 20 Dec 2024 14:32:34 -0800 Subject: [PATCH 6/6] Use more pylibcudf.types instead of cudf._lib.types (#17619) Contributes to https://github.com/rapidsai/cudf/issues/17317 Primary change is to use `pylibcudf.TypeId` instead of an ad-hoc one defined in `cudf._lib.types`. Additionally uses pylibcudf more consistently and inlines/removes some seldom uses/dead code Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/17619 --- python/cudf/cudf/_lib/__init__.py | 7 - python/cudf/cudf/_lib/column.pyx | 10 +- python/cudf/cudf/_lib/scalar.pyx | 53 +++-- python/cudf/cudf/_lib/types.pxd | 5 - python/cudf/cudf/_lib/types.pyx | 225 +++++--------------- python/cudf/cudf/core/column/categorical.py | 4 +- python/cudf/cudf/core/column/column.py | 4 +- python/cudf/cudf/core/dtypes.py | 4 +- python/cudf/cudf/utils/dtypes.py | 2 +- 9 files changed, 87 insertions(+), 227 deletions(-) diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 10f9d813ccc..11473d60698 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -1,9 +1,2 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -import numpy as np - from . import strings_udf - -MAX_COLUMN_SIZE = np.iinfo(np.int32).max -MAX_COLUMN_SIZE_STR = "INT32_MAX" -MAX_STRING_COLUMN_BYTES = np.iinfo(np.int32).max -MAX_STRING_COLUMN_BYTES_STR = "INT32_MAX" diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 245a5d03981..f7dcd89ea48 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -31,12 +31,12 @@ from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.types cimport ( dtype_from_column_view, - dtype_to_data_type, dtype_to_pylibcudf_type, ) from cudf._lib.types import dtype_from_pylibcudf_column +from pylibcudf cimport DataType as plc_DataType cimport pylibcudf.libcudf.copying as cpp_copying cimport pylibcudf.libcudf.types as libcudf_types cimport pylibcudf.libcudf.unary as libcudf_unary @@ -361,7 +361,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[mutable_column_view] children cdef void* data @@ -398,7 +398,7 @@ cdef class Column: self._data = None return mutable_column_view( - dtype, + dtype.c_obj, self.size, data, mask, @@ -424,7 +424,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[column_view] children cdef void* data @@ -450,7 +450,7 @@ cdef class Column: cdef libcudf_types.size_type c_null_count = null_count return column_view( - dtype, + dtype.c_obj, self.size, data, mask, diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 3d3bdd730a8..40bd50acf16 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -10,24 +10,22 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -import pylibcudf +import pylibcudf as plc import cudf -from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES from cudf.core.dtypes import ListDtype, StructDtype +from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES +from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id from cudf.core.missing import NA, NaT -cimport pylibcudf.libcudf.types as libcudf_types # We currently need this cimport because some of the implementations here # access the c_obj of the scalar, and because we need to be able to call # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until # DeviceScalar is phased out entirely from cuDF Cython (at which point # cudf.Scalar will be directly backed by pylibcudf.Scalar). -from pylibcudf cimport Scalar as plc_Scalar +from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar -from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id - def _replace_nested(obj, check, replacement): if isinstance(obj, list): @@ -62,12 +60,12 @@ def gather_metadata(dtypes): """ out = [] for name, dtype in dtypes.items(): - v = pylibcudf.interop.ColumnMetadata(name) + v = plc.interop.ColumnMetadata(name) if isinstance(dtype, cudf.StructDtype): v.children_meta = gather_metadata(dtype.fields) elif isinstance(dtype, cudf.ListDtype): # Offsets column is unnamed and has no children - v.children_meta.append(pylibcudf.interop.ColumnMetadata("")) + v.children_meta.append(plc.interop.ColumnMetadata("")) v.children_meta.extend( gather_metadata({"": dtype.element_type}) ) @@ -81,7 +79,7 @@ cdef class DeviceScalar: # that from_unique_ptr is implemented is probably dereferencing this in an # invalid state. See what the best way to fix that is. def __cinit__(self, *args, **kwargs): - self.c_value = pylibcudf.Scalar.__new__(pylibcudf.Scalar) + self.c_value = plc.Scalar.__new__(plc.Scalar) def __init__(self, value, dtype): """ @@ -127,20 +125,20 @@ cdef class DeviceScalar: pa_array = pa.array([pa.scalar(value, type=pa_type)]) pa_table = pa.Table.from_arrays([pa_array], names=[""]) - table = pylibcudf.interop.from_arrow(pa_table) + table = plc.interop.from_arrow(pa_table) column = table.columns()[0] if isinstance(dtype, cudf.core.dtypes.DecimalDtype): if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype): - column = pylibcudf.unary.cast( - column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL32, -dtype.scale) + column = plc.unary.cast( + column, plc.DataType(plc.TypeId.DECIMAL32, -dtype.scale) ) elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype): - column = pylibcudf.unary.cast( - column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL64, -dtype.scale) + column = plc.unary.cast( + column, plc.DataType(plc.TypeId.DECIMAL64, -dtype.scale) ) - self.c_value = pylibcudf.copying.get_element(column, 0) + self.c_value = plc.copying.get_element(column, 0) self._dtype = dtype def _to_host_scalar(self): @@ -150,7 +148,7 @@ cdef class DeviceScalar: null_type = NaT if is_datetime or is_timedelta else NA metadata = gather_metadata({"": self.dtype})[0] - ps = pylibcudf.interop.to_arrow(self.c_value, metadata) + ps = plc.interop.to_arrow(self.c_value, metadata) if not ps.is_valid: return null_type @@ -225,34 +223,33 @@ cdef class DeviceScalar: return s cdef void _set_dtype(self, dtype=None): - cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type() - + cdef plc_TypeID cdtype_id = self.c_value.type().id() if dtype is not None: self._dtype = dtype - elif cdtype.id() in { - libcudf_types.type_id.DECIMAL32, - libcudf_types.type_id.DECIMAL64, - libcudf_types.type_id.DECIMAL128, + elif cdtype_id in { + plc_TypeID.DECIMAL32, + plc_TypeID.DECIMAL64, + plc_TypeID.DECIMAL128, }: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) - elif cdtype.id() == libcudf_types.type_id.STRUCT: + elif cdtype_id == plc_TypeID.STRUCT: struct_table_view = (self.get_raw_ptr())[0].view() self._dtype = StructDtype({ str(i): dtype_from_column_view(struct_table_view.column(i)) for i in range(struct_table_view.num_columns()) }) - elif cdtype.id() == libcudf_types.type_id.LIST: + elif cdtype_id == plc_TypeID.LIST: if ( self.get_raw_ptr() - )[0].view().type().id() == libcudf_types.type_id.LIST: + )[0].view().type().id() == plc_TypeID.LIST: self._dtype = dtype_from_column_view( (self.get_raw_ptr())[0].view() ) else: self._dtype = ListDtype( - LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ ( (self.get_raw_ptr())[0] .view().type().id() @@ -260,8 +257,8 @@ cdef class DeviceScalar: ] ) else: - self._dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ - (cdtype.id()) + self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + (cdtype_id) ] diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd index c2b760490c1..18b1d26e4db 100644 --- a/python/cudf/cudf/_lib/types.pxd +++ b/python/cudf/cudf/_lib/types.pxd @@ -1,16 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from libc.stdint cimport int32_t -from libcpp cimport bool -cimport pylibcudf.libcudf.types as libcudf_types from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view ctypedef int32_t underlying_type_t_type_id cdef dtype_from_column_view(column_view cv) -cdef libcudf_types.data_type dtype_to_data_type(dtype) except * cpdef dtype_to_pylibcudf_type(dtype) -cdef bool is_decimal_type_id(libcudf_types.type_id tid) except * diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index f169ea12b10..777bd070b32 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -1,7 +1,5 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from enum import IntEnum - import numpy as np import pandas as pd @@ -11,138 +9,46 @@ cimport pylibcudf.libcudf.types as libcudf_types from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -import pylibcudf +import pylibcudf as plc import cudf -class TypeId(IntEnum): - EMPTY = libcudf_types.type_id.EMPTY - INT8 = libcudf_types.type_id.INT8 - INT16 = libcudf_types.type_id.INT16 - INT32 = libcudf_types.type_id.INT32 - INT64 = libcudf_types.type_id.INT64 - UINT8 = libcudf_types.type_id.UINT8 - UINT16 = libcudf_types.type_id.UINT16 - UINT32 = libcudf_types.type_id.UINT32 - UINT64 = libcudf_types.type_id.UINT64 - FLOAT32 = libcudf_types.type_id.FLOAT32 - FLOAT64 = libcudf_types.type_id.FLOAT64 - BOOL8 = libcudf_types.type_id.BOOL8 - TIMESTAMP_DAYS = ( - libcudf_types.type_id.TIMESTAMP_DAYS - ) - TIMESTAMP_SECONDS = ( - libcudf_types.type_id.TIMESTAMP_SECONDS - ) - TIMESTAMP_MILLISECONDS = ( - ( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - ) - TIMESTAMP_MICROSECONDS = ( - ( - libcudf_types.type_id.TIMESTAMP_MICROSECONDS - ) - ) - TIMESTAMP_NANOSECONDS = ( - libcudf_types.type_id.TIMESTAMP_NANOSECONDS - ) - DURATION_SECONDS = ( - libcudf_types.type_id.DURATION_SECONDS - ) - DURATION_MILLISECONDS = ( - libcudf_types.type_id.DURATION_MILLISECONDS - ) - DURATION_MICROSECONDS = ( - libcudf_types.type_id.DURATION_MICROSECONDS - ) - DURATION_NANOSECONDS = ( - libcudf_types.type_id.DURATION_NANOSECONDS - ) - STRING = libcudf_types.type_id.STRING - DECIMAL32 = libcudf_types.type_id.DECIMAL32 - DECIMAL64 = libcudf_types.type_id.DECIMAL64 - DECIMAL128 = libcudf_types.type_id.DECIMAL128 - STRUCT = libcudf_types.type_id.STRUCT - - -SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = { - np.dtype("int8"): TypeId.INT8, - np.dtype("int16"): TypeId.INT16, - np.dtype("int32"): TypeId.INT32, - np.dtype("int64"): TypeId.INT64, - np.dtype("uint8"): TypeId.UINT8, - np.dtype("uint16"): TypeId.UINT16, - np.dtype("uint32"): TypeId.UINT32, - np.dtype("uint64"): TypeId.UINT64, - np.dtype("float32"): TypeId.FLOAT32, - np.dtype("float64"): TypeId.FLOAT64, - np.dtype("datetime64[s]"): TypeId.TIMESTAMP_SECONDS, - np.dtype("datetime64[ms]"): TypeId.TIMESTAMP_MILLISECONDS, - np.dtype("datetime64[us]"): TypeId.TIMESTAMP_MICROSECONDS, - np.dtype("datetime64[ns]"): TypeId.TIMESTAMP_NANOSECONDS, - np.dtype("object"): TypeId.STRING, - np.dtype("bool"): TypeId.BOOL8, - np.dtype("timedelta64[s]"): TypeId.DURATION_SECONDS, - np.dtype("timedelta64[ms]"): TypeId.DURATION_MILLISECONDS, - np.dtype("timedelta64[us]"): TypeId.DURATION_MICROSECONDS, - np.dtype("timedelta64[ns]"): TypeId.DURATION_NANOSECONDS, -} - SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = { - k: pylibcudf.TypeId(v).value - for k, v in SUPPORTED_NUMPY_TO_LIBCUDF_TYPES.items() + np.dtype("int8"): plc.types.TypeId.INT8, + np.dtype("int16"): plc.types.TypeId.INT16, + np.dtype("int32"): plc.types.TypeId.INT32, + np.dtype("int64"): plc.types.TypeId.INT64, + np.dtype("uint8"): plc.types.TypeId.UINT8, + np.dtype("uint16"): plc.types.TypeId.UINT16, + np.dtype("uint32"): plc.types.TypeId.UINT32, + np.dtype("uint64"): plc.types.TypeId.UINT64, + np.dtype("float32"): plc.types.TypeId.FLOAT32, + np.dtype("float64"): plc.types.TypeId.FLOAT64, + np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS, + np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS, + np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS, + np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS, + np.dtype("object"): plc.types.TypeId.STRING, + np.dtype("bool"): plc.types.TypeId.BOOL8, + np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS, + np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS, + np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS, + np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS, } - -LIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { - # There's no equivalent to EMPTY in cudf. We translate EMPTY - # columns from libcudf to ``int8`` columns of all nulls in Python. - # ``int8`` is chosen because it uses the least amount of memory. - TypeId.EMPTY: np.dtype("int8"), - TypeId.INT8: np.dtype("int8"), - TypeId.INT16: np.dtype("int16"), - TypeId.INT32: np.dtype("int32"), - TypeId.INT64: np.dtype("int64"), - TypeId.UINT8: np.dtype("uint8"), - TypeId.UINT16: np.dtype("uint16"), - TypeId.UINT32: np.dtype("uint32"), - TypeId.UINT64: np.dtype("uint64"), - TypeId.FLOAT32: np.dtype("float32"), - TypeId.FLOAT64: np.dtype("float64"), - TypeId.BOOL8: np.dtype("bool"), - TypeId.TIMESTAMP_SECONDS: np.dtype("datetime64[s]"), - TypeId.TIMESTAMP_MILLISECONDS: np.dtype("datetime64[ms]"), - TypeId.TIMESTAMP_MICROSECONDS: np.dtype("datetime64[us]"), - TypeId.TIMESTAMP_NANOSECONDS: np.dtype("datetime64[ns]"), - TypeId.DURATION_SECONDS: np.dtype("timedelta64[s]"), - TypeId.DURATION_MILLISECONDS: np.dtype("timedelta64[ms]"), - TypeId.DURATION_MICROSECONDS: np.dtype("timedelta64[us]"), - TypeId.DURATION_NANOSECONDS: np.dtype("timedelta64[ns]"), - TypeId.STRING: np.dtype("object"), - TypeId.STRUCT: np.dtype("object"), -} - PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { - pylibcudf.TypeId(k).value: v - for k, v in LIBCUDF_TO_SUPPORTED_NUMPY_TYPES.items() + plc_type: np_type + for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items() } +# There's no equivalent to EMPTY in cudf. We translate EMPTY +# columns from libcudf to ``int8`` columns of all nulls in Python. +# ``int8`` is chosen because it uses the least amount of memory. +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8") +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype("object") +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object") -duration_unit_map = { - TypeId.DURATION_SECONDS: "s", - TypeId.DURATION_MILLISECONDS: "ms", - TypeId.DURATION_MICROSECONDS: "us", - TypeId.DURATION_NANOSECONDS: "ns" -} - -datetime_unit_map = { - TypeId.TIMESTAMP_SECONDS: "s", - TypeId.TIMESTAMP_MILLISECONDS: "ms", - TypeId.TIMESTAMP_MICROSECONDS: "us", - TypeId.TIMESTAMP_NANOSECONDS: "ns", -} -size_type_dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[pylibcudf.types.SIZE_TYPE_ID] +size_type_dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID] cdef dtype_from_lists_column_view(column_view cv): @@ -190,71 +96,40 @@ cdef dtype_from_column_view(column_view cv): scale=-cv.type().scale() ) else: - return LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ (tid) ] -cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: - # Note: This function is to be phased out in favor of - # dtype_to_pylibcudf_type which will return a pylibcudf - # DataType object - cdef libcudf_types.type_id tid - if isinstance(dtype, cudf.ListDtype): - tid = libcudf_types.type_id.LIST - elif isinstance(dtype, cudf.StructDtype): - tid = libcudf_types.type_id.STRUCT - elif isinstance(dtype, cudf.Decimal128Dtype): - tid = libcudf_types.type_id.DECIMAL128 - elif isinstance(dtype, cudf.Decimal64Dtype): - tid = libcudf_types.type_id.DECIMAL64 - elif isinstance(dtype, cudf.Decimal32Dtype): - tid = libcudf_types.type_id.DECIMAL32 - else: - tid = ( - ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[np.dtype(dtype)])) - - if is_decimal_type_id(tid): - return libcudf_types.data_type(tid, -dtype.scale) - else: - return libcudf_types.data_type(tid) cpdef dtype_to_pylibcudf_type(dtype): if isinstance(dtype, cudf.ListDtype): - return pylibcudf.DataType(pylibcudf.TypeId.LIST) + return plc.DataType(plc.TypeId.LIST) elif isinstance(dtype, cudf.StructDtype): - return pylibcudf.DataType(pylibcudf.TypeId.STRUCT) + return plc.DataType(plc.TypeId.STRUCT) elif isinstance(dtype, cudf.Decimal128Dtype): - tid = pylibcudf.TypeId.DECIMAL128 - return pylibcudf.DataType(tid, -dtype.scale) + tid = plc.TypeId.DECIMAL128 + return plc.DataType(tid, -dtype.scale) elif isinstance(dtype, cudf.Decimal64Dtype): - tid = pylibcudf.TypeId.DECIMAL64 - return pylibcudf.DataType(tid, -dtype.scale) + tid = plc.TypeId.DECIMAL64 + return plc.DataType(tid, -dtype.scale) elif isinstance(dtype, cudf.Decimal32Dtype): - tid = pylibcudf.TypeId.DECIMAL32 - return pylibcudf.DataType(tid, -dtype.scale) - # libcudf types don't support localization so convert to the base type + tid = plc.TypeId.DECIMAL32 + return plc.DataType(tid, -dtype.scale) + # libcudf types don't support timezones so convert to the base type elif isinstance(dtype, pd.DatetimeTZDtype): dtype = np.dtype(f"(tid) - ] + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid] diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d9b54008e85..b10b8dfe207 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1192,10 +1192,10 @@ def _concat( codes = [o.codes for o in objs] newsize = sum(map(len, codes)) - if newsize > libcudf.MAX_COLUMN_SIZE: + if newsize > np.iinfo(libcudf.types.size_type_dtype).max: raise MemoryError( f"Result of concat cannot have " - f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + f"size > {libcudf.types.size_type_dtype}_MAX" ) elif newsize == 0: codes_col = column.column_empty(0, head.codes.dtype) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 75b9070b53f..31efe267c96 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2537,10 +2537,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: ) newsize = sum(map(len, objs)) - if newsize > libcudf.MAX_COLUMN_SIZE: + if newsize > np.iinfo(libcudf.types.size_type_dtype).max: raise MemoryError( f"Result of concat cannot have " - f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + f"size > {libcudf.types.size_type_dtype}_MAX" ) elif newsize == 0: return column_empty(0, head.dtype) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 971f0be77f8..8ed233ba737 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -56,7 +56,9 @@ def dtype(arbitrary): else: if np_dtype.kind in set("OU"): return np.dtype("object") - elif np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES: + elif ( + np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES + ): raise TypeError(f"Unsupported type {np_dtype}") return np_dtype diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 57bf08e6eec..ca8f9cac2d0 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -151,7 +151,7 @@ def cudf_dtype_from_pydata_dtype(dtype): return cudf.core.dtypes.Decimal64Dtype elif cudf.api.types.is_decimal128_dtype(dtype): return cudf.core.dtypes.Decimal128Dtype - elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES: + elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES: return dtype.type return infer_dtype_from_object(dtype)