Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nvCOMP GZIP integration #16770

Merged
merged 19 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/cudf/io/nvcomp_adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace CUDF_EXPORT cudf {
namespace io::nvcomp {

enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 };
enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP };

/**
* @brief Set of parameters that impact whether nvCOMP features are enabled.
Expand Down
14 changes: 11 additions & 3 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/utilities/error.hpp>

#include <nvcomp/deflate.h>
#include <nvcomp/gzip.h>
#include <nvcomp/lz4.h>
#include <nvcomp/snappy.h>
#include <nvcomp/zstd.h>
Expand All @@ -44,6 +45,8 @@ auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&...
return nvcompBatchedLZ4DecompressGetTempSizeEx(std::forward<Args>(args)...);
case compression_type::DEFLATE:
return nvcompBatchedDeflateDecompressGetTempSizeEx(std::forward<Args>(args)...);
case compression_type::GZIP:
return nvcompBatchedGzipDecompressGetTempSizeEx(std::forward<Args>(args)...);
default: CUDF_FAIL("Unsupported compression type");
}
}
Expand Down Expand Up @@ -73,6 +76,8 @@ auto batched_decompress_async(compression_type compression, Args&&... args)
case compression_type::DEFLATE:
return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
case compression_type::LZ4: return nvcompBatchedLZ4DecompressAsync(std::forward<Args>(args)...);
case compression_type::GZIP:
return nvcompBatchedGzipDecompressAsync(std::forward<Args>(args)...);
default: CUDF_FAIL("Unsupported compression type");
}
}
Expand All @@ -84,6 +89,7 @@ std::string compression_type_name(compression_type compression)
case compression_type::ZSTD: return "Zstandard";
case compression_type::DEFLATE: return "Deflate";
case compression_type::LZ4: return "LZ4";
case compression_type::GZIP: return "GZIP";
}
return "compression_type(" + std::to_string(static_cast<int>(compression)) + ")";
}
Expand Down Expand Up @@ -359,8 +365,8 @@ std::optional<std::string> is_compression_disabled_impl(compression_type compres
return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable.";
}
return std::nullopt;
default: return "Unsupported compression type";
}
return "Unsupported compression type";
}

std::optional<std::string> is_compression_disabled(compression_type compression,
Expand Down Expand Up @@ -396,7 +402,8 @@ std::optional<std::string> is_decompression_disabled_impl(compression_type compr
feature_status_parameters params)
{
switch (compression) {
case compression_type::DEFLATE: {
case compression_type::DEFLATE:
case compression_type::GZIP: {
if (not params.are_all_integrations_enabled) {
return "DEFLATE decompression is experimental, you can enable it through "
"`LIBCUDF_NVCOMP_POLICY` environment variable.";
Expand Down Expand Up @@ -447,6 +454,7 @@ std::optional<std::string> is_decompression_disabled(compression_type compressio
size_t required_alignment(compression_type compression)
{
switch (compression) {
case compression_type::GZIP:
Copy link
Contributor Author

@vuule vuule Sep 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvCOMP omitted nvcompGzipRequiredAlignment, so I could not follow the pattern we use for other formats.
We can return the same value as DEFLATE because GZIP uses the same compression format; the only difference is that GZIP wraps the DEFLATE stream in an additional header and trailer.
The alternative would be to use a hard-coded number.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this require the same alignment as DEFLATE instead of its own alignment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#16770 (comment)
We posted at the same time :)

I updated that comment to provide context.

case compression_type::DEFLATE: return nvcompDeflateRequiredAlignment;
case compression_type::SNAPPY: return nvcompSnappyRequiredAlignment;
case compression_type::ZSTD: return nvcompZstdRequiredAlignment;
Expand All @@ -462,7 +470,7 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
case compression_type::SNAPPY: return nvcompSnappyCompressionMaxAllowedChunkSize;
case compression_type::ZSTD: return nvcompZstdCompressionMaxAllowedChunkSize;
case compression_type::LZ4: return nvcompLZ4CompressionMaxAllowedChunkSize;
default: return std::nullopt;
default: CUDF_FAIL("Unsupported compression type");
}
}

Expand Down
14 changes: 12 additions & 2 deletions cpp/src/io/parquet/reader_impl_chunking.cu
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,18 @@ std::vector<row_range> compute_page_splits_by_row(device_span<cumulative_page_in

switch (codec.compression_type) {
case GZIP:
gpuinflate(
d_comp_in_view, d_comp_out_view, d_comp_res_view, gzip_header_included::YES, stream);
if (cudf::io::nvcomp_integration::is_all_enabled()) {
nvcomp::batched_decompress(nvcomp::compression_type::GZIP,
d_comp_in_view,
d_comp_out_view,
d_comp_res_view,
codec.max_decompressed_size,
codec.total_decomp_size,
stream);
} else {
gpuinflate(
d_comp_in_view, d_comp_out_view, d_comp_res_view, gzip_header_included::YES, stream);
}
break;
case SNAPPY:
if (cudf::io::nvcomp_integration::is_stable_enabled()) {
Expand Down
6 changes: 5 additions & 1 deletion docs/cudf/source/user_guide/io/io.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ IO format.

</div>


**Notes:**

- \[¹\] - Not all orientations are GPU-accelerated.
Expand Down Expand Up @@ -177,4 +176,9 @@ If no value is set, behavior will be the same as the "STABLE" option.
+-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+
| DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ |
+-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+
| LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ |
+-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+
| GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ |
+-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+

```
Loading