From 692ea24357d32b1242c476f0ed33498c815ac921 Mon Sep 17 00:00:00 2001
From: Devin D'Angelo
Date: Sat, 30 Sep 2023 01:22:52 -0400
Subject: [PATCH] Update Default Parquet Write Compression (#7692)

* update compression default

* fix tests

---------

Co-authored-by: Andrew Lamb
---
 datafusion/common/src/config.rs                           | 2 +-
 datafusion/sqllogictest/test_files/information_schema.slt | 2 +-
 docs/source/user-guide/configs.md                         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index b34c64ff8893..261c2bf435a4 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -307,7 +307,7 @@ config_namespace! {
         /// lzo, brotli(level), lz4, zstd(level), and lz4_raw.
         /// These values are not case sensitive. If NULL, uses
         /// default parquet writer setting
-        pub compression: Option<String>, default = None
+        pub compression: Option<String>, default = Some("zstd(3)".into())
 
         /// Sets if dictionary encoding is enabled. If NULL, uses
         /// default parquet writer setting
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index f90901021637..12aa9089a0c9 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -156,7 +156,7 @@ datafusion.execution.parquet.bloom_filter_enabled false
 datafusion.execution.parquet.bloom_filter_fpp NULL
 datafusion.execution.parquet.bloom_filter_ndv NULL
 datafusion.execution.parquet.column_index_truncate_length NULL
-datafusion.execution.parquet.compression NULL
+datafusion.execution.parquet.compression zstd(3)
 datafusion.execution.parquet.created_by datafusion
 datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615
 datafusion.execution.parquet.data_pagesize_limit 1048576
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index 7fe229b4d3c6..638ac5a36b83 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -58,7 +58,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.execution.parquet.data_pagesize_limit | 1048576 | Sets best effort maximum size of data page in bytes |
 | datafusion.execution.parquet.write_batch_size | 1024 | Sets write_batch_size in bytes |
 | datafusion.execution.parquet.writer_version | 1.0 | Sets parquet writer version valid values are "1.0" and "2.0" |
-| datafusion.execution.parquet.compression | NULL | Sets default parquet compression codec Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting |
+| datafusion.execution.parquet.compression | zstd(3) | Sets default parquet compression codec Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting |
 | datafusion.execution.parquet.dictionary_enabled | NULL | Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting |
 | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | Sets best effort maximum dictionary page size, in bytes |
 | datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting |