diff --git a/dask/dataframe/io/parquet/utils.py b/dask/dataframe/io/parquet/utils.py
index 1344902af03..5e714322dad 100644
--- a/dask/dataframe/io/parquet/utils.py
+++ b/dask/dataframe/io/parquet/utils.py
@@ -174,7 +174,7 @@ def read_metadata(
 
     @classmethod
     def default_blocksize(cls):
-        return "128 MiB"
+        return "256 MiB"
 
     @classmethod
     def read_partition(
@@ -907,7 +907,7 @@ def _infer_split_row_groups(row_group_sizes, blocksize, aggregate_files=False):
     # Use blocksize to choose an appropriate split_row_groups value
     if row_group_sizes:
         blocksize = parse_bytes(blocksize)
-        if aggregate_files or np.sum(row_group_sizes) > blocksize:
+        if aggregate_files or np.sum(row_group_sizes) > 2 * blocksize:
             # If we are aggregating files, or the file is larger
-            # than `blocksize`, set split_row_groups to "adaptive"
+            # than twice `blocksize`, set split_row_groups to "adaptive"
             return "adaptive"