Commit

remove blocksize and aggregate_files handling
rjzamora committed Sep 24, 2024
1 parent 8d87c54 commit 8cfe71e
Showing 1 changed file with 24 additions and 32 deletions.
56 changes: 24 additions & 32 deletions python/dask_cudf/dask_cudf/backends.py
@@ -807,42 +807,34 @@ def read_parquet(path, filesystem="fsspec", engine=None, **kwargs):
             raise NotImplementedError(
                 "parquet_file_extension is not supported when using the pyarrow filesystem."
             )
-        if blocksize == "default":
-            blocksize = "256 MiB"
-        if isinstance(aggregate_files, str):
+        if blocksize is not None and blocksize != "default":
             raise NotImplementedError(
-                f"aggregate_files={aggregate_files} is not supported when using the pyarrow filesystem."
+                "blocksize is not supported when using the pyarrow filesystem."
             )
-        min_partition_size = config.get(
-            "dataframe.parquet.minimum-partition-size"
-        )
-        if aggregate_files:
-            # Use "minimum-partition-size" config to control file aggregation
-            min_partition_size = blocksize
+        if aggregate_files is not None:
+            raise NotImplementedError(
+                "aggregate_files is not supported when using the pyarrow filesystem. "
+                "Please use the 'dataframe.parquet.minimum-partition-size' config."
+            )
 
-        with config.set(
-            {
-                "dataframe.parquet.minimum-partition-size": min_partition_size
-            }
-        ):
-            return dx.new_collection(
-                CudfReadParquetPyarrowFS(
-                    path,
-                    columns=dx._util._convert_to_list(columns),
-                    filters=filters,
-                    categories=categories,
-                    index=index,
-                    calculate_divisions=calculate_divisions,
-                    storage_options=storage_options,
-                    filesystem=filesystem,
-                    ignore_metadata_file=ignore_metadata_file,
-                    arrow_to_pandas=arrow_to_pandas,
-                    pyarrow_strings_enabled=pyarrow_strings_enabled(),
-                    kwargs=kwargs,
-                    _series=isinstance(columns, str),
-                )
-            )
+        return dx.new_collection(
+            CudfReadParquetPyarrowFS(
+                path,
+                columns=dx._util._convert_to_list(columns),
+                filters=filters,
+                categories=categories,
+                index=index,
+                calculate_divisions=calculate_divisions,
+                storage_options=storage_options,
+                filesystem=filesystem,
+                ignore_metadata_file=ignore_metadata_file,
+                arrow_to_pandas=arrow_to_pandas,
+                pyarrow_strings_enabled=pyarrow_strings_enabled(),
+                kwargs=kwargs,
+                _series=isinstance(columns, str),
+            )
+        )
 
     @staticmethod
     def read_csv(
         path,
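
For context on the change above (not part of the commit itself): the new error message points users at the "dataframe.parquet.minimum-partition-size" config as the replacement for blocksize=/aggregate_files= when the pyarrow filesystem is used. Below is a minimal sketch of that usage, assuming dask_cudf is installed and that filesystem="arrow" routes read_parquet through the pyarrow filesystem path; the dataset path is hypothetical.

import dask
import dask_cudf

# Request ~256 MiB output partitions via the dask config instead of blocksize=.
with dask.config.set({"dataframe.parquet.minimum-partition-size": "256 MiB"}):
    df = dask_cudf.read_parquet(
        "s3://example-bucket/dataset/",  # hypothetical dataset path
        filesystem="arrow",  # assumed value selecting the pyarrow-filesystem reader
    )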
