diff --git a/nemo_curator/utils/distributed_utils.py b/nemo_curator/utils/distributed_utils.py index a9d792b4..8adc982b 100644 --- a/nemo_curator/utils/distributed_utils.py +++ b/nemo_curator/utils/distributed_utils.py @@ -485,7 +485,6 @@ def read_data_files_per_partition( columns: Optional[List[str]] = None, **kwargs, ) -> dd.DataFrame: - input_files = sorted(input_files) if files_per_partition > 1: input_files = [ input_files[i : i + files_per_partition]