def from_parquet(self, file_path: str, fields: List[str] = None, input_keys: Tuple[str] = ()) -> List[dspy.Example]:
    """Load a Parquet file and return its rows as a list of ``dspy.Example``.

    Args:
        file_path: Location of the Parquet file; forwarded to ``load_dataset``
            as ``data_files``.
        fields: Column names to copy into each example. When ``None`` or
            empty, every name in the loaded dataset's ``features`` is used.
        input_keys: Field names to mark as inputs on every example.

    Returns:
        One ``dspy.Example`` per row of the loaded ``"train"`` split.
    """
    # NOTE(review): `load_dataset` is presumably Hugging Face `datasets.load_dataset`
    # (its import is outside this view). Only the "train" split of its result is read.
    dataset = load_dataset("parquet", data_files=file_path)["train"]

    if not fields:
        fields = list(dataset.features)

    # Unpack input_keys so each name is passed as its own argument, matching
    # from_json. Passing the tuple itself would register the tuple as the key.
    return [
        dspy.Example({field: row[field] for field in fields}).with_inputs(*input_keys)
        for row in dataset
    ]