From b51f6e63cc6be9a8312af34e95dcc6a698eb4629 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 15 Jul 2024 13:35:17 -0700 Subject: [PATCH] Add dataset split update + list methods (#857) --- js/package.json | 4 +- js/src/client.ts | 91 ++++++++++++++++++++++++++++++++++++++ js/src/index.ts | 2 +- python/langsmith/client.py | 76 +++++++++++++++++++++++++++++++ 4 files changed, 170 insertions(+), 3 deletions(-) diff --git a/js/package.json b/js/package.json index eee94a13a..f6bb02d18 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "langsmith", - "version": "0.1.36", + "version": "0.1.37", "description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.", "packageManager": "yarn@1.22.19", "files": [ @@ -261,4 +261,4 @@ }, "./package.json": "./package.json" } -} +} \ No newline at end of file diff --git a/js/src/client.ts b/js/src/client.ts index 0f44d5124..6cba0c43b 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -2300,6 +2300,97 @@ export class Client { return result; } + public async listDatasetSplits({ + datasetId, + datasetName, + asOf, + }: { + datasetId?: string; + datasetName?: string; + asOf?: string | Date; + }): Promise<string[]> { + let datasetId_: string; + if (datasetId === undefined && datasetName === undefined) { + throw new Error("Must provide dataset name or ID"); + } else if (datasetId !== undefined && datasetName !== undefined) { + throw new Error("Must provide either datasetName or datasetId, not both"); + } else if (datasetId === undefined) { + const dataset = await this.readDataset({ datasetName }); + datasetId_ = dataset.id; + } else { + datasetId_ = datasetId; + } + + assertUuid(datasetId_); + + const params = new URLSearchParams(); + const dataset_version = asOf + ? typeof asOf === "string" + ? 
asOf + : asOf?.toISOString() + : undefined; + if (dataset_version) { + params.append("as_of", dataset_version); + } + + const response = await this._get( + `/datasets/${datasetId_}/splits`, + params + ); + return response; + } + + public async updateDatasetSplits({ + datasetId, + datasetName, + splitName, + exampleIds, + remove = false, + }: { + datasetId?: string; + datasetName?: string; + splitName: string; + exampleIds: string[]; + remove?: boolean; + }): Promise<void> { + let datasetId_: string; + if (datasetId === undefined && datasetName === undefined) { + throw new Error("Must provide dataset name or ID"); + } else if (datasetId !== undefined && datasetName !== undefined) { + throw new Error("Must provide either datasetName or datasetId, not both"); + } else if (datasetId === undefined) { + const dataset = await this.readDataset({ datasetName }); + datasetId_ = dataset.id; + } else { + datasetId_ = datasetId; + } + + assertUuid(datasetId_); + + const data = { + split_name: splitName, + examples: exampleIds.map((id) => { + assertUuid(id); + return id; + }), + remove, + }; + + const response = await this.caller.call( + fetch, + `${this.apiUrl}/datasets/${datasetId_}/splits`, + { + method: "PUT", + headers: { ...this.headers, "Content-Type": "application/json" }, + body: JSON.stringify(data), + signal: AbortSignal.timeout(this.timeout_ms), + ...this.fetchOptions, + } + ); + + await raiseForStatus(response, "update dataset splits"); + } + /** + * @deprecated This method is deprecated and will be removed in future LangSmith versions, use `evaluate` from `langsmith/evaluation` instead. 
+ */ diff --git a/js/src/index.ts b/js/src/index.ts index 575faa25a..429988932 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -12,4 +12,4 @@ export type { export { RunTree, type RunTreeConfig } from "./run_trees.js"; // Update using yarn bump-version -export const __version__ = "0.1.36"; +export const __version__ = "0.1.37"; diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 2b7647cf6..951c7407c 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3334,6 +3334,82 @@ def delete_example(self, example_id: ID_TYPE) -> None: ) ls_utils.raise_for_status_with_text(response) + def list_dataset_splits( + self, + *, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + as_of: Optional[Union[str, datetime.datetime]] = None, + ) -> List[str]: + """Get the splits for a dataset. + + Args: + dataset_id (ID_TYPE): The ID of the dataset. + as_of (Optional[Union[str, datetime.datetime]], optional): The version + of the dataset to retrieve splits for. Can be a timestamp or a + string tag. Defaults to "latest". + + Returns: + List[str]: The names of this dataset's splits. + """ + if dataset_id is None: + if dataset_name is None: + raise ValueError("Must provide dataset name or ID") + dataset_id = self.read_dataset(dataset_name=dataset_name).id + params = {} + if as_of is not None: + params["as_of"] = ( + as_of.isoformat() if isinstance(as_of, datetime.datetime) else as_of + ) + + response = self.request_with_retries( + "GET", + f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits", + params=params, + ) + ls_utils.raise_for_status_with_text(response) + return response.json() + + def update_dataset_splits( + self, + *, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + split_name: str, + example_ids: List[ID_TYPE], + remove: bool = False, + ) -> None: + """Update the splits for a dataset. + + Args: + dataset_id (ID_TYPE): The ID of the dataset to update. 
+ split_name (str): The name of the split to update. + example_ids (List[ID_TYPE]): The IDs of the examples to add to or + remove from the split. + remove (bool, optional): If True, remove the examples from the split. + If False, add the examples to the split. Defaults to False. + + Returns: + None + """ + if dataset_id is None: + if dataset_name is None: + raise ValueError("Must provide dataset name or ID") + dataset_id = self.read_dataset(dataset_name=dataset_name).id + data = { + "split_name": split_name, + "examples": [ + str(_as_uuid(id_, f"example_ids[{i}]")) + for i, id_ in enumerate(example_ids) + ], + "remove": remove, + } + + response = self.request_with_retries( + "PUT", f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits", json=data + ) + ls_utils.raise_for_status_with_text(response) + def _resolve_run_id( self, run: Union[ls_schemas.Run, ls_schemas.RunBase, str, uuid.UUID],