Skip to content

Commit

Permalink
Add dataset split update + list methods (#857)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Jul 15, 2024
1 parent 86e3701 commit b51f6e6
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 3 deletions.
4 changes: 2 additions & 2 deletions js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.1.36",
"version": "0.1.37",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"packageManager": "[email protected]",
"files": [
Expand Down Expand Up @@ -261,4 +261,4 @@
},
"./package.json": "./package.json"
}
}
}
91 changes: 91 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2300,6 +2300,97 @@ export class Client {
return result;
}

/**
 * List the split names of a dataset.
 *
 * Exactly one of `datasetId` or `datasetName` must be supplied. When only
 * the name is given, the dataset is looked up via `readDataset` to obtain
 * its ID.
 *
 * @param datasetId - ID of the dataset; checked with `assertUuid`.
 * @param datasetName - Name of the dataset, used when no ID is given.
 * @param asOf - Optional dataset version, sent as the `as_of` query
 *   parameter. A `Date` is converted with `toISOString()`; a string is
 *   sent unchanged. Nothing is sent when it is omitted or empty.
 * @returns The response of `GET /datasets/{id}/splits`, typed as an array
 *   of strings.
 * @throws Error if neither or both of `datasetId` / `datasetName` are given.
 */
public async listDatasetSplits({
  datasetId,
  datasetName,
  asOf,
}: {
  datasetId?: string;
  datasetName?: string;
  asOf?: string | Date;
}): Promise<string[]> {
  // Guard clauses: exactly one identifier must be present.
  if (datasetId === undefined && datasetName === undefined) {
    throw new Error("Must provide dataset name or ID");
  }
  if (datasetId !== undefined && datasetName !== undefined) {
    throw new Error("Must provide either datasetName or datasetId, not both");
  }

  const resolvedDatasetId: string =
    datasetId !== undefined
      ? datasetId
      : (await this.readDataset({ datasetName })).id;
  assertUuid(resolvedDatasetId);

  // Only attach `as_of` when a non-empty version was actually requested.
  const query = new URLSearchParams();
  let version: string | undefined;
  if (asOf) {
    version = typeof asOf === "string" ? asOf : asOf.toISOString();
  }
  if (version) {
    query.append("as_of", version);
  }

  const splitsPath = `/datasets/${resolvedDatasetId}/splits`;
  return await this._get<string[]>(splitsPath, query);
}

/**
 * Add examples to, or remove examples from, a named split of a dataset.
 *
 * Exactly one of `datasetId` or `datasetName` must be supplied. When only
 * the name is given, the dataset is looked up via `readDataset` to obtain
 * its ID. The update is sent as `PUT /datasets/{id}/splits`.
 *
 * @param datasetId - ID of the dataset; checked with `assertUuid`.
 * @param datasetName - Name of the dataset, used when no ID is given.
 * @param splitName - Name of the split, sent as `split_name`.
 * @param exampleIds - Example IDs, each checked with `assertUuid` and sent
 *   as `examples`.
 * @param remove - Sent as the `remove` flag of the request body. Defaults
 *   to `false`.
 * @throws Error if neither or both of `datasetId` / `datasetName` are given.
 */
public async updateDatasetSplits({
  datasetId,
  datasetName,
  splitName,
  exampleIds,
  remove = false,
}: {
  datasetId?: string;
  datasetName?: string;
  splitName: string;
  exampleIds: string[];
  remove?: boolean;
}): Promise<void> {
  // Guard clauses: exactly one identifier must be present.
  if (datasetId === undefined && datasetName === undefined) {
    throw new Error("Must provide dataset name or ID");
  }
  if (datasetId !== undefined && datasetName !== undefined) {
    throw new Error("Must provide either datasetName or datasetId, not both");
  }

  const resolvedDatasetId: string =
    datasetId !== undefined
      ? datasetId
      : (await this.readDataset({ datasetName })).id;
  assertUuid(resolvedDatasetId);

  // Check every example ID before anything is sent over the wire.
  const checkedExampleIds = exampleIds.map((exampleId) => {
    assertUuid(exampleId);
    return exampleId;
  });
  const payload = {
    split_name: splitName,
    examples: checkedExampleIds,
    remove,
  };

  const endpoint = `${this.apiUrl}/datasets/${resolvedDatasetId}/splits`;
  // `fetchOptions` is spread last so caller-supplied options take precedence.
  const requestInit = {
    method: "PUT",
    headers: { ...this.headers, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(this.timeout_ms),
    ...this.fetchOptions,
  };
  const putResponse = await this.caller.call(fetch, endpoint, requestInit);

  await raiseForStatus(putResponse, "update dataset splits");
}

/**
* @deprecated This method is deprecated and will be removed in future LangSmith versions, use `evaluate` from `langsmith/evaluation` instead.
*/
Expand Down
2 changes: 1 addition & 1 deletion js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ export type {
export { RunTree, type RunTreeConfig } from "./run_trees.js";

// Update using yarn bump-version
export const __version__ = "0.1.36";
export const __version__ = "0.1.37";
76 changes: 76 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3334,6 +3334,82 @@ def delete_example(self, example_id: ID_TYPE) -> None:
)
ls_utils.raise_for_status_with_text(response)

def list_dataset_splits(
    self,
    *,
    dataset_id: Optional[ID_TYPE] = None,
    dataset_name: Optional[str] = None,
    as_of: Optional[Union[str, datetime.datetime]] = None,
) -> List[str]:
    """Get the splits for a dataset.

    Args:
        dataset_id (Optional[ID_TYPE]): The ID of the dataset. Takes
            precedence over ``dataset_name`` when both are given.
        dataset_name (Optional[str]): The name of the dataset. Only used
            (via ``read_dataset``) when ``dataset_id`` is None.
        as_of (Optional[Union[str, datetime.datetime]], optional): The version
            of the dataset to retrieve splits for. Can be a timestamp or a
            string tag. When None, no ``as_of`` parameter is sent.

    Returns:
        List[str]: The names of this dataset's splits (the decoded JSON
            body of the response).

    Raises:
        ValueError: If neither ``dataset_id`` nor ``dataset_name`` is given.
    """
    if dataset_id is None and dataset_name is None:
        raise ValueError("Must provide dataset name or ID")
    if dataset_id is None:
        dataset_id = self.read_dataset(dataset_name=dataset_name).id

    # Datetimes are serialized to ISO format; string tags pass through as-is.
    query = {}
    if isinstance(as_of, datetime.datetime):
        query["as_of"] = as_of.isoformat()
    elif as_of is not None:
        query["as_of"] = as_of

    splits_path = f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits"
    response = self.request_with_retries("GET", splits_path, params=query)
    ls_utils.raise_for_status_with_text(response)
    return response.json()

def update_dataset_splits(
    self,
    *,
    dataset_id: Optional[ID_TYPE] = None,
    dataset_name: Optional[str] = None,
    split_name: str,
    example_ids: List[ID_TYPE],
    remove: bool = False,
) -> None:
    """Update the splits for a dataset.

    Args:
        dataset_id (Optional[ID_TYPE]): The ID of the dataset to update.
            Takes precedence over ``dataset_name`` when both are given.
        dataset_name (Optional[str]): The name of the dataset to update.
            Only used (via ``read_dataset``) when ``dataset_id`` is None.
        split_name (str): The name of the split to update.
        example_ids (List[ID_TYPE]): The IDs of the examples to add to or
            remove from the split.
        remove (bool, optional): If True, remove the examples from the split.
            If False, add the examples to the split. Defaults to False.

    Returns:
        None

    Raises:
        ValueError: If neither ``dataset_id`` nor ``dataset_name`` is given.
    """
    if dataset_id is None and dataset_name is None:
        raise ValueError("Must provide dataset name or ID")
    if dataset_id is None:
        dataset_id = self.read_dataset(dataset_name=dataset_name).id

    # Convert each example ID up front; the label passed to _as_uuid
    # carries the position of the ID within example_ids.
    example_uuids = []
    for position, example_id in enumerate(example_ids):
        example_uuids.append(str(_as_uuid(example_id, f"example_ids[{position}]")))

    payload = {
        "split_name": split_name,
        "examples": example_uuids,
        "remove": remove,
    }

    splits_path = f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits"
    response = self.request_with_retries("PUT", splits_path, json=payload)
    ls_utils.raise_for_status_with_text(response)

def _resolve_run_id(
self,
run: Union[ls_schemas.Run, ls_schemas.RunBase, str, uuid.UUID],
Expand Down

0 comments on commit b51f6e6

Please sign in to comment.