Skip to content

Commit

Permalink
feat(datasets): add support for bulk updating examples (#884)
Browse files Browse the repository at this point in the history
Do not land until the LangSmith PR hits prod.
  • Loading branch information
samnoyes authored Jul 18, 2024
2 parents 1a69df4 + 5a04c37 commit fa06b91
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 5 deletions.
2 changes: 1 addition & 1 deletion js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.1.37",
"version": "0.1.38",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"packageManager": "[email protected]",
"files": [
Expand Down
22 changes: 22 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
Example,
ExampleCreate,
ExampleUpdate,
ExampleUpdateWithId,
Feedback,
FeedbackConfig,
FeedbackIngestToken,
Expand Down Expand Up @@ -2303,6 +2304,27 @@ export class Client {
return result;
}

/**
 * Update several examples with a single request.
 *
 * Issues a `PATCH` to `${apiUrl}/examples/bulk` whose JSON body is the
 * `update` array serialized as-is.
 *
 * @param update - The updates to apply; each entry carries the `id` of the
 *   example it targets.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response status is not OK.
 */
public async updateExamples(update: ExampleUpdateWithId[]): Promise<object> {
  const bulkUrl = `${this.apiUrl}/examples/bulk`;
  const requestInit = {
    method: "PATCH",
    headers: { ...this.headers, "Content-Type": "application/json" },
    body: JSON.stringify(update),
    signal: AbortSignal.timeout(this.timeout_ms),
    // Spread last so caller-supplied fetch options take precedence.
    ...this.fetchOptions,
  };
  const res = await this.caller.call(fetch, bulkUrl, requestInit);
  if (res.ok) {
    return res.json();
  }
  throw new Error(
    `Failed to update examples: ${res.status} ${res.statusText}`
  );
}

public async listDatasetSplits({
datasetId,
datasetName,
Expand Down
2 changes: 1 addition & 1 deletion js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ export type {
export { RunTree, type RunTreeConfig } from "./run_trees.js";

// Update using yarn bump-version
export const __version__ = "0.1.37";
export const __version__ = "0.1.38";
4 changes: 4 additions & 0 deletions js/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ export interface ExampleUpdate {
metadata?: KVMap;
split?: string | string[];
}

/**
 * An {@link ExampleUpdate} that also names the example it applies to.
 *
 * This is the element type of the array accepted by `Client.updateExamples`.
 */
export interface ExampleUpdateWithId extends ExampleUpdate {
/** ID of the example to update. */
id: string;
}
export interface BaseDataset {
name: string;
description: string;
Expand Down
36 changes: 35 additions & 1 deletion js/src/tests/client.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,45 @@ test.concurrent("Test LangSmith Client Dataset CRD", async () => {
const newExampleValue2 = await client.readExample(example.id);
expect(newExampleValue2.inputs.col1).toBe("updatedExampleCol3");
expect(newExampleValue2.metadata?.dataset_split).toStrictEqual(["my_split3"]);

const newExample = await client.createExample(
{ col1: "newAddedExampleCol1" },
{ col2: "newAddedExampleCol2" },
{ datasetId: newDataset.id }
);
const newExampleValue_ = await client.readExample(newExample.id);
expect(newExampleValue_.inputs.col1).toBe("newAddedExampleCol1");
expect(newExampleValue_.outputs?.col2).toBe("newAddedExampleCol2");

await client.updateExamples([
{
id: newExample.id,
inputs: { col1: "newUpdatedExampleCol1" },
outputs: { col2: "newUpdatedExampleCol2" },
metadata: { foo: "baz" },
},
{
id: example.id,
inputs: { col1: "newNewUpdatedExampleCol" },
outputs: { col2: "newNewUpdatedExampleCol2" },
metadata: { foo: "qux" },
},
]);
const updatedExample = await client.readExample(newExample.id);
expect(updatedExample.inputs.col1).toBe("newUpdatedExampleCol1");
expect(updatedExample.outputs?.col2).toBe("newUpdatedExampleCol2");
expect(updatedExample.metadata?.foo).toBe("baz");

const updatedExample2 = await client.readExample(example.id);
expect(updatedExample2.inputs.col1).toBe("newNewUpdatedExampleCol");
expect(updatedExample2.outputs?.col2).toBe("newNewUpdatedExampleCol2");
expect(updatedExample2.metadata?.foo).toBe("qux");

await client.deleteExample(example.id);
const examples2 = await toArray(
client.listExamples({ datasetId: newDataset.id })
);
expect(examples2.length).toBe(1);
expect(examples2.length).toBe(2);

await client.deleteDataset({ datasetId });
const rawDataset = await client.createDataset(fileName, {
Expand Down
66 changes: 66 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3338,6 +3338,72 @@ def update_example(
ls_utils.raise_for_status_with_text(response)
return response.json()

def update_examples(
    self,
    *,
    example_ids: Sequence[ID_TYPE],
    inputs: Optional[Sequence[Optional[Dict[str, Any]]]] = None,
    outputs: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
    metadata: Optional[Sequence[Optional[Dict]]] = None,
    splits: Optional[Sequence[Optional[str | List[str]]]] = None,
    dataset_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
) -> Dict[str, Any]:
    """Update multiple examples in a single request.

    Every per-example sequence is positional: element ``i`` applies to
    ``example_ids[i]``. A sequence that is omitted (or empty) contributes
    nothing to any example, and individual ``None`` entries are dropped from
    the payload for that example.

    Parameters
    ----------
    example_ids : Sequence[ID_TYPE]
        The IDs of the examples to update.
    inputs : Optional[Sequence[Optional[Dict[str, Any]]]], default=None
        The input values for the examples.
    outputs : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
        The output values for the examples.
    metadata : Optional[Sequence[Optional[Dict]]], default=None
        The metadata for the examples.
    splits : Optional[Sequence[Optional[str | List[str]]]], default=None
        The splits for the examples, which are divisions
        of your dataset such as 'train', 'test', or 'validation'.
    dataset_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None
        The IDs of the datasets to move the examples to.

    Returns
    -------
    Dict[str, Any]
        The parsed JSON body of the server response (presumably a JSON
        object; confirm against the API).

    Raises
    ------
    ValueError
        If a provided, non-empty sequence does not have the same length as
        ``example_ids``.
    """
    expected_len = len(example_ids)
    per_example_args = {
        "inputs": inputs,
        "outputs": outputs,
        "metadata": metadata,
        "splits": splits,
        "dataset_ids": dataset_ids,
    }
    # zip() stops at the shortest input, so a short sequence would silently
    # drop updates for the trailing examples. Fail loudly instead.
    for arg_name, arg_value in per_example_args.items():
        if arg_value and len(arg_value) != expected_len:
            raise ValueError(
                f"Length of {arg_name} ({len(arg_value)}) does not match"
                f" length of example_ids ({expected_len})"
            )

    placeholder = [None] * expected_len
    examples = [
        {
            "id": id_,
            "inputs": in_,
            "outputs": out_,
            "dataset_id": dataset_id_,
            "metadata": metadata_,
            "split": split_,
        }
        for id_, in_, out_, metadata_, split_, dataset_id_ in zip(
            example_ids,
            inputs or placeholder,
            outputs or placeholder,
            metadata or placeholder,
            splits or placeholder,
            dataset_ids or placeholder,
        )
    ]
    response = self.request_with_retries(
        "PATCH",
        "/examples/bulk",
        headers={**self._headers, "Content-Type": "application/json"},
        data=(
            _dumps_json(
                [
                    # Only send the fields the caller actually supplied.
                    {k: v for k, v in example.items() if v is not None}
                    for example in examples
                ]
            )
        ),
    )
    ls_utils.raise_for_status_with_text(response)
    return response.json()

def delete_example(self, example_id: ID_TYPE) -> None:
"""Delete an example by ID.
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.90"
version = "0.1.91"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <[email protected]>"]
license = "MIT"
Expand Down
36 changes: 35 additions & 1 deletion python/tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,45 @@ def test_datasets(langchain_client: Client) -> None:
assert updated_example_value.outputs["col2"] == "updatedExampleCol2"
assert (updated_example_value.metadata or {}).get("foo") == "bar"

new_example = langchain_client.create_example(
inputs={"col1": "newAddedExampleCol1"},
outputs={"col2": "newAddedExampleCol2"},
dataset_id=new_dataset.id,
)
example_value = langchain_client.read_example(new_example.id)
assert example_value.inputs is not None
assert example_value.inputs["col1"] == "newAddedExampleCol1"
assert example_value.outputs is not None
assert example_value.outputs["col2"] == "newAddedExampleCol2"

langchain_client.update_examples(
example_ids=[new_example.id, example.id],
inputs=[{"col1": "newUpdatedExampleCol1"}, {"col1": "newNewUpdatedExampleCol"}],
outputs=[
{"col2": "newUpdatedExampleCol2"},
{"col2": "newNewUpdatedExampleCol2"},
],
metadata=[{"foo": "baz"}, {"foo": "qux"}],
)
updated_example = langchain_client.read_example(new_example.id)
assert updated_example.id == new_example.id
assert updated_example.inputs["col1"] == "newUpdatedExampleCol1"
assert updated_example.outputs is not None
assert updated_example.outputs["col2"] == "newUpdatedExampleCol2"
assert (updated_example.metadata or {}).get("foo") == "baz"

updated_example = langchain_client.read_example(example.id)
assert updated_example.id == example.id
assert updated_example.inputs["col1"] == "newNewUpdatedExampleCol"
assert updated_example.outputs is not None
assert updated_example.outputs["col2"] == "newNewUpdatedExampleCol2"
assert (updated_example.metadata or {}).get("foo") == "qux"

langchain_client.delete_example(example.id)
examples2 = list(
langchain_client.list_examples(dataset_id=new_dataset.id) # type: ignore
)
assert len(examples2) == 1
assert len(examples2) == 2
langchain_client.delete_dataset(dataset_id=dataset_id)


Expand Down

0 comments on commit fa06b91

Please sign in to comment.