Skip to content

Commit

Permalink
Clone dataset schema (#1278)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Dec 5, 2024
1 parent 185a926 commit 59a8c09
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
27 changes: 22 additions & 5 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2324,9 +2324,10 @@ def read_shared_dataset(
share_token: str,
) -> ls_schemas.Dataset:
"""Get shared datasets."""
_, token_uuid = _parse_token_or_url(share_token, self.api_url)
response = self.request_with_retries(
"GET",
f"/public/{_as_uuid(share_token, 'share_token')}/datasets",
f"/public/{token_uuid}/datasets",
headers=self._headers,
)
ls_utils.raise_for_status_with_text(response)
Expand Down Expand Up @@ -2794,6 +2795,7 @@ def create_dataset(
data_type: ls_schemas.DataType = ls_schemas.DataType.kv,
inputs_schema: Optional[Dict[str, Any]] = None,
outputs_schema: Optional[Dict[str, Any]] = None,
transformations: Optional[List[ls_schemas.DatasetTransformation]] = None,
metadata: Optional[dict] = None,
) -> ls_schemas.Dataset:
"""Create a dataset in the LangSmith API.
Expand All @@ -2802,22 +2804,34 @@ def create_dataset(
----------
dataset_name : str
The name of the dataset.
description : str or None, default=None
description : Optional[str], default=None
The description of the dataset.
data_type : DataType or None, default=DataType.kv
data_type : ls_schemas.DataType, default=ls_schemas.DataType.kv
The data type of the dataset.
metadata: dict or None, default=None
inputs_schema : Optional[Dict[str, Any]], default=None
The schema definition for the inputs of the dataset.
outputs_schema : Optional[Dict[str, Any]], default=None
The schema definition for the outputs of the dataset.
transformations : Optional[List[ls_schemas.DatasetTransformation]], default=None
A list of transformations to apply to the dataset.
metadata : Optional[dict], default=None
Additional metadata to associate with the dataset.
Returns:
-------
Dataset
ls_schemas.Dataset
The created dataset.
Raises:
------
requests.HTTPError
If the request to create the dataset fails.
"""
dataset: Dict[str, Any] = {
"name": dataset_name,
"data_type": data_type.value,
"created_at": datetime.datetime.now().isoformat(),
"transformations": transformations,
"extra": {"metadata": metadata} if metadata else None,
}
if description is not None:
Expand Down Expand Up @@ -3280,6 +3294,9 @@ def clone_public_dataset(
dataset_name=dataset_name,
description=ds.description,
data_type=ds.data_type or ls_schemas.DataType.kv,
inputs_schema=ds.inputs_schema,
outputs_schema=ds.outputs_schema,
transformations=ds.transformations,
)
try:
self.create_examples(
Expand Down
17 changes: 17 additions & 0 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,22 @@ class Config:
frozen = True


# Names of the recognised dataset transformation types. The
# ``transformation_type`` key of ``DatasetTransformation`` accepts these
# (and, per its annotation, any other string as well).
DatasetTransformationType = Literal[
    "remove_system_messages",
    "convert_to_openai_message",
    "convert_to_openai_tool",
    "remove_extra_fields",
    "extract_tools_from_run",
]


class DatasetTransformation(TypedDict, total=False):
    """Schema for a single dataset transformation.

    Declared with ``total=False``, so neither key is required to be present.
    """

    # List of string segments; presumably a key path locating the value the
    # transformation applies to -- TODO confirm against the API.
    path: List[str]
    # One of the names in ``DatasetTransformationType``, or any other string.
    transformation_type: Union[DatasetTransformationType, str]


class Dataset(DatasetBase):
"""Dataset ORM model."""

Expand All @@ -182,6 +198,7 @@ class Dataset(DatasetBase):
last_session_start_time: Optional[datetime] = None
inputs_schema: Optional[Dict[str, Any]] = None
outputs_schema: Optional[Dict[str, Any]] = None
transformations: Optional[List[DatasetTransformation]] = None
_host_url: Optional[str] = PrivateAttr(default=None)
_tenant_id: Optional[UUID] = PrivateAttr(default=None)
_public_path: Optional[str] = PrivateAttr(default=None)
Expand Down

0 comments on commit 59a8c09

Please sign in to comment.