diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 03a8f5e37..3c9b15df0 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -172,15 +172,11 @@ def _parse_token_or_url(
def _is_langchain_hosted(url: str) -> bool:
"""Check if the URL is langchain hosted.
- Parameters
- ----------
- url : str
- The URL to check.
+ Args:
+ url (str): The URL to check.
Returns:
- -------
- bool
- True if the URL is langchain hosted, False otherwise.
+ bool: True if the URL is langchain hosted, False otherwise.
"""
try:
netloc = urllib_parse.urlsplit(url).netloc.split(":")[0]
@@ -201,9 +197,7 @@ def _default_retry_config() -> Retry:
If urllib3 version is 1.26 or greater, retry on all methods.
Returns:
- -------
- Retry
- The default retry configuration.
+ Retry: The default retry configuration.
"""
retry_params = dict(
total=3,
@@ -231,10 +225,8 @@ def _default_retry_config() -> Retry:
def close_session(session: requests.Session) -> None:
"""Close the session.
- Parameters
- ----------
- session : Session
- The session to close.
+ Args:
+ session (requests.Session): The session to close.
"""
logger.debug("Closing Client.session")
session.close()
@@ -243,17 +235,15 @@ def close_session(session: requests.Session) -> None:
def _validate_api_key_if_hosted(api_url: str, api_key: Optional[str]) -> None:
"""Verify API key is provided if url not localhost.
- Parameters
- ----------
- api_url : str
- The API URL.
- api_key : str or None
- The API key.
+ Args:
+ api_url (str): The API URL.
+ api_key (Optional[str]): The API key.
+
+ Returns:
+ None
Raises:
- ------
- LangSmithUserError
- If the API key is not provided when using the hosted service.
+ LangSmithUserError: If the API key is not provided when using the hosted service.
"""
# If the domain is langchain.com, raise error if no api_key
if not api_key:
@@ -268,9 +258,7 @@ def _get_tracing_sampling_rate() -> float | None:
"""Get the tracing sampling rate.
Returns:
- -------
- float
- The tracing sampling rate.
+ Optional[float]: The tracing sampling rate.
"""
sampling_rate_str = ls_utils.get_env_var("TRACING_SAMPLING_RATE")
if sampling_rate_str is None:
@@ -410,49 +398,38 @@ def __init__(
) -> None:
"""Initialize a Client instance.
- Parameters
- ----------
- api_url : str or None, default=None
- URL for the LangSmith API. Defaults to the LANGCHAIN_ENDPOINT
- environment variable or https://api.smith.langchain.com if not set.
- api_key : str or None, default=None
- API key for the LangSmith API. Defaults to the LANGCHAIN_API_KEY
- environment variable.
- retry_config : Retry or None, default=None
- Retry configuration for the HTTPAdapter.
- timeout_ms : int, tuple[int, int], or None, default=None
- Timeout for the HTTPAdapter. Can also be a 2-tuple of
- (connect timeout, read timeout) to set them separately.
- web_url : str or None, default=None
- URL for the LangSmith web app. Default is auto-inferred from
- the ENDPOINT.
- session: requests.Session or None, default=None
- The session to use for requests. If None, a new session will be
- created.
- anonymizer : Optional[Callable[[dict], dict]]
- A function applied for masking serialized run inputs and outputs,
- before sending to the API.
- hide_inputs: Whether to hide run inputs when tracing with this client.
- If True, hides the entire inputs. If a function, applied to
- all run inputs when creating runs.
- hide_outputs: Whether to hide run outputs when tracing with this client.
- If True, hides the entire outputs. If a function, applied to
- all run outputs when creating runs.
- info: Optional[ls_schemas.LangSmithInfo]
- The information about the LangSmith API. If not provided, it will
- be fetched from the API.
- api_urls: Optional[Dict[str, str]]
- A dictionary of write API URLs and their corresponding API keys.
- Useful for multi-tenant setups. Data is only read from the first
- URL in the dictionary. However, ONLY Runs are written (POST and PATCH)
- to all URLs in the dictionary. Feedback, sessions, datasets, examples,
- annotation queues and evaluation results are only written to the first.
+ Args:
+ api_url (Optional[str]): URL for the LangSmith API. Defaults to the LANGCHAIN_ENDPOINT
+ environment variable or https://api.smith.langchain.com if not set.
+ api_key (Optional[str]): API key for the LangSmith API. Defaults to the LANGCHAIN_API_KEY
+ environment variable.
+ retry_config (Optional[Retry]): Retry configuration for the HTTPAdapter.
+ timeout_ms (Optional[Union[int, Tuple[int, int]]]): Timeout for the HTTPAdapter. Can also be a 2-tuple of
+ (connect timeout, read timeout) to set them separately.
+ web_url (Optional[str]): URL for the LangSmith web app. Default is auto-inferred from
+ the ENDPOINT.
+ session (Optional[requests.Session]): The session to use for requests. If None, a new session will be
+ created.
+ auto_batch_tracing (bool, default=True): Whether to automatically batch tracing.
+ anonymizer (Optional[Callable[[dict], dict]]): A function applied for masking serialized run inputs and outputs,
+ before sending to the API.
+ hide_inputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run inputs when tracing with this client.
+ If True, hides the entire inputs. If a function, applied to
+ all run inputs when creating runs.
+ hide_outputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run outputs when tracing with this client.
+ If True, hides the entire outputs. If a function, applied to
+ all run outputs when creating runs.
+ info (Optional[ls_schemas.LangSmithInfo]): The information about the LangSmith API.
+ If not provided, it will be fetched from the API.
+ api_urls (Optional[Dict[str, str]]): A dictionary of write API URLs and their corresponding API keys.
+ Useful for multi-tenant setups. Data is only read from the first
+ URL in the dictionary. However, ONLY Runs are written (POST and PATCH)
+ to all URLs in the dictionary. Feedback, sessions, datasets, examples,
+ annotation queues and evaluation results are only written to the first.
Raises:
- ------
- LangSmithUserError
- If the API key is not provided when using the hosted service.
- If both api_url and api_urls are provided.
+ LangSmithUserError: If the API key is not provided when using the hosted service.
+ If both api_url and api_urls are provided.
"""
if api_url and api_urls:
raise ls_utils.LangSmithUserError(
@@ -581,9 +558,7 @@ def _repr_html_(self) -> str:
"""Return an HTML representation of the instance with a link to the URL.
Returns:
- -------
- str
- The HTML representation of the instance.
+ str: The HTML representation of the instance.
"""
link = self._host_url
return f'LangSmith Client'
@@ -592,9 +567,7 @@ def __repr__(self) -> str:
"""Return a string representation of the instance with a link to the URL.
Returns:
- -------
- str
- The string representation of the instance.
+ str: The string representation of the instance.
"""
return f"Client (API URL: {self.api_url})"
@@ -612,9 +585,7 @@ def _headers(self) -> Dict[str, str]:
"""Get the headers for the API request.
Returns:
- -------
- Dict[str, str]
- The headers for the API request.
+ Dict[str, str]: The headers for the API request.
"""
headers = {
"User-Agent": f"langsmith-py/{langsmith.__version__}",
@@ -629,9 +600,7 @@ def info(self) -> ls_schemas.LangSmithInfo:
"""Get the information about the LangSmith API.
Returns:
- -------
- Optional[ls_schemas.LangSmithInfo]
- The information about the LangSmith API, or None if the API is
+ ls_schemas.LangSmithInfo: The information about the LangSmith API, or None if the API is
not available.
"""
if self._info is None:
@@ -697,42 +666,26 @@ def request_with_retries(
) -> requests.Response:
"""Send a request with retries.
- Parameters
- ----------
- request_method : str
- The HTTP request method.
- pathname : str
- The pathname of the request URL. Will be appended to the API URL.
- request_kwargs : Mapping
- Additional request parameters.
- stop_after_attempt : int, default=1
- The number of attempts to make.
- retry_on : Sequence[Type[BaseException]] or None, default=None
- The exceptions to retry on. In addition to:
- [LangSmithConnectionError, LangSmithAPIError].
- to_ignore : Sequence[Type[BaseException]] or None, default=None
- The exceptions to ignore / pass on.
- handle_response : Callable[[requests.Response, int], Any] or None, default=None
- A function to handle the response and return whether to continue
- retrying.
- **kwargs : Any
- Additional keyword arguments to pass to the request.
+ Args:
+ method (str): The HTTP request method.
+ pathname (str): The pathname of the request URL. Will be appended to the API URL.
+ request_kwargs (Mapping): Additional request parameters.
+ stop_after_attempt (int, default=1): The number of attempts to make.
+ retry_on (Optional[Sequence[Type[BaseException]]]): The exceptions to retry on, in addition to
+ [LangSmithConnectionError, LangSmithAPIError].
+ to_ignore (Optional[Sequence[Type[BaseException]]]): The exceptions to ignore / pass on.
+ handle_response (Optional[Callable[[requests.Response, int], Any]]): A function to handle the response and return whether to continue retrying.
+ _context (str, default=""): The context of the request.
+ **kwargs (Any): Additional keyword arguments to pass to the request.
Returns:
- -------
- Response
- The response object.
+ requests.Response: The response object.
Raises:
- ------
- LangSmithAPIError
- If a server error occurs.
- LangSmithUserError
- If the request fails.
- LangSmithConnectionError
- If a connection error occurs.
- LangSmithError
- If the request fails.
+ LangSmithAPIError: If a server error occurs.
+ LangSmithUserError: If the request fails.
+ LangSmithConnectionError: If a connection error occurs.
+ LangSmithError: If the request fails.
"""
request_kwargs = request_kwargs or {}
request_kwargs = {
@@ -925,16 +878,11 @@ def _get_paginated_list(
) -> Iterator[dict]:
"""Get a paginated list of items.
- Parameters
- ----------
- path : str
- The path of the request URL.
- params : dict or None, default=None
- The query parameters.
+ Args:
+ path (str): The path of the request URL.
+ params (Optional[dict]): The query parameters.
Yields:
- ------
- dict
The items in the paginated list.
"""
params_ = params.copy() if params else {}
@@ -967,19 +915,13 @@ def _get_cursor_paginated_list(
) -> Iterator[dict]:
"""Get a cursor paginated list of items.
- Parameters
- ----------
- path : str
- The path of the request URL.
- body : dict or None, default=None
- The query body.
- request_method : str, default="post"
- The HTTP request method.
- data_key : str, default="runs"
+ Args:
+ path (str): The path of the request URL.
+ body (Optional[dict]): The query body.
+ request_method (Literal["GET", "POST"], default="POST"): The HTTP request method.
+ data_key (str, default="runs"): The key in the response body that contains the items.
Yields:
- ------
- dict
The items in the paginated list.
"""
params_ = body.copy() if body else {}
@@ -1016,30 +958,40 @@ def upload_dataframe(
) -> ls_schemas.Dataset:
"""Upload a dataframe as individual examples to the LangSmith API.
- Parameters
- ----------
- df : pd.DataFrame
- The dataframe to upload.
- name : str
- The name of the dataset.
- input_keys : Sequence[str]
- The input keys.
- output_keys : Sequence[str]
- The output keys.
- description : str or None, default=None
- The description of the dataset.
- data_type : DataType or None, default=DataType.kv
- The data type of the dataset.
+ Args:
+ df (pd.DataFrame): The dataframe to upload.
+ name (str): The name of the dataset.
+ input_keys (Sequence[str]): The input keys.
+ output_keys (Sequence[str]): The output keys.
+ description (Optional[str]): The description of the dataset.
+ data_type (Optional[DataType]): The data type of the dataset.
Returns:
- -------
- Dataset
- The uploaded dataset.
+ Dataset: The uploaded dataset.
Raises:
- ------
- ValueError
- If the csv_file is not a string or tuple.
+ ValueError: If the csv_file is not a string or tuple.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import os
+ import pandas as pd
+
+ client = Client()
+
+ df = pd.read_parquet('path/to/your/myfile.parquet')
+ input_keys = ['column1', 'column2'] # replace with your input column names
+ output_keys = ['output1', 'output2'] # replace with your output column names
+
+ dataset = client.upload_dataframe(
+ df=df,
+ input_keys=input_keys,
+ output_keys=output_keys,
+ name="My Parquet Dataset",
+ description="Dataset created from a parquet file",
+ data_type="kv" # The default
+ )
"""
csv_file = io.BytesIO()
df.to_csv(csv_file, index=False)
@@ -1065,32 +1017,41 @@ def upload_csv(
) -> ls_schemas.Dataset:
"""Upload a CSV file to the LangSmith API.
- Parameters
- ----------
- csv_file : str or Tuple[str, BytesIO]
- The CSV file to upload. If a string, it should be the path
- If a tuple, it should be a tuple containing the filename
- and a BytesIO object.
- input_keys : Sequence[str]
- The input keys.
- output_keys : Sequence[str]
- The output keys.
- name : str or None, default=None
- The name of the dataset.
- description : str or None, default=None
- The description of the dataset.
- data_type : DataType or None, default=DataType.kv
- The data type of the dataset.
+ Args:
+ csv_file (Union[str, Tuple[str, io.BytesIO]]): The CSV file to upload. If a string, it should be the path
+ to the CSV file. If a tuple, it should contain the filename and a
+ BytesIO object.
+ input_keys (Sequence[str]): The input keys.
+ output_keys (Sequence[str]): The output keys.
+ name (Optional[str]): The name of the dataset.
+ description (Optional[str]): The description of the dataset.
+ data_type (Optional[ls_schemas.DataType]): The data type of the dataset.
Returns:
- -------
- Dataset
- The uploaded dataset.
+ Dataset: The uploaded dataset.
Raises:
- ------
- ValueError
- If the csv_file is not a string or tuple.
+ ValueError: If the csv_file is not a string or tuple.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import os
+
+ client = Client()
+
+ csv_file = 'path/to/your/myfile.csv'
+ input_keys = ['column1', 'column2'] # replace with your input column names
+ output_keys = ['output1', 'output2'] # replace with your output column names
+
+ dataset = client.upload_csv(
+ csv_file=csv_file,
+ input_keys=input_keys,
+ output_keys=output_keys,
+ name="My CSV Dataset",
+ description="Dataset created from a CSV file",
+ data_type="kv" # The default
+ )
"""
data = {
"input_keys": input_keys,
@@ -1144,8 +1105,8 @@ def _run_transform(
Args:
run (Union[ls_schemas.Run, dict]): The run object to transform.
- update (bool, optional): Whether the payload is for an "update" event.
- copy (bool, optional): Whether to deepcopy run inputs/outputs.
+ update (Optional[bool]): Whether the payload is for an "update" event.
+ copy (Optional[bool]): Whether to deepcopy run inputs/outputs.
Returns:
dict: The transformed run object as a dictionary.
@@ -1242,24 +1203,42 @@ def create_run(
) -> None:
"""Persist a run to the LangSmith API.
- Parameters
- ----------
- name : str
- The name of the run.
- inputs : Dict[str, Any]
- The input values for the run.
- run_type : str
- The type of the run, such as tool, chain, llm, retriever,
- embedding, prompt, or parser.
- revision_id : ID_TYPE or None, default=None
- The revision ID of the run.
- **kwargs : Any
- Additional keyword arguments.
+ Args:
+ name (str): The name of the run.
+ inputs (Dict[str, Any]): The input values for the run.
+ run_type (str): The type of the run, such as tool, chain, llm, retriever,
+ embedding, prompt, or parser.
+ project_name (Optional[str]): The project name of the run.
+ revision_id (Optional[Union[UUID, str]]): The revision ID of the run.
+ **kwargs (Any): Additional keyword arguments.
+
+ Returns:
+ None
Raises:
- ------
- LangSmithUserError
- If the API key is not provided when using the hosted service.
+ LangSmithUserError: If the API key is not provided when using the hosted service.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import datetime
+ from uuid import uuid4
+
+ client = Client()
+
+ run_id = uuid4()
+ client.create_run(
+ id=run_id,
+ project_name="My Project",
+ name="test_run",
+ run_type="llm",
+ inputs={"prompt": "hello world"},
+ outputs={"generation": "hi there"},
+ start_time=datetime.datetime.now(datetime.timezone.utc),
+ end_time=datetime.datetime.now(datetime.timezone.utc),
+ hide_inputs=True,
+ hide_outputs=True,
+ )
"""
project_name = project_name or kwargs.pop(
"session_name",
@@ -1418,25 +1397,89 @@ def batch_ingest_runs(
"""Batch ingest/upsert multiple runs in the Langsmith system.
Args:
- create (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]):
+ create (Optional[Sequence[Union[Run, RunLikeDict]]]):
A sequence of `Run` objects or equivalent dictionaries representing
runs to be created / posted.
- update (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]):
+ update (Optional[Sequence[Union[Run, RunLikeDict]]]):
A sequence of `Run` objects or equivalent dictionaries representing
runs that have already been created and should be updated / patched.
- pre_sampled (bool, optional): Whether the runs have already been subject
+ pre_sampled (bool, default=False): Whether the runs have already been subject
to sampling, and therefore should not be sampled again.
Defaults to False.
- Returns:
- None
-
Raises:
LangsmithAPIError: If there is an error in the API request.
+ Returns:
+ None
+
Note:
- The run objects MUST contain the dotted_order and trace_id fields
to be accepted by the API.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import datetime
+ from uuid import uuid4
+
+ client = Client()
+ _session = "__test_batch_ingest_runs"
+ trace_id = uuid4()
+ trace_id_2 = uuid4()
+ run_id_2 = uuid4()
+ current_time = datetime.datetime.now(datetime.timezone.utc).strftime(
+ "%Y%m%dT%H%M%S%fZ"
+ )
+ later_time = (
+ datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(seconds=1)
+ ).strftime("%Y%m%dT%H%M%S%fZ")
+
+ runs_to_create = [
+ {
+ "id": str(trace_id),
+ "session_name": _session,
+ "name": "run 1",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id)}",
+ "trace_id": str(trace_id),
+ "inputs": {"input1": 1, "input2": 2},
+ "outputs": {"output1": 3, "output2": 4},
+ },
+ {
+ "id": str(trace_id_2),
+ "session_name": _session,
+ "name": "run 3",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id_2)}",
+ "trace_id": str(trace_id_2),
+ "inputs": {"input1": 1, "input2": 2},
+ "error": "error",
+ },
+ {
+ "id": str(run_id_2),
+ "session_name": _session,
+ "name": "run 2",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id)}."
+ f"{later_time}{str(run_id_2)}",
+ "trace_id": str(trace_id),
+ "parent_run_id": str(trace_id),
+ "inputs": {"input1": 5, "input2": 6},
+ },
+ ]
+ runs_to_update = [
+ {
+ "id": str(run_id_2),
+ "dotted_order": f"{current_time}{str(trace_id)}."
+ f"{later_time}{str(run_id_2)}",
+ "trace_id": str(trace_id),
+ "parent_run_id": str(trace_id),
+ "outputs": {"output1": 4, "output2": 5},
+ },
+ ]
+
+ client.batch_ingest_runs(create=runs_to_create, update=runs_to_update)
"""
if not create and not update:
return
@@ -1547,19 +1590,83 @@ def multipart_ingest(
update (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]):
A sequence of `Run` objects or equivalent dictionaries representing
runs that have already been created and should be updated / patched.
- pre_sampled (bool, optional): Whether the runs have already been subject
+ pre_sampled (bool, default=False): Whether the runs have already been subject
to sampling, and therefore should not be sampled again.
Defaults to False.
- Returns:
- None
-
Raises:
LangsmithAPIError: If there is an error in the API request.
+
+ Returns:
+ None
Note:
- The run objects MUST contain the dotted_order and trace_id fields
to be accepted by the API.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import datetime
+ from uuid import uuid4
+
+ client = Client()
+ _session = "__test_batch_ingest_runs"
+ trace_id = uuid4()
+ trace_id_2 = uuid4()
+ run_id_2 = uuid4()
+ current_time = datetime.datetime.now(datetime.timezone.utc).strftime(
+ "%Y%m%dT%H%M%S%fZ"
+ )
+ later_time = (
+ datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(seconds=1)
+ ).strftime("%Y%m%dT%H%M%S%fZ")
+
+ runs_to_create = [
+ {
+ "id": str(trace_id),
+ "session_name": _session,
+ "name": "run 1",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id)}",
+ "trace_id": str(trace_id),
+ "inputs": {"input1": 1, "input2": 2},
+ "outputs": {"output1": 3, "output2": 4},
+ },
+ {
+ "id": str(trace_id_2),
+ "session_name": _session,
+ "name": "run 3",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id_2)}",
+ "trace_id": str(trace_id_2),
+ "inputs": {"input1": 1, "input2": 2},
+ "error": "error",
+ },
+ {
+ "id": str(run_id_2),
+ "session_name": _session,
+ "name": "run 2",
+ "run_type": "chain",
+ "dotted_order": f"{current_time}{str(trace_id)}."
+ f"{later_time}{str(run_id_2)}",
+ "trace_id": str(trace_id),
+ "parent_run_id": str(trace_id),
+ "inputs": {"input1": 5, "input2": 6},
+ },
+ ]
+ runs_to_update = [
+ {
+ "id": str(run_id_2),
+ "dotted_order": f"{current_time}{str(trace_id)}."
+ f"{later_time}{str(run_id_2)}",
+ "trace_id": str(trace_id),
+ "parent_run_id": str(trace_id),
+ "outputs": {"output1": 4, "output2": 5},
+ },
+ ]
+
+ client.multipart_ingest(create=runs_to_create, update=runs_to_update)
"""
if not (create or update):
return
@@ -1688,31 +1795,52 @@ def update_run(
) -> None:
"""Update a run in the LangSmith API.
- Parameters
- ----------
- run_id : str or UUID
- The ID of the run to update.
- name : str or None, default=None
- The name of the run.
- end_time : datetime or None
- The end time of the run.
- error : str or None, default=None
- The error message of the run.
- inputs : Dict or None, default=None
- The input values for the run.
- outputs : Dict or None, default=None
- The output values for the run.
- events : Sequence[dict] or None, default=None
- The events for the run.
- extra : Dict or None, default=None
- The extra information for the run.
- tags : List[str] or None, default=None
- The tags for the run.
- attachments: dict[str, ls_schemas.Attachment] or None, default=None
- A dictionary of attachments to add to the run. The keys are the attachment names,
- and the values are Attachment objects containing the data and mime type.
- **kwargs : Any
- Kwargs are ignored.
+ Args:
+ run_id (Union[UUID, str]): The ID of the run to update.
+ name (Optional[str]): The name of the run.
+ end_time (Optional[datetime.datetime]): The end time of the run.
+ error (Optional[str]): The error message of the run.
+ inputs (Optional[Dict]): The input values for the run.
+ outputs (Optional[Dict]): The output values for the run.
+ events (Optional[Sequence[dict]]): The events for the run.
+ extra (Optional[Dict]): The extra information for the run.
+ tags (Optional[List[str]]): The tags for the run.
+ attachments (Optional[Dict[str, Attachment]]): A dictionary of attachments to add to the run. The keys are the attachment names,
+ and the values are Attachment objects containing the data and mime type.
+ **kwargs (Any): Kwargs are ignored.
+
+ Returns:
+ None
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+ import datetime
+ import os
+ from uuid import uuid4
+
+ client = Client()
+ project_name = "__test_update_run"
+
+ start_time = datetime.datetime.now()
+ revision_id = uuid4()
+ run: dict = dict(
+ id=uuid4(),
+ name="test_run",
+ run_type="llm",
+ inputs={"text": "hello world"},
+ project_name=project_name,
+ api_url=os.getenv("LANGCHAIN_ENDPOINT"),
+ start_time=start_time,
+ extra={"extra": "extra"},
+ revision_id=revision_id,
+ )
+ # Create the run
+ client.create_run(**run)
+ run["outputs"] = {"output": ["Hi"]}
+ run["extra"]["foo"] = "bar"
+ run["name"] = "test_run_updated"
+ # Update the run
+ client.update_run(run["id"], **run)
"""
data: Dict[str, Any] = {
"id": _as_uuid(run_id, "run_id"),
@@ -1782,20 +1910,14 @@ def _update_run(self, run_update: dict) -> None:
def _load_child_runs(self, run: ls_schemas.Run) -> ls_schemas.Run:
"""Load child runs for a given run.
- Parameters
- ----------
- run : Run
- The run to load child runs for.
+ Args:
+ run (Run): The run to load child runs for.
Returns:
- -------
- Run
- The run with loaded child runs.
+ Run: The run with loaded child runs.
Raises:
- ------
- LangSmithError
- If a child run has no parent.
+ LangSmithError: If a child run has no parent.
"""
child_runs = self.list_runs(id=run.child_run_ids)
treemap: DefaultDict[uuid.UUID, List[ls_schemas.Run]] = collections.defaultdict(
@@ -1820,17 +1942,24 @@ def read_run(
) -> ls_schemas.Run:
"""Read a run from the LangSmith API.
- Parameters
- ----------
- run_id : str or UUID
- The ID of the run to read.
- load_child_runs : bool, default=False
- Whether to load nested child runs.
+ Args:
+ run_id (Union[UUID, str]):
+ The ID of the run to read.
+ load_child_runs (bool, default=False):
+ Whether to load nested child runs.
Returns:
- -------
- Run
- The run.
+ Run: The run read from the LangSmith API.
+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ # Existing run
+ run_id = "your-run-id"
+
+ client = Client()
+ stored_run = client.read_run(run_id)
"""
response = self.request_with_retries(
"GET", f"/runs/{_as_uuid(run_id, 'run_id')}"
@@ -1863,108 +1992,93 @@ def list_runs(
) -> Iterator[ls_schemas.Run]:
"""List runs from the LangSmith API.
- Parameters
- ----------
- project_id : UUID or None, default=None
- The ID(s) of the project to filter by.
- project_name : str or None, default=None
- The name(s) of the project to filter by.
- run_type : str or None, default=None
- The type of the runs to filter by.
- trace_id : UUID or None, default=None
- The ID of the trace to filter by.
- reference_example_id : UUID or None, default=None
- The ID of the reference example to filter by.
- query : str or None, default=None
- The query string to filter by.
- filter : str or None, default=None
- The filter string to filter by.
- trace_filter : str or None, default=None
- Filter to apply to the ROOT run in the trace tree. This is meant to
- be used in conjunction with the regular `filter` parameter to let you
- filter runs by attributes of the root run within a trace.
- tree_filter : str or None, default=None
- Filter to apply to OTHER runs in the trace tree, including
- sibling and child runs. This is meant to be used in conjunction with
- the regular `filter` parameter to let you filter runs by attributes
- of any run within a trace.
- is_root : bool or None, default=None
- Whether to filter by root runs.
- parent_run_id : UUID or None, default=None
- The ID of the parent run to filter by.
- start_time : datetime or None, default=None
- The start time to filter by.
- error : bool or None, default=None
- Whether to filter by error status.
- run_ids : List[str or UUID] or None, default=None
- The IDs of the runs to filter by.
- limit : int or None, default=None
- The maximum number of runs to return.
- **kwargs : Any
- Additional keyword arguments.
+ Args:
+ project_id (Optional[Union[UUID, str, Sequence[Union[UUID, str]]]]):
+ The ID(s) of the project to filter by.
+ project_name (Optional[Union[str, Sequence[str]]]): The name(s) of the project to filter by.
+ run_type (Optional[str]): The type of the runs to filter by.
+ trace_id (Optional[Union[UUID, str]]): The ID of the trace to filter by.
+ reference_example_id (Optional[Union[UUID, str]]): The ID of the reference example to filter by.
+ query (Optional[str]): The query string to filter by.
+ filter (Optional[str]): The filter string to filter by.
+ trace_filter (Optional[str]): Filter to apply to the ROOT run in the trace tree. This is meant to
+ be used in conjunction with the regular `filter` parameter to let you
+ filter runs by attributes of the root run within a trace.
+ tree_filter (Optional[str]): Filter to apply to OTHER runs in the trace tree, including
+ sibling and child runs. This is meant to be used in conjunction with
+ the regular `filter` parameter to let you filter runs by attributes
+ of any run within a trace.
+ is_root (Optional[bool]): Whether to filter by root runs.
+ parent_run_id (Optional[Union[UUID, str]]):
+ The ID of the parent run to filter by.
+ start_time (Optional[datetime.datetime]):
+ The start time to filter by.
+ error (Optional[bool]): Whether to filter by error status.
+ run_ids (Optional[Sequence[Union[UUID, str]]]):
+ The IDs of the runs to filter by.
+ select (Optional[Sequence[str]]): The fields to select.
+ limit (Optional[int]): The maximum number of runs to return.
+ **kwargs (Any): Additional keyword arguments.
Yields:
- ------
- Run
The runs.
Examples:
- --------
- .. code-block:: python
+ .. code-block:: python
- # List all runs in a project
- project_runs = client.list_runs(project_name="")
+ # List all runs in a project
+ project_runs = client.list_runs(project_name="")
- # List LLM and Chat runs in the last 24 hours
- todays_llm_runs = client.list_runs(
- project_name="",
- start_time=datetime.now() - timedelta(days=1),
- run_type="llm",
- )
+ # List LLM and Chat runs in the last 24 hours
+ todays_llm_runs = client.list_runs(
+ project_name="",
+ start_time=datetime.now() - timedelta(days=1),
+ run_type="llm",
+ )
- # List root traces in a project
- root_runs = client.list_runs(project_name="", is_root=1)
+ # List root traces in a project
+ root_runs = client.list_runs(project_name="", is_root=1)
- # List runs without errors
- correct_runs = client.list_runs(project_name="", error=False)
+ # List runs without errors
+ correct_runs = client.list_runs(project_name="", error=False)
- # List runs and only return their inputs/outputs (to speed up the query)
- input_output_runs = client.list_runs(
- project_name="", select=["inputs", "outputs"]
- )
+ # List runs and only return their inputs/outputs (to speed up the query)
+ input_output_runs = client.list_runs(
+ project_name="", select=["inputs", "outputs"]
+ )
- # List runs by run ID
- run_ids = [
- "a36092d2-4ad5-4fb4-9c0d-0dba9a2ed836",
- "9398e6be-964f-4aa4-8ae9-ad78cd4b7074",
- ]
- selected_runs = client.list_runs(id=run_ids)
+ # List runs by run ID
+ run_ids = [
+ "a36092d2-4ad5-4fb4-9c0d-0dba9a2ed836",
+ "9398e6be-964f-4aa4-8ae9-ad78cd4b7074",
+ ]
+ selected_runs = client.list_runs(id=run_ids)
- # List all "chain" type runs that took more than 10 seconds and had
- # `total_tokens` greater than 5000
- chain_runs = client.list_runs(
- project_name="",
- filter='and(eq(run_type, "chain"), gt(latency, 10), gt(total_tokens, 5000))',
- )
+ # List all "chain" type runs that took more than 10 seconds and had
+ # `total_tokens` greater than 5000
+ chain_runs = client.list_runs(
+ project_name="",
+ filter='and(eq(run_type, "chain"), gt(latency, 10), gt(total_tokens, 5000))',
+ )
- # List all runs called "extractor" whose root of the trace was assigned feedback "user_score" score of 1
- good_extractor_runs = client.list_runs(
- project_name="",
- filter='eq(name, "extractor")',
- trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
- )
+ # List all runs called "extractor" whose root of the trace was assigned feedback "user_score" score of 1
+ good_extractor_runs = client.list_runs(
+ project_name="",
+ filter='eq(name, "extractor")',
+ trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
+ )
- # List all runs that started after a specific timestamp and either have "error" not equal to null or a "Correctness" feedback score equal to 0
- complex_runs = client.list_runs(
- project_name="",
- filter='and(gt(start_time, "2023-07-15T12:34:56Z"), or(neq(error, null), and(eq(feedback_key, "Correctness"), eq(feedback_score, 0.0))))',
- )
+ # List all runs that started after a specific timestamp and either have "error" not equal to null or a "Correctness" feedback score equal to 0
+ complex_runs = client.list_runs(
+ project_name="",
+ filter='and(gt(start_time, "2023-07-15T12:34:56Z"), or(neq(error, null), and(eq(feedback_key, "Correctness"), eq(feedback_score, 0.0))))',
+ )
- # List all runs where `tags` include "experimental" or "beta" and `latency` is greater than 2 seconds
- tagged_runs = client.list_runs(
- project_name="",
- filter='and(or(has(tags, "experimental"), has(tags, "beta")), gt(latency, 2))',
- )
+ # List all runs where `tags` include "experimental" or "beta" and `latency` is greater than 2 seconds
+ tagged_runs = client.list_runs(
+ project_name="",
+ filter='and(or(has(tags, "experimental"), has(tags, "beta")), gt(latency, 2))',
+ )
""" # noqa: E501
project_ids = []
if isinstance(project_id, (uuid.UUID, str)):
@@ -2061,12 +2175,13 @@ def get_run_stats(
based on the runs that match the query.
Args:
- id (Optional[List[ID_TYPE]]): List of run IDs to filter by.
- trace (Optional[ID_TYPE]): Trace ID to filter by.
- parent_run (Optional[ID_TYPE]): Parent run ID to filter by.
+ id (Optional[List[Union[UUID, str]]]): List of run IDs to filter by.
+ trace (Optional[Union[UUID, str]]): Trace ID to filter by.
+ parent_run (Optional[Union[UUID, str]]): Parent run ID to filter by.
run_type (Optional[str]): Run type to filter by.
- projects (Optional[List[ID_TYPE]]): List of session IDs to filter by.
- reference_example (Optional[List[ID_TYPE]]): List of reference example IDs to filter by.
+ project_names (Optional[List[str]]): List of project names to filter by.
+ project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter by.
+ reference_example_ids (Optional[List[Union[UUID, str]]]): List of reference example IDs to filter by.
start_time (Optional[str]): Start time to filter by.
end_time (Optional[str]): End time to filter by.
error (Optional[bool]): Filter by error status.
@@ -2135,19 +2250,13 @@ def get_run_url(
More for use interacting with runs after the fact
for data analysis or ETL workloads.
- Parameters
- ----------
- run : Run
- The run.
- project_name : str or None, default=None
- The name of the project.
- project_id : UUID or None, default=None
- The ID of the project.
+ Args:
+ run (RunBase): The run.
+ project_name (Optional[str]): The name of the project.
+ project_id (Optional[Union[UUID, str]]): The ID of the project.
Returns:
- -------
- str
- The URL for the run.
+ str: The URL for the run.
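+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: assumes an existing run ID in your project.
+ run = client.read_run("your-run-id")
+ url = client.get_run_url(run=run)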
"""
if session_id := getattr(run, "session_id", None):
pass
@@ -2167,7 +2276,16 @@ def get_run_url(
)
def share_run(self, run_id: ID_TYPE, *, share_id: Optional[ID_TYPE] = None) -> str:
- """Get a share link for a run."""
+ """Get a share link for a run.
+
+ Args:
+ run_id (Union[UUID, str]): The ID of the run to share.
+ share_id (Optional[Union[UUID, str]]): Custom share ID.
+ If not provided, a random UUID will be generated.
+
+ Returns:
+ str: The URL of the shared run.
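+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: replace with the ID of a run you own.
+ share_url = client.share_run("your-run-id")
+ print(share_url)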
+ """
run_id_ = _as_uuid(run_id, "run_id")
data = {
"run_id": str(run_id_),
@@ -2184,7 +2302,14 @@ def share_run(self, run_id: ID_TYPE, *, share_id: Optional[ID_TYPE] = None) -> s
return f"{self._host_url}/public/{share_token}/r"
def unshare_run(self, run_id: ID_TYPE) -> None:
- """Delete share link for a run."""
+ """Delete share link for a run.
+
+ Args:
+ run_id (Union[UUID, str]): The ID of the run to unshare.
+
+ Returns:
+ None
+ """
response = self.request_with_retries(
"DELETE",
f"/runs/{_as_uuid(run_id, 'run_id')}/share",
@@ -2196,7 +2321,7 @@ def read_run_shared_link(self, run_id: ID_TYPE) -> Optional[str]:
"""Retrieve the shared link for a specific run.
Args:
- run_id (ID_TYPE): The ID of the run.
+ run_id (Union[UUID, str]): The ID of the run.
Returns:
Optional[str]: The shared link for the run, or None if the link is not
@@ -2214,14 +2339,30 @@ def read_run_shared_link(self, run_id: ID_TYPE) -> Optional[str]:
return f"{self._host_url}/public/{result['share_token']}/r"
def run_is_shared(self, run_id: ID_TYPE) -> bool:
- """Get share state for a run."""
+ """Get share state for a run.
+
+ Args:
+ run_id (Union[UUID, str]): The ID of the run.
+
+ Returns:
+ bool: True if the run is shared, False otherwise.
+ """
link = self.read_run_shared_link(_as_uuid(run_id, "run_id"))
return link is not None
def read_shared_run(
self, share_token: Union[ID_TYPE, str], run_id: Optional[ID_TYPE] = None
) -> ls_schemas.Run:
- """Get shared runs."""
+ """Get shared runs.
+
+ Args:
+ share_token (Union[UUID, str]): The share token or URL of the shared run.
+ run_id (Optional[Union[UUID, str]]): The ID of the specific run to retrieve.
+ If not provided, the full shared run will be returned.
+
+ Returns:
+ Run: The shared run.
+ """
_, token_uuid = _parse_token_or_url(share_token, "", kind="run")
path = f"/public/{token_uuid}/run"
if run_id is not None:
@@ -2237,7 +2378,15 @@ def read_shared_run(
def list_shared_runs(
self, share_token: Union[ID_TYPE, str], run_ids: Optional[List[str]] = None
) -> Iterator[ls_schemas.Run]:
- """Get shared runs."""
+ """Get shared runs.
+
+ Args:
+ share_token (Union[UUID, str]): The share token or URL of the shared run.
+ run_ids (Optional[List[str]]): A list of run IDs to filter the results by.
+
+ Yields:
+ A shared run.
+ """
body = {"id": run_ids} if run_ids else {}
_, token_uuid = _parse_token_or_url(share_token, "", kind="run")
for run in self._get_cursor_paginated_list(
@@ -2254,7 +2403,7 @@ def read_dataset_shared_schema(
"""Retrieve the shared schema of a dataset.
Args:
- dataset_id (Optional[ID_TYPE]): The ID of the dataset.
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
Either `dataset_id` or `dataset_name` must be given.
dataset_name (Optional[str]): The name of the dataset.
Either `dataset_id` or `dataset_name` must be given.
@@ -2291,7 +2440,20 @@ def share_dataset(
*,
dataset_name: Optional[str] = None,
) -> ls_schemas.DatasetShareSchema:
- """Get a share link for a dataset."""
+ """Get a share link for a dataset.
+
+ Args:
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
+ Either `dataset_id` or `dataset_name` must be given.
+ dataset_name (Optional[str]): The name of the dataset.
+ Either `dataset_id` or `dataset_name` must be given.
+
+ Returns:
+ ls_schemas.DatasetShareSchema: The shared schema of the dataset.
+
+ Raises:
+ ValueError: If neither `dataset_id` nor `dataset_name` is given.
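+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: assumes a dataset with this name already exists.
+ shared_schema = client.share_dataset(dataset_name="My Dataset")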
+ """
if dataset_id is None and dataset_name is None:
raise ValueError("Either dataset_id or dataset_name must be given")
if dataset_id is None:
@@ -2313,7 +2475,14 @@ def share_dataset(
)
def unshare_dataset(self, dataset_id: ID_TYPE) -> None:
- """Delete share link for a dataset."""
+ """Delete share link for a dataset.
+
+ Args:
+ dataset_id (Union[UUID, str]): The ID of the dataset to unshare.
+
+ Returns:
+ None
+ """
response = self.request_with_retries(
"DELETE",
f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/share",
@@ -2325,7 +2494,14 @@ def read_shared_dataset(
self,
share_token: str,
) -> ls_schemas.Dataset:
- """Get shared datasets."""
+ """Get shared datasets.
+
+ Args:
+ share_token (Union[UUID, str]): The share token or URL of the shared dataset.
+
+ Returns:
+ Dataset: The shared dataset.
+ """
_, token_uuid = _parse_token_or_url(share_token, self.api_url)
response = self.request_with_retries(
"GET",
@@ -2342,7 +2518,15 @@ def read_shared_dataset(
def list_shared_examples(
self, share_token: str, *, example_ids: Optional[List[ID_TYPE]] = None
) -> List[ls_schemas.Example]:
- """Get shared examples."""
+ """Get shared examples.
+
+ Args:
+ share_token (Union[UUID, str]): The share token or URL of the shared dataset.
+ example_ids (Optional[List[Union[UUID, str]]]): The IDs of the examples to filter by. Defaults to None.
+
+ Returns:
+ List[ls_schemas.Example]: The list of shared examples.
+ """
params = {}
if example_ids is not None:
params["id"] = [str(id) for id in example_ids]
@@ -2370,18 +2554,14 @@ def list_shared_projects(
"""List shared projects.
Args:
- dataset_share_token : str
- The share token of the dataset.
- project_ids : List[ID_TYPE], optional
- List of project IDs to filter the results, by default None.
- name : str, optional
- Name of the project to filter the results, by default None.
- name_contains : str, optional
- Substring to search for in project names, by default None.
- limit : int, optional
+ dataset_share_token (str): The share token of the dataset.
+ project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter the results, by default None.
+ name (Optional[str]): Name of the project to filter the results, by default None.
+ name_contains (Optional[str]): Substring to search for in project names, by default None.
+ limit (Optional[int]): Maximum number of projects to return, by default None.
Yields:
- TracerSessionResult: The shared projects.
+ The shared projects.
"""
params = {"id": project_ids, "name": name, "name_contains": name_contains}
share_token = _as_uuid(dataset_share_token, "dataset_share_token")
@@ -2407,25 +2587,16 @@ def create_project(
) -> ls_schemas.TracerSession:
"""Create a project on the LangSmith API.
- Parameters
- ----------
- project_name : str
- The name of the project.
- project_extra : dict or None, default=None
- Additional project information.
- metadata: dict or None, default=None
- Additional metadata to associate with the project.
- description : str or None, default=None
- The description of the project.
- upsert : bool, default=False
- Whether to update the project if it already exists.
- reference_dataset_id: UUID or None, default=None
- The ID of the reference dataset to associate with the project.
+ Args:
+ project_name (str): The name of the project.
+ project_extra (Optional[dict]): Additional project information.
+ metadata (Optional[dict]): Additional metadata to associate with the project.
+ description (Optional[str]): The description of the project.
+ upsert (bool, default=False): Whether to update the project if it already exists.
+ reference_dataset_id (Optional[Union[UUID, str]]): The ID of the reference dataset to associate with the project.
Returns:
- -------
- TracerSession
- The created project.
+ TracerSession: The created project.
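+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: the project name and description are placeholders.
+ project = client.create_project(
+ project_name="My Project",
+ description="A project for testing",
+ )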
"""
endpoint = f"{self.api_url}/sessions"
extra = project_extra
@@ -2463,24 +2634,23 @@ def update_project(
) -> ls_schemas.TracerSession:
"""Update a LangSmith project.
- Parameters
- ----------
- project_id : UUID
- The ID of the project to update.
- name : str or None, default=None
- The new name to give the project. This is only valid if the project
- has been assigned an end_time, meaning it has been completed/closed.
- description : str or None, default=None
- The new description to give the project.
- metadata: dict or None, default=None
-
- project_extra : dict or None, default=None
- Additional project information.
+ Args:
+ project_id (Union[UUID, str]):
+ The ID of the project to update.
+ name (Optional[str]):
+ The new name to give the project. This is only valid if the project
+ has been assigned an end_time, meaning it has been completed/closed.
+ description (Optional[str]):
+ The new description to give the project.
+ metadata (Optional[dict]):
+ Additional metadata to associate with the project.
+ project_extra (Optional[dict]):
+ Additional project information.
+ end_time (Optional[datetime.datetime]):
+ The time the project was completed.
Returns:
- -------
- TracerSession
- The updated project.
+ TracerSession: The updated project.
"""
endpoint = f"{self.api_url}/sessions/{_as_uuid(project_id, 'project_id')}"
extra = project_extra
@@ -2537,20 +2707,16 @@ def read_project(
) -> ls_schemas.TracerSessionResult:
"""Read a project from the LangSmith API.
- Parameters
- ----------
- project_id : str or None, default=None
- The ID of the project to read.
- project_name : str or None, default=None
- The name of the project to read.
- Note: Only one of project_id or project_name may be given.
- include_stats : bool, default=False
- Whether to include a project's aggregate statistics in the response.
+ Args:
+ project_id (Optional[str]):
+ The ID of the project to read.
+ project_name (Optional[str]): The name of the project to read.
+ Only one of project_id or project_name may be given.
+ include_stats (bool, default=False):
+ Whether to include a project's aggregate statistics in the response.
Returns:
- -------
- TracerSessionResult
- The project.
+ TracerSessionResult: The project.
"""
path = "/sessions"
params: Dict[str, Any] = {"limit": 1}
@@ -2578,17 +2744,14 @@ def has_project(
) -> bool:
"""Check if a project exists.
- Parameters
- ----------
- project_name : str
- The name of the project to check for.
- project_id : str or None, default=None
- The ID of the project to check for.
+ Args:
+ project_name (str):
+ The name of the project to check for.
+ project_id (Optional[str]):
+ The ID of the project to check for.
Returns:
- -------
- bool
- Whether the project exists.
+ bool: Whether the project exists.
"""
try:
self.read_project(project_name=project_name)
@@ -2607,10 +2770,12 @@ def get_test_results(
Note: this will fetch whatever data exists in the DB. Results are not
immediately available in the DB upon evaluation run completion.
+ Args:
+ project_id (Optional[Union[UUID, str]]): The ID of the project.
+ project_name (Optional[str]): The name of the project.
+
Returns:
- --------
- pd.DataFrame
- A dataframe containing the test results.
+ pd.DataFrame: A dataframe containing the test results.
"""
warnings.warn(
"Function get_test_results is in beta.", UserWarning, stacklevel=2
@@ -2709,29 +2874,29 @@ def list_projects(
) -> Iterator[ls_schemas.TracerSession]:
"""List projects from the LangSmith API.
- Parameters
- ----------
- project_ids : Optional[List[ID_TYPE]], optional
- A list of project IDs to filter by, by default None
- name : Optional[str], optional
- The name of the project to filter by, by default None
- name_contains : Optional[str], optional
- A string to search for in the project name, by default None
- reference_dataset_id : Optional[List[ID_TYPE]], optional
- A dataset ID to filter by, by default None
- reference_dataset_name : Optional[str], optional
- The name of the reference dataset to filter by, by default None
- reference_free : Optional[bool], optional
- Whether to filter for only projects not associated with a dataset.
- limit : Optional[int], optional
- The maximum number of projects to return, by default None
- metadata: Optional[Dict[str, Any]], optional
- Metadata to filter by.
+ Args:
+ project_ids (Optional[List[Union[UUID, str]]]):
+ A list of project IDs to filter by, by default None
+ name (Optional[str]):
+ The name of the project to filter by, by default None
+ name_contains (Optional[str]):
+ A string to search for in the project name, by default None
+ reference_dataset_id (Optional[List[Union[UUID, str]]]):
+ A dataset ID to filter by, by default None
+ reference_dataset_name (Optional[str]):
+ The name of the reference dataset to filter by, by default None
+ reference_free (Optional[bool]):
+ Whether to filter for only projects not associated with a dataset.
+ limit (Optional[int]):
+ The maximum number of projects to return, by default None
+ metadata (Optional[Dict[str, Any]]):
+ Metadata to filter by.
Yields:
- ------
- TracerSession
The projects.
+
+ Raises:
+ ValueError: If both reference_dataset_id and reference_dataset_name are given.
"""
params: Dict[str, Any] = {
"limit": min(limit, 100) if limit is not None else 100
@@ -2771,12 +2936,17 @@ def delete_project(
) -> None:
"""Delete a project from LangSmith.
- Parameters
- ----------
- project_name : str or None, default=None
- The name of the project to delete.
- project_id : str or None, default=None
- The ID of the project to delete.
+ Args:
+ project_name (Optional[str]):
+ The name of the project to delete.
+ project_id (Optional[str]):
+ The ID of the project to delete.
+
+ Returns:
+ None
+
+ Raises:
+ ValueError: If neither project_name nor project_id is provided.
"""
if project_name is not None:
project_id = str(self.read_project(project_name=project_name).id)
@@ -2802,32 +2972,27 @@ def create_dataset(
) -> ls_schemas.Dataset:
"""Create a dataset in the LangSmith API.
- Parameters
- ----------
- dataset_name : str
- The name of the dataset.
- description : Optional[str], default=None
- The description of the dataset.
- data_type : ls_schemas.DataType, default=ls_schemas.DataType.kv
- The data type of the dataset.
- inputs_schema : Optional[Dict[str, Any]], default=None
- The schema definition for the inputs of the dataset.
- outputs_schema : Optional[Dict[str, Any]], default=None
- The schema definition for the outputs of the dataset.
- transformations : Optional[List[ls_schemas.DatasetTransformation]], default=None
- A list of transformations to apply to the dataset.
- metadata : Optional[dict], default=None
- Additional metadata to associate with the dataset.
+ Args:
+ dataset_name (str):
+ The name of the dataset.
+ description (Optional[str]):
+ The description of the dataset.
+ data_type (DataType, default=DataType.kv):
+ The data type of the dataset.
+ inputs_schema (Optional[Dict[str, Any]]):
+ The schema definition for the inputs of the dataset.
+ outputs_schema (Optional[Dict[str, Any]]):
+ The schema definition for the outputs of the dataset.
+ transformations (Optional[List[DatasetTransformation]]):
+ A list of transformations to apply to the dataset.
+ metadata (Optional[dict]):
+ Additional metadata to associate with the dataset.
Returns:
- -------
- ls_schemas.Dataset
- The created dataset.
+ Dataset: The created dataset.
Raises:
- ------
- requests.HTTPError
- If the request to create the dataset fails.
+ requests.HTTPError: If the request to create the dataset fails.
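+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: the dataset name and description are placeholders.
+ dataset = client.create_dataset(
+ dataset_name="My Dataset",
+ description="A dataset for testing",
+ )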
"""
dataset: Dict[str, Any] = {
"name": dataset_name,
@@ -2860,21 +3025,18 @@ def create_dataset(
)
def has_dataset(
- self, *, dataset_name: Optional[str] = None, dataset_id: Optional[str] = None
+ self, *, dataset_name: Optional[str] = None, dataset_id: Optional[ID_TYPE] = None
) -> bool:
"""Check whether a dataset exists in your tenant.
- Parameters
- ----------
- dataset_name : str or None, default=None
- The name of the dataset to check.
- dataset_id : str or None, default=None
- The ID of the dataset to check.
+ Args:
+ dataset_name (Optional[str]):
+ The name of the dataset to check.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to check.
Returns:
- -------
- bool
- Whether the dataset exists.
+ bool: Whether the dataset exists.
"""
try:
self.read_dataset(dataset_name=dataset_name, dataset_id=dataset_id)
@@ -2891,17 +3053,14 @@ def read_dataset(
) -> ls_schemas.Dataset:
"""Read a dataset from the LangSmith API.
- Parameters
- ----------
- dataset_name : str or None, default=None
- The name of the dataset to read.
- dataset_id : UUID or None, default=None
- The ID of the dataset to read.
+ Args:
+ dataset_name (Optional[str]):
+ The name of the dataset to read.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to read.
Returns:
- -------
- Dataset
- The dataset.
+ Dataset: The dataset.
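+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: assumes a dataset with this name already exists.
+ dataset = client.read_dataset(dataset_name="My Dataset")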
"""
path = "/datasets"
params: Dict[str, Any] = {"limit": 1}
@@ -2943,45 +3102,39 @@ def diff_dataset_versions(
) -> ls_schemas.DatasetDiffInfo:
"""Get the difference between two versions of a dataset.
- Parameters
- ----------
- dataset_id : str or None, default=None
- The ID of the dataset.
- dataset_name : str or None, default=None
- The name of the dataset.
- from_version : str or datetime.datetime
- The starting version for the diff.
- to_version : str or datetime.datetime
- The ending version for the diff.
+ Args:
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset.
+ dataset_name (Optional[str]):
+ The name of the dataset.
+ from_version (Union[str, datetime.datetime]):
+ The starting version for the diff.
+ to_version (Union[str, datetime.datetime]):
+ The ending version for the diff.
Returns:
- -------
- DatasetDiffInfo
- The difference between the two versions of the dataset.
+ DatasetDiffInfo: The difference between the two versions of the dataset.
Examples:
- --------
- .. code-block:: python
-
- # Get the difference between two tagged versions of a dataset
- from_version = "prod"
- to_version = "dev"
- diff = client.diff_dataset_versions(
- dataset_name="my-dataset",
- from_version=from_version,
- to_version=to_version,
- )
- print(diff)
-
- # Get the difference between two timestamped versions of a dataset
- from_version = datetime.datetime(2024, 1, 1)
- to_version = datetime.datetime(2024, 2, 1)
- diff = client.diff_dataset_versions(
- dataset_name="my-dataset",
- from_version=from_version,
- to_version=to_version,
- )
- print(diff)
+ .. code-block:: python
+
+ # Get the difference between two tagged versions of a dataset
+ from_version = "prod"
+ to_version = "dev"
+ diff = client.diff_dataset_versions(
+ dataset_name="my-dataset",
+ from_version=from_version,
+ to_version=to_version,
+ )
+
+ # Get the difference between two timestamped versions of a dataset
+ from_version = datetime.datetime(2024, 1, 1)
+ to_version = datetime.datetime(2024, 2, 1)
+ diff = client.diff_dataset_versions(
+ dataset_name="my-dataset",
+ from_version=from_version,
+ to_version=to_version,
+ )
"""
if dataset_id is None:
if dataset_name is None:
@@ -3009,21 +3162,21 @@ def diff_dataset_versions(
return ls_schemas.DatasetDiffInfo(**response.json())
def read_dataset_openai_finetuning(
- self, dataset_id: Optional[str] = None, *, dataset_name: Optional[str] = None
+ self, dataset_id: Optional[ID_TYPE] = None, *, dataset_name: Optional[str] = None
) -> list:
"""Download a dataset in OpenAI Jsonl format and load it as a list of dicts.
- Parameters
- ----------
- dataset_id : str
- The ID of the dataset to download.
- dataset_name : str
- The name of the dataset to download.
+ Args:
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to download.
+ dataset_name (Optional[str]):
+ The name of the dataset to download.
Returns:
- -------
- list
- The dataset loaded as a list of dicts.
+ list[dict]: The dataset loaded as a list of dicts.
+
+ Raises:
+ ValueError: If neither dataset_id nor dataset_name is provided.
"""
path = "/datasets"
if dataset_id is not None:
@@ -3051,9 +3204,21 @@ def list_datasets(
) -> Iterator[ls_schemas.Dataset]:
"""List the datasets on the LangSmith API.
+ Args:
+ dataset_ids (Optional[List[Union[UUID, str]]]):
+ A list of dataset IDs to filter the results by.
+ data_type (Optional[str]):
+ The data type of the datasets to filter the results by.
+ dataset_name (Optional[str]):
+ The name of the dataset to filter the results by.
+ dataset_name_contains (Optional[str]):
+ A substring to search for in the dataset names.
+ metadata (Optional[Dict[str, Any]]):
+ A dictionary of metadata to filter the results by.
+ limit (Optional[int]):
+ The maximum number of datasets to return.
+
Yields:
- -------
- Dataset
The datasets.
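+
+ Examples:
+ .. code-block:: python
+ from langsmith import Client
+
+ client = Client()
+
+ # Illustrative sketch: list up to 10 datasets whose names contain "test".
+ datasets = list(client.list_datasets(dataset_name_contains="test", limit=10))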
"""
params: Dict[str, Any] = {
@@ -3089,12 +3254,14 @@ def delete_dataset(
) -> None:
"""Delete a dataset from the LangSmith API.
- Parameters
- ----------
- dataset_id : UUID or None, default=None
- The ID of the dataset to delete.
- dataset_name : str or None, default=None
- The name of the dataset to delete.
+ Args:
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to delete.
+ dataset_name (Optional[str]):
+ The name of the dataset to delete.
+
+ Returns:
+ None
"""
if dataset_name is not None:
dataset_id = self.read_dataset(dataset_name=dataset_name).id
@@ -3124,30 +3291,33 @@ def update_dataset_tag(
use the read_dataset_version method to find the exact version
to apply the tags to.
- Parameters
- ----------
- dataset_id : UUID
- The ID of the dataset to update.
- as_of : datetime.datetime
- The timestamp of the dataset to apply the new tags to.
- tag : str
- The new tag to apply to the dataset.
+ Args:
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to update.
+ dataset_name (Optional[str]):
+ The name of the dataset to update.
+ as_of (datetime.datetime):
+ The timestamp of the dataset to apply the new tags to.
+ tag (str):
+ The new tag to apply to the dataset.
+
+ Returns:
+ None
Examples:
- --------
- .. code-block:: python
+ .. code-block:: python
- dataset_name = "my-dataset"
- # Get the version of a dataset <= a given timestamp
- dataset_version = client.read_dataset_version(
- dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1)
- )
- # Assign that version a new tag
- client.update_dataset_tags(
- dataset_name="my-dataset",
- as_of=dataset_version.as_of,
- tag="prod",
- )
+ dataset_name = "my-dataset"
+ # Get the version of a dataset <= a given timestamp
+ dataset_version = client.read_dataset_version(
+ dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1)
+ )
+ # Assign that version a new tag
+ client.update_dataset_tags(
+ dataset_name="my-dataset",
+ as_of=dataset_version.as_of,
+ tag="prod",
+ )
"""
if dataset_name is not None:
dataset_id = self.read_dataset(dataset_name=dataset_name).id
@@ -3175,13 +3345,13 @@ def list_dataset_versions(
"""List dataset versions.
Args:
- dataset_id (Optional[ID_TYPE]): The ID of the dataset.
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
dataset_name (Optional[str]): The name of the dataset.
search (Optional[str]): The search query.
limit (Optional[int]): The maximum number of versions to return.
- Returns:
- Iterator[ls_schemas.DatasetVersion]: An iterator of dataset versions.
+ Yields:
+ The dataset versions.
"""
if dataset_id is None:
dataset_id = self.read_dataset(dataset_name=dataset_name).id
@@ -3219,25 +3389,23 @@ def read_dataset_version(
tag (Optional[str]): The tag of the dataset to retrieve.
Returns:
- ls_schemas.DatasetVersion: The dataset version.
-
+ DatasetVersion: The dataset version.
Examples:
- ---------
- .. code-block:: python
+ .. code-block:: python
- # Get the latest version of a dataset
- client.read_dataset_version(dataset_name="my-dataset", tag="latest")
+ # Get the latest version of a dataset
+ client.read_dataset_version(dataset_name="my-dataset", tag="latest")
- # Get the version of a dataset <= a given timestamp
- client.read_dataset_version(
- dataset_name="my-dataset",
- as_of=datetime.datetime(2024, 1, 1),
- )
+ # Get the version of a dataset <= a given timestamp
+ client.read_dataset_version(
+ dataset_name="my-dataset",
+ as_of=datetime.datetime(2024, 1, 1),
+ )
- # Get the version of a dataset with a specific tag
- client.read_dataset_version(dataset_name="my-dataset", tag="prod")
+ # Get the version of a dataset with a specific tag
+ client.read_dataset_version(dataset_name="my-dataset", tag="prod")
"""
if dataset_id is None:
dataset_id = self.read_dataset(dataset_name=dataset_name).id
@@ -3264,10 +3432,13 @@ def clone_public_dataset(
Args:
token_or_url (str): The token of the public dataset to clone.
- source_api_url: The URL of the langsmith server where the data is hosted.
+ source_api_url (Optional[str]): The URL of the langsmith server where the data is hosted.
Defaults to the API URL of your current client.
- dataset_name (str): The name of the dataset to create in your tenant.
+ dataset_name (Optional[str]): The name of the dataset to create in your tenant.
Defaults to the name of the public dataset.
+
+ Returns:
+ Dataset: The cloned dataset.
"""
source_api_url = source_api_url or self.api_url
source_api_url, token_uuid = _parse_token_or_url(token_or_url, source_api_url)
@@ -3334,7 +3505,23 @@ def create_llm_example(
dataset_name: Optional[str] = None,
created_at: Optional[datetime.datetime] = None,
) -> ls_schemas.Example:
- """Add an example (row) to an LLM-type dataset."""
+ """Add an example (row) to an LLM-type dataset.
+
+ Args:
+ prompt (str):
+ The input prompt for the example.
+ generation (Optional[str]):
+ The output generation for the example.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset.
+ dataset_name (Optional[str]):
+ The name of the dataset.
+ created_at (Optional[datetime.datetime]):
+ The creation timestamp of the example.
+
+ Returns:
+ Example: The created example.
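+
+ Examples:
+ A minimal sketch (assumes an LLM-type dataset named "my-llm-dataset" already exists):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.create_llm_example(
+ prompt="What is 2 + 2?",
+ generation="4",
+ dataset_name="my-llm-dataset",
+ )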
+ """
return self.create_example(
inputs={"input": prompt},
outputs={"output": generation},
@@ -3354,7 +3541,23 @@ def create_chat_example(
dataset_name: Optional[str] = None,
created_at: Optional[datetime.datetime] = None,
) -> ls_schemas.Example:
- """Add an example (row) to a Chat-type dataset."""
+ """Add an example (row) to a Chat-type dataset.
+
+ Args:
+ messages (List[Union[Mapping[str, Any], BaseMessageLike]]):
+ The input messages for the example.
+ generations (Optional[Union[Mapping[str, Any], BaseMessageLike]]):
+ The output messages for the example.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset.
+ dataset_name (Optional[str]):
+ The name of the dataset.
+ created_at (Optional[datetime.datetime]):
+ The creation timestamp of the example.
+
+ Returns:
+ Example: The created example.
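+
+ Examples:
+ A minimal sketch (assumes a Chat-type dataset named "my-chat-dataset" already exists and that langchain_core is installed for message objects):
+
+ .. code-block:: python
+
+ from langchain_core.messages import AIMessage, HumanMessage
+ from langsmith import Client
+
+ client = Client()
+ client.create_chat_example(
+ messages=[HumanMessage(content="What is 2 + 2?")],
+ generations=AIMessage(content="4"),
+ dataset_name="my-chat-dataset",
+ )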
+ """
final_input = []
for message in messages:
if ls_utils.is_base_message_like(message):
@@ -3390,7 +3593,17 @@ def create_example_from_run(
dataset_name: Optional[str] = None,
created_at: Optional[datetime.datetime] = None,
) -> ls_schemas.Example:
- """Add an example (row) to a dataset from a run."""
+ """Add an example (row) to a dataset from a run.
+
+ Args:
+ run (Run): The run to create an example from.
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
+ dataset_name (Optional[str]): The name of the dataset.
+ created_at (Optional[datetime.datetime]): The creation timestamp of the example.
+
+ Returns:
+ Example: The created example.
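+
+ Examples:
+ A minimal sketch (the run ID is an assumed placeholder and the target dataset is assumed to exist):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ run = client.read_run("<run-id>")
+ client.create_example_from_run(run=run, dataset_name="my-dataset")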
+ """
if dataset_id is None:
dataset_id = self.read_dataset(dataset_name=dataset_name).id
dataset_name = None # Nested call expects only 1 defined
@@ -3625,7 +3838,15 @@ def update_examples_multipart(
dataset_id: ID_TYPE,
updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None,
) -> ls_schemas.UpsertExamplesResponse:
- """Upload examples."""
+ """Update examples using multipart.
+
+ Args:
+ dataset_id (Union[UUID, str]): The ID of the dataset to update.
+ updates (Optional[List[ExampleUpdateWithAttachments]]): The updates to apply to the examples.
+
+ Returns:
+ UpsertExamplesResponse: The count and IDs of the successfully updated examples.
+
+ Raises:
+ ValueError: If the multipart examples endpoint is not enabled.
+ """
if not (self.info.instance_flags or {}).get(
"dataset_examples_multipart_enabled", False
):
@@ -3657,7 +3878,18 @@ def upload_examples_multipart(
dataset_id: ID_TYPE,
uploads: Optional[List[ls_schemas.ExampleUploadWithAttachments]] = None,
) -> ls_schemas.UpsertExamplesResponse:
- """Upload examples."""
+ """Upload examples using multipart.
+
+ Args:
+ dataset_id (Union[UUID, str]): The ID of the dataset to upload to.
+ uploads (Optional[List[ExampleUploadWithAttachments]]): The examples to upload.
+
+ Returns:
+ UpsertExamplesResponse: The count and IDs of the successfully uploaded examples.
+
+ Raises:
+ ValueError: If the multipart examples endpoint is not enabled.
+ """
if not (self.info.instance_flags or {}).get(
"dataset_examples_multipart_enabled", False
):
@@ -3691,7 +3923,6 @@ def upsert_examples_multipart(
.. deprecated:: 0.1.0
This method is deprecated. Use :func:`langsmith.upload_examples_multipart` instead.
-
""" # noqa: E501
if not (self.info.instance_flags or {}).get(
"examples_multipart_enabled", False
@@ -3733,25 +3964,31 @@ def create_examples(
) -> None:
"""Create examples in a dataset.
- Parameters
- ----------
- inputs : Sequence[Mapping[str, Any]]
- The input values for the examples.
- outputs : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
- The output values for the examples.
- metadata : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
- The metadata for the examples.
- splits : Optional[Sequence[Optional[str | List[str]]]], default=None
- The splits for the examples, which are divisions
- of your dataset such as 'train', 'test', or 'validation'.
- source_run_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None
- The IDs of the source runs associated with the examples.
- ids : Optional[Sequence[ID_TYPE]], default=None
- The IDs of the examples.
- dataset_id : Optional[ID_TYPE], default=None
- The ID of the dataset to create the examples in.
- dataset_name : Optional[str], default=None
- The name of the dataset to create the examples in.
+ Args:
+ inputs (Sequence[Mapping[str, Any]]):
+ The input values for the examples.
+ outputs (Optional[Sequence[Optional[Mapping[str, Any]]]]):
+ The output values for the examples.
+ metadata (Optional[Sequence[Optional[Mapping[str, Any]]]]):
+ The metadata for the examples.
+ splits (Optional[Sequence[Optional[str | List[str]]]]):
+ The splits for the examples, which are divisions
+ of your dataset such as 'train', 'test', or 'validation'.
+ source_run_ids (Optional[Sequence[Optional[Union[UUID, str]]]]):
+ The IDs of the source runs associated with the examples.
+ ids (Optional[Sequence[Union[UUID, str]]]):
+ The IDs of the examples.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to create the examples in.
+ dataset_name (Optional[str]):
+ The name of the dataset to create the examples in.
+ **kwargs (Any): Additional keyword arguments are ignored.
+
+ Raises:
+ ValueError: If neither dataset_id nor dataset_name is provided.
+
+ Returns:
+ None
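+
+ Examples:
+ A minimal sketch (assumes a dataset named "my-dataset" already exists):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.create_examples(
+ inputs=[{"question": "What is 2 + 2?"}, {"question": "What is 3 + 3?"}],
+ outputs=[{"answer": "4"}, {"answer": "6"}],
+ dataset_name="my-dataset",
+ )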
"""
if dataset_id is None and dataset_name is None:
raise ValueError("Either dataset_id or dataset_name must be provided.")
@@ -3822,25 +4059,25 @@ def create_example(
for a model or chain.
Args:
- inputs : Mapping[str, Any]
+ inputs (Mapping[str, Any]):
The input values for the example.
- dataset_id : UUID or None, default=None
+ dataset_id (Optional[Union[UUID, str]]):
The ID of the dataset to create the example in.
- dataset_name : str or None, default=None
+ dataset_name (Optional[str]):
The name of the dataset to create the example in.
- created_at : datetime or None, default=None
+ created_at (Optional[datetime.datetime]):
The creation timestamp of the example.
- outputs : Mapping[str, Any] or None, default=None
+ outputs (Optional[Mapping[str, Any]]):
The output values for the example.
- metadata : Mapping[str, Any] or None, default=None
+ metadata (Optional[Mapping[str, Any]]):
The metadata for the example.
- split : str or List[str] or None, default=None
+ split (Optional[str | List[str]]):
The splits for the example, which are divisions
of your dataset such as 'train', 'test', or 'validation'.
- example_id : UUID or None, default=None
+ example_id (Optional[Union[UUID, str]]):
The ID of the example to create. If not provided, a new
example will be created.
- source_run_id : UUID or None, default=None
+ source_run_id (Optional[Union[UUID, str]]):
The ID of the source run associated with this example.
Returns:
@@ -3880,7 +4117,11 @@ def read_example(
"""Read an example from the LangSmith API.
Args:
- example_id (UUID): The ID of the example to read.
+ example_id (Union[UUID, str]): The ID of the example to read.
+ as_of (Optional[datetime.datetime]): The dataset version tag OR
+ timestamp to retrieve the example as of.
+ Response examples will only be those that were present at the time
+ of the tagged (or timestamped) version.
Returns:
Example: The example.
@@ -3931,34 +4172,39 @@ def list_examples(
"""Retrieve the example rows of the specified dataset.
Args:
- dataset_id (UUID, optional): The ID of the dataset to filter by.
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to filter by.
Defaults to None.
- dataset_name (str, optional): The name of the dataset to filter by.
+ dataset_name (Optional[str]): The name of the dataset to filter by.
Defaults to None.
- example_ids (List[UUID], optional): The IDs of the examples to filter by.
+ example_ids (Optional[Sequence[Union[UUID, str]]]): The IDs of the examples to filter by.
Defaults to None.
- as_of (datetime, str, or optional): The dataset version tag OR
+ as_of (Optional[Union[datetime.datetime, str]]): The dataset version tag OR
timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time
of the tagged (or timestamped) version.
- splits (List[str], optional): A list of dataset splits, which are
+ splits (Optional[Sequence[str]]): A list of dataset splits, which are
divisions of your dataset such as 'train', 'test', or 'validation'.
Returns examples only from the specified splits.
- inline_s3_urls (bool, optional): Whether to inline S3 URLs.
+ inline_s3_urls (bool, default=True): Whether to inline S3 URLs.
Defaults to True.
- offset (int): The offset to start from. Defaults to 0.
- limit (int, optional): The maximum number of examples to return.
- filter (str, optional): A structured fileter string to apply to
+ offset (int, default=0): The offset to start from. Defaults to 0.
+ limit (Optional[int]): The maximum number of examples to return.
+ metadata (Optional[dict]): A dictionary of metadata to filter by.
+ filter (Optional[str]): A structured filter string to apply to
the examples.
+ include_attachments (bool, default=False): Whether to include the
+ attachments in the response. Defaults to False.
+ **kwargs (Any): Additional keyword arguments are ignored.
Yields:
- Example: The examples.
+ The examples.
Examples:
List all examples for a dataset:
.. code-block:: python
+
from langsmith import Client
client = Client()
@@ -3970,6 +4216,7 @@ def list_examples(
List examples by id
.. code-block:: python
+
example_ids = [
'734fc6a0-c187-4266-9721-90b7a025751a',
'd6b4c1b9-6160-4d63-9b61-b034c585074f',
@@ -3980,11 +4227,13 @@ def list_examples(
List examples by metadata
.. code-block:: python
+
examples = client.list_examples(dataset_name=dataset_name, metadata={"foo": "bar"})
List examples by structured filter
.. code-block:: python
+
examples = client.list_examples(
dataset_name=dataset_name,
filter='and(not(has(metadata, \'{"foo": "bar"}\')), exists(metadata, "tenant_id"))'
@@ -4050,16 +4299,14 @@ def index_dataset(
``client.similar_examples()``.
Args:
- dataset_id (UUID): The ID of the dataset to index.
- tag (str, optional): The version of the dataset to index. If 'latest'
+ dataset_id (Union[UUID, str]): The ID of the dataset to index.
+ tag (Optional[str]): The version of the dataset to index. If 'latest'
then any updates to the dataset (additions, updates, deletions of
examples) will be reflected in the index.
+ **kwargs (Any): Additional keyword arguments to pass as part of request body.
Returns:
None
-
- Raises:
- requests.HTTPError
""" # noqa: E501
dataset_id = _as_uuid(dataset_id, "dataset_id")
resp = self.request_with_retries(
@@ -4091,14 +4338,17 @@ def similar_examples(
inputs (dict): The inputs to use as a search query. Must match the dataset
input schema. Must be JSON serializable.
limit (int): The maximum number of examples to return.
- dataset_id (str or UUID): The ID of the dataset to search over.
- filter (str, optional): A filter string to apply to the search results. Uses
+ dataset_id (Union[UUID, str]): The ID of the dataset to search over.
+ filter (Optional[str]): A filter string to apply to the search results. Uses
the same syntax as the `filter` parameter in `list_runs()`. Only a subset
of operations are supported. Defaults to None.
For example, you can use ``and(eq(metadata.some_tag, 'some_value'), neq(metadata.env, 'dev'))``
to filter only examples where some_tag has some_value, and the environment is not dev.
- kwargs (Any): Additional keyword args to pass as part of request body.
+ **kwargs: Additional keyword arguments to pass as part of request body.
+
+ Returns:
+ list[ExampleSearch]: List of ExampleSearch objects.
Examples:
.. code-block:: python
@@ -4112,7 +4362,7 @@ def similar_examples(
dataset_id="...",
)
- .. code-block:: pycon
+ .. code-block:: python
[
ExampleSearch(
@@ -4173,26 +4423,25 @@ def update_example(
) -> Dict[str, Any]:
"""Update a specific example.
- Parameters
- ----------
- example_id : str or UUID
- The ID of the example to update.
- inputs : Dict[str, Any] or None, default=None
- The input values to update.
- outputs : Mapping[str, Any] or None, default=None
- The output values to update.
- metadata : Dict or None, default=None
- The metadata to update.
- split : str or List[str] or None, default=None
- The dataset split to update, such as
- 'train', 'test', or 'validation'.
- dataset_id : UUID or None, default=None
- The ID of the dataset to update.
+ Args:
+ example_id (Union[UUID, str]):
+ The ID of the example to update.
+ inputs (Optional[Dict[str, Any]]):
+ The input values to update.
+ outputs (Optional[Mapping[str, Any]]):
+ The output values to update.
+ metadata (Optional[Dict]):
+ The metadata to update.
+ split (Optional[str | List[str]]):
+ The dataset split to update, such as
+ 'train', 'test', or 'validation'.
+ dataset_id (Optional[Union[UUID, str]]):
+ The ID of the dataset to update.
+ attachments_operations (Optional[AttachmentsOperations]):
+ The attachments operations to perform.
Returns:
- -------
- Dict[str, Any]
- The updated example.
+ Dict[str, Any]: The updated example.
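+
+ Examples:
+ A minimal sketch (the example ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.update_example(
+ example_id="<example-id>",
+ outputs={"answer": "4"},
+ split="test",
+ )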
"""
if attachments_operations is not None:
if not (self.info.instance_flags or {}).get(
@@ -4233,26 +4482,25 @@ def update_examples(
) -> Dict[str, Any]:
"""Update multiple examples.
- Parameters
- ----------
- example_ids : Sequence[ID_TYPE]
- The IDs of the examples to update.
- inputs : Optional[Sequence[Optional[Dict[str, Any]]], default=None
- The input values for the examples.
- outputs : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
- The output values for the examples.
- metadata : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
- The metadata for the examples.
- split : Optional[Sequence[Optional[str | List[str]]]], default=None
- The splits for the examples, which are divisions
- of your dataset such as 'train', 'test', or 'validation'.
- dataset_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None
- The IDs of the datasets to move the examples to.
+ Args:
+ example_ids (Sequence[Union[UUID, str]]):
+ The IDs of the examples to update.
+ inputs (Optional[Sequence[Optional[Dict[str, Any]]]]):
+ The input values for the examples.
+ outputs (Optional[Sequence[Optional[Mapping[str, Any]]]]):
+ The output values for the examples.
+ metadata (Optional[Sequence[Optional[Mapping[str, Any]]]]):
+ The metadata for the examples.
+ splits (Optional[Sequence[Optional[str | List[str]]]]):
+ The splits for the examples, which are divisions
+ of your dataset such as 'train', 'test', or 'validation'.
+ dataset_ids (Optional[Sequence[Optional[Union[UUID, str]]]]):
+ The IDs of the datasets to move the examples to.
+ attachments_operations (Optional[Sequence[Optional[AttachmentsOperations]]]):
+ The operations to perform on the attachments.
Returns:
- -------
- Dict[str, Any]
- The response from the server (specifies the number of examples updated).
+ Dict[str, Any]: The response from the server (specifies the number of examples updated).
"""
if attachments_operations is not None:
if not (self.info.instance_flags or {}).get(
@@ -4316,10 +4564,12 @@ def update_examples(
def delete_example(self, example_id: ID_TYPE) -> None:
"""Delete an example by ID.
- Parameters
- ----------
- example_id : str or UUID
- The ID of the example to delete.
+ Args:
+ example_id (Union[UUID, str]):
+ The ID of the example to delete.
+
+ Returns:
+ None
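+
+ Examples:
+ A minimal sketch (the example ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.delete_example(example_id="<example-id>")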
"""
response = self.request_with_retries(
"DELETE",
@@ -4338,13 +4588,14 @@ def list_dataset_splits(
"""Get the splits for a dataset.
Args:
- dataset_id (ID_TYPE): The ID of the dataset.
- as_of (Optional[Union[str, datetime.datetime]], optional): The version
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
+ dataset_name (Optional[str]): The name of the dataset.
+ as_of (Optional[Union[str, datetime.datetime]]): The version
of the dataset to retrieve splits for. Can be a timestamp or a
string tag. Defaults to "latest".
Returns:
- List[str]: The names of this dataset's.
+ List[str]: The names of this dataset's splits.
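+
+ Examples:
+ A minimal sketch (assumes a dataset named "my-dataset" already exists):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ splits = client.list_dataset_splits(dataset_name="my-dataset")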
"""
if dataset_id is None:
if dataset_name is None:
@@ -4376,11 +4627,12 @@ def update_dataset_splits(
"""Update the splits for a dataset.
Args:
- dataset_id (ID_TYPE): The ID of the dataset to update.
+ dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to update.
+ dataset_name (Optional[str]): The name of the dataset to update.
split_name (str): The name of the split to update.
- example_ids (List[ID_TYPE]): The IDs of the examples to add to or
+ example_ids (List[Union[UUID, str]]): The IDs of the examples to add to or
remove from the split.
- remove (bool, optional): If True, remove the examples from the split.
+ remove (Optional[bool]): If True, remove the examples from the split.
If False, add the examples to the split. Defaults to False.
Returns:
@@ -4411,22 +4663,17 @@ def _resolve_run_id(
) -> ls_schemas.Run:
"""Resolve the run ID.
- Parameters
- ----------
- run : Run or RunBase or str or UUID
- The run to resolve.
- load_child_runs : bool
- Whether to load child runs.
+ Args:
+ run (Union[Run, RunBase, str, UUID]):
+ The run to resolve.
+ load_child_runs (bool):
+ Whether to load child runs.
Returns:
- -------
- Run
- The resolved run.
+ Run: The resolved run.
Raises:
- ------
- TypeError
- If the run type is invalid.
+ TypeError: If the run type is invalid.
"""
if isinstance(run, (str, uuid.UUID)):
run_ = self.read_run(run, load_child_runs=load_child_runs)
@@ -4441,17 +4688,14 @@ def _resolve_example_id(
) -> Optional[ls_schemas.Example]:
"""Resolve the example ID.
- Parameters
- ----------
- example : Example or str or UUID or dict or None
- The example to resolve.
- run : Run
- The run associated with the example.
+ Args:
+ example (Optional[Union[Example, str, UUID, dict]]):
+ The example to resolve.
+ run (Run):
+ The run associated with the example.
Returns:
- -------
- Example or None
- The resolved example.
+ Optional[Example]: The resolved example.
"""
if isinstance(example, (str, uuid.UUID)):
reference_example_ = self.read_example(example)
@@ -4521,25 +4765,22 @@ def evaluate_run(
) -> ls_evaluator.EvaluationResult:
"""Evaluate a run.
- Parameters
- ----------
- run : Run or RunBase or str or UUID
- The run to evaluate.
- evaluator : RunEvaluator
- The evaluator to use.
- source_info : Dict[str, Any] or None, default=None
- Additional information about the source of the evaluation to log
- as feedback metadata.
- reference_example : Example or str or dict or UUID or None, default=None
- The example to use as a reference for the evaluation.
- If not provided, the run's reference example will be used.
- load_child_runs : bool, default=False
- Whether to load child runs when resolving the run ID.
+ Args:
+ run (Union[Run, RunBase, str, UUID]):
+ The run to evaluate.
+ evaluator (RunEvaluator):
+ The evaluator to use.
+ source_info (Optional[Dict[str, Any]]):
+ Additional information about the source of the evaluation to log
+ as feedback metadata.
+ reference_example (Optional[Union[Example, str, dict, UUID]]):
+ The example to use as a reference for the evaluation.
+ If not provided, the run's reference example will be used.
+ load_child_runs (bool, default=False):
+ Whether to load child runs when resolving the run ID.
Returns:
- -------
- Feedback
- The feedback object created by the evaluation.
+ Feedback: The feedback object created by the evaluation.
"""
run_ = self._resolve_run_id(run, load_child_runs=load_child_runs)
reference_example_ = self._resolve_example_id(reference_example, run_)
@@ -4616,25 +4857,22 @@ async def aevaluate_run(
) -> ls_evaluator.EvaluationResult:
"""Evaluate a run asynchronously.
- Parameters
- ----------
- run : Run or str or UUID
- The run to evaluate.
- evaluator : RunEvaluator
- The evaluator to use.
- source_info : Dict[str, Any] or None, default=None
- Additional information about the source of the evaluation to log
- as feedback metadata.
- reference_example : Optional Example or UUID, default=None
- The example to use as a reference for the evaluation.
- If not provided, the run's reference example will be used.
- load_child_runs : bool, default=False
- Whether to load child runs when resolving the run ID.
+ Args:
+ run (Union[Run, str, UUID]):
+ The run to evaluate.
+ evaluator (RunEvaluator):
+ The evaluator to use.
+ source_info (Optional[Dict[str, Any]]):
+ Additional information about the source of the evaluation to log
+ as feedback metadata.
+ reference_example (Optional[Union[Example, str, dict, UUID]]):
+ The example to use as a reference for the evaluation.
+ If not provided, the run's reference example will be used.
+ load_child_runs (bool, default=False):
+ Whether to load child runs when resolving the run ID.
Returns:
- -------
- EvaluationResult
- The evaluation result object created by the evaluation.
+ EvaluationResult: The evaluation result object created by the evaluation.
"""
run_ = self._resolve_run_id(run, load_child_runs=load_child_runs)
reference_example_ = self._resolve_example_id(reference_example, run_)
@@ -4676,52 +4914,55 @@ def create_feedback(
) -> ls_schemas.Feedback:
"""Create a feedback in the LangSmith API.
- Parameters
- ----------
- run_id : str or UUID
- The ID of the run to provide feedback for. Either the run_id OR
- the project_id must be provided.
- trace_id : str or UUID
- The trace ID of the run to provide feedback for. This is optional.
- key : str
- The name of the metric or 'aspect' this feedback is about.
- score : float or int or bool or None, default=None
- The score to rate this run on the metric or aspect.
- value : float or int or bool or str or dict or None, default=None
- The display value or non-numeric value for this feedback.
- correction : dict or None, default=None
- The proper ground truth for this run.
- comment : str or None, default=None
- A comment about this feedback, such as a justification for the score or
- chain-of-thought trajectory for an LLM judge.
- source_info : Dict[str, Any] or None, default=None
- Information about the source of this feedback.
- feedback_source_type : FeedbackSourceType or str, default=FeedbackSourceType.API
- The type of feedback source, such as model (for model-generated feedback)
- or API.
- source_run_id : str or UUID or None, default=None,
- The ID of the run that generated this feedback, if a "model" type.
- feedback_id : str or UUID or None, default=None
- The ID of the feedback to create. If not provided, a random UUID will be
- generated.
- feedback_config: langsmith.schemas.FeedbackConfig or None, default=None,
- The configuration specifying how to interpret feedback with this key.
- Examples include continuous (with min/max bounds), categorical,
- or freeform.
- stop_after_attempt : int, default=10
- The number of times to retry the request before giving up.
- project_id : str or UUID
- The ID of the project_id to provide feedback on. One - and only one - of
- this and run_id must be provided.
- comparative_experiment_id : str or UUID
- If this feedback was logged as a part of a comparative experiment, this
- associates the feedback with that experiment.
- feedback_group_id : str or UUID
- When logging preferences, ranking runs, or other comparative feedback,
- this is used to group feedback together.
- extra : dict
- Metadata for the feedback.
- trace_id: Optional[ID_TYPE] = The trace ID of the run to provide feedback for. Enables batch ingestion.
+ Args:
+ run_id (Optional[Union[UUID, str]]):
+ The ID of the run to provide feedback for. Either the run_id OR
+ the project_id must be provided.
+ key (str):
+ The name of the metric or 'aspect' this feedback is about.
+ score (Optional[Union[float, int, bool]]):
+ The score to rate this run on the metric or aspect.
+ value (Optional[Union[float, int, bool, str, dict]]):
+ The display value or non-numeric value for this feedback.
+ correction (Optional[dict]):
+ The proper ground truth for this run.
+ comment (Optional[str]):
+ A comment about this feedback, such as a justification for the score or
+ chain-of-thought trajectory for an LLM judge.
+ source_info (Optional[Dict[str, Any]]):
+ Information about the source of this feedback.
+ feedback_source_type (Union[FeedbackSourceType, str]):
+ The type of feedback source, such as model (for model-generated feedback)
+ or API.
+ source_run_id (Optional[Union[UUID, str]]):
+ The ID of the run that generated this feedback, if a "model" type.
+ feedback_id (Optional[Union[UUID, str]]):
+ The ID of the feedback to create. If not provided, a random UUID will be
+ generated.
+ feedback_config (Optional[FeedbackConfig]):
+ The configuration specifying how to interpret feedback with this key.
+ Examples include continuous (with min/max bounds), categorical,
+ or freeform.
+ stop_after_attempt (int, default=10):
+ The number of times to retry the request before giving up.
+ project_id (Optional[Union[UUID, str]]):
+ The ID of the project to provide feedback on. One - and only one - of
+ this and run_id must be provided.
+ comparative_experiment_id (Optional[Union[UUID, str]]):
+ If this feedback was logged as a part of a comparative experiment, this
+ associates the feedback with that experiment.
+ feedback_group_id (Optional[Union[UUID, str]]):
+ When logging preferences, ranking runs, or other comparative feedback,
+ this is used to group feedback together.
+ extra (Optional[Dict]):
+ Metadata for the feedback.
+ trace_id (Optional[Union[UUID, str]]):
+ The trace ID of the run to provide feedback for. Enables batch ingestion.
+ **kwargs (Any):
+ Additional keyword arguments.
+
+ Returns:
+ Feedback: The created feedback object.
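+
+ Examples:
+ A minimal sketch (the run ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.create_feedback(
+ run_id="<run-id>",
+ key="correctness",
+ score=1.0,
+ comment="The answer matches the reference.",
+ )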
"""
if run_id is None and project_id is None:
raise ValueError("One of run_id and project_id must be provided")
@@ -4830,18 +5071,20 @@ def update_feedback(
) -> None:
"""Update a feedback in the LangSmith API.
- Parameters
- ----------
- feedback_id : str or UUID
- The ID of the feedback to update.
- score : float or int or bool or None, default=None
- The score to update the feedback with.
- value : float or int or bool or str or dict or None, default=None
- The value to update the feedback with.
- correction : dict or None, default=None
- The correction to update the feedback with.
- comment : str or None, default=None
- The comment to update the feedback with.
+ Args:
+ feedback_id (Union[UUID, str]):
+ The ID of the feedback to update.
+ score (Optional[Union[float, int, bool]]):
+ The score to update the feedback with.
+ value (Optional[Union[float, int, bool, str, dict]]):
+ The value to update the feedback with.
+ correction (Optional[dict]):
+ The correction to update the feedback with.
+ comment (Optional[str]):
+ The comment to update the feedback with.
+
+ Returns:
+ None
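+
+ Examples:
+ A minimal sketch (the feedback ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ client.update_feedback(
+ feedback_id="<feedback-id>",
+ score=0.0,
+ comment="Reviewed manually; the answer was incorrect.",
+ )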
"""
feedback_update: Dict[str, Any] = {}
if score is not None:
@@ -4863,15 +5106,12 @@ def update_feedback(
def read_feedback(self, feedback_id: ID_TYPE) -> ls_schemas.Feedback:
"""Read a feedback from the LangSmith API.
- Parameters
- ----------
- feedback_id : str or UUID
- The ID of the feedback to read.
+ Args:
+ feedback_id (Union[UUID, str]):
+ The ID of the feedback to read.
Returns:
- -------
- Feedback
- The feedback.
+ Feedback: The feedback.
"""
response = self.request_with_retries(
"GET",
@@ -4890,23 +5130,20 @@ def list_feedback(
) -> Iterator[ls_schemas.Feedback]:
"""List the feedback objects on the LangSmith API.
- Parameters
- ----------
- run_ids : List[str or UUID] or None, default=None
- The IDs of the runs to filter by.
- feedback_key: List[str] or None, default=None
- The feedback key(s) to filter by. Example: 'correctness'
- The query performs a union of all feedback keys.
- feedback_source_type: List[FeedbackSourceType] or None, default=None
- The type of feedback source, such as model
- (for model-generated feedback) or API.
- limit : int or None, default=None
- **kwargs : Any
- Additional keyword arguments.
+ Args:
+ run_ids (Optional[Sequence[Union[UUID, str]]]):
+ The IDs of the runs to filter by.
+ feedback_key (Optional[Sequence[str]]):
+ The feedback key(s) to filter by. Example: 'correctness'.
+ The query performs a union of all feedback keys.
+ feedback_source_type (Optional[Sequence[FeedbackSourceType]]):
+ The type of feedback source, such as model or API.
+ limit (Optional[int]):
+ The maximum number of feedback objects to return.
+ **kwargs (Any):
+ Additional keyword arguments.
Yields:
- ------
- Feedback
The feedback objects.
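+
+ Examples:
+ A minimal sketch (the run ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ for feedback in client.list_feedback(run_ids=["<run-id>"], feedback_key=["correctness"]):
+ print(feedback.score)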
"""
params: dict = {
@@ -4928,10 +5165,12 @@ def list_feedback(
def delete_feedback(self, feedback_id: ID_TYPE) -> None:
"""Delete a feedback by ID.
- Parameters
- ----------
- feedback_id : str or UUID
- The ID of the feedback to delete.
+ Args:
+ feedback_id (Union[UUID, str]):
+ The ID of the feedback to delete.
+
+ Returns:
+ None
"""
response = self.request_with_retries(
"DELETE",
@@ -4955,22 +5194,22 @@ def create_feedback_from_token(
Args:
token_or_url (Union[str, uuid.UUID]): The token or URL from which to create
feedback.
- score (Union[float, int, bool, None], optional): The score of the feedback.
+ score (Optional[Union[float, int, bool]]): The score of the feedback.
Defaults to None.
- value (Union[float, int, bool, str, dict, None], optional): The value of the
+ value (Optional[Union[float, int, bool, str, dict]]): The value of the
feedback. Defaults to None.
- correction (Union[dict, None], optional): The correction of the feedback.
+ correction (Optional[dict]): The correction of the feedback.
Defaults to None.
- comment (Union[str, None], optional): The comment of the feedback. Defaults
+ comment (Optional[str]): The comment of the feedback. Defaults
to None.
- metadata (Optional[dict], optional): Additional metadata for the feedback.
+ metadata (Optional[dict]): Additional metadata for the feedback.
Defaults to None.
Raises:
ValueError: If the source API URL is invalid.
Returns:
- None: This method does not return anything.
+ None
"""
source_api_url, token_uuid = _parse_token_or_url(
token_or_url, self.api_url, num_parts=1
@@ -5010,21 +5249,23 @@ def create_presigned_feedback_token(
API key.
Args:
- run_id:
- feedback_key:
- expiration: The expiration time of the pre-signed URL.
+ run_id (Union[UUID, str]):
+ The ID of the run.
+ feedback_key (str):
+ The key of the feedback to create.
+ expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL.
Either a datetime or a timedelta offset from now.
Default to 3 hours.
- feedback_config: FeedbackConfig or None.
+ feedback_config (Optional[FeedbackConfig]):
If creating a feedback_key for the first time,
this defines how the metric should be interpreted,
such as a continuous score (w/ optional bounds),
or distribution over categorical values.
- feedback_id: The ID of the feedback to create. If not provided, a new
+ feedback_id (Optional[Union[UUID, str]]): The ID of the feedback to create. If not provided, a new
feedback will be created.
Returns:
- The pre-signed URL for uploading feedback data.
+ FeedbackIngestToken: The pre-signed URL for uploading feedback data.
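+
+ Examples:
+ A minimal sketch (the run ID is an assumed placeholder):
+
+ .. code-block:: python
+
+ import datetime
+
+ from langsmith import Client
+
+ client = Client()
+ token = client.create_presigned_feedback_token(
+ run_id="<run-id>",
+ feedback_key="user_rating",
+ expiration=datetime.timedelta(hours=1),
+ )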
"""
body: Dict[str, Any] = {
"run_id": run_id,
@@ -5074,19 +5315,21 @@ def create_presigned_feedback_tokens(
API key.
Args:
- run_id:
- feedback_key:
- expiration: The expiration time of the pre-signed URL.
+ run_id (Union[UUID, str]):
+ The ID of the run.
+ feedback_keys (Sequence[str]):
+ The keys of the feedback to create.
+ expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL.
Either a datetime or a timedelta offset from now.
Default to 3 hours.
- feedback_config: FeedbackConfig or None.
+ feedback_configs (Optional[Sequence[Optional[FeedbackConfig]]]):
If creating a feedback_key for the first time,
this defines how the metric should be interpreted,
such as a continuous score (w/ optional bounds),
or distribution over categorical values.
Returns:
- The pre-signed URL for uploading feedback data.
+ Sequence[FeedbackIngestToken]: The pre-signed URLs for uploading feedback data.
"""
# validate
if feedback_configs is not None and len(feedback_keys) != len(feedback_configs):
@@ -5166,12 +5409,11 @@ def list_presigned_feedback_tokens(
"""List the feedback ingest tokens for a run.
Args:
- run_id: The ID of the run to filter by.
- limit: The maximum number of tokens to return.
+ run_id (Union[UUID, str]): The ID of the run to filter by.
+ limit (Optional[int]): The maximum number of tokens to return.
Yields:
- FeedbackIngestToken
- The feedback ingest tokens.
+ The feedback ingest tokens.
"""
params = {
"run_id": _as_uuid(run_id, "run_id"),
@@ -5197,17 +5439,17 @@ def list_annotation_queues(
"""List the annotation queues on the LangSmith API.
Args:
- queue_ids : List[str or UUID] or None, default=None
+ queue_ids (Optional[List[Union[UUID, str]]]):
The IDs of the queues to filter by.
- name : str or None, default=None
+ name (Optional[str]):
The name of the queue to filter by.
- name_contains : str or None, default=None
+ name_contains (Optional[str]):
The substring that the queue name should contain.
- limit : int or None, default=None
+ limit (Optional[int]):
+ The maximum number of queues to return.
Yields:
- AnnotationQueue
- The annotation queues.
+ The annotation queues.
"""
params: dict = {
"ids": (
@@ -5238,16 +5480,15 @@ def create_annotation_queue(
"""Create an annotation queue on the LangSmith API.
Args:
- name : str
+ name (str):
The name of the annotation queue.
- description : str, optional
+ description (Optional[str]):
The description of the annotation queue.
- queue_id : str or UUID, optional
+ queue_id (Optional[Union[UUID, str]]):
The ID of the annotation queue.
Returns:
- AnnotationQueue
- The created annotation queue object.
+ AnnotationQueue: The created annotation queue object.
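+
+ Examples:
+ A minimal sketch (the queue name and description are assumed placeholders):
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ queue = client.create_annotation_queue(
+ name="Runs to review",
+ description="Production runs that need a human label.",
+ )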
"""
body = {
"name": name,
@@ -5268,10 +5509,10 @@ def read_annotation_queue(self, queue_id: ID_TYPE) -> ls_schemas.AnnotationQueue
"""Read an annotation queue with the specified queue ID.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue to read.
+ queue_id (Union[UUID, str]): The ID of the annotation queue to read.
Returns:
- ls_schemas.AnnotationQueue: The annotation queue object.
+ AnnotationQueue: The annotation queue object.
"""
# TODO: Replace when actual endpoint is added
return next(self.list_annotation_queues(queue_ids=[queue_id]))
@@ -5282,10 +5523,13 @@ def update_annotation_queue(
"""Update an annotation queue with the specified queue_id.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue to update.
+ queue_id (Union[UUID, str]): The ID of the annotation queue to update.
name (str): The new name for the annotation queue.
- description (Optional[str], optional): The new description for the
+ description (Optional[str]): The new description for the
annotation queue. Defaults to None.
+
+ Returns:
+ None
"""
response = self.request_with_retries(
"PATCH",
@@ -5301,7 +5545,10 @@ def delete_annotation_queue(self, queue_id: ID_TYPE) -> None:
"""Delete an annotation queue with the specified queue ID.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue to delete.
+ queue_id (Union[UUID, str]): The ID of the annotation queue to delete.
+
+ Returns:
+ None
"""
response = self.request_with_retries(
"DELETE",
@@ -5316,9 +5563,12 @@ def add_runs_to_annotation_queue(
"""Add runs to an annotation queue with the specified queue ID.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue.
- run_ids (List[ID_TYPE]): The IDs of the runs to be added to the annotation
+ queue_id (Union[UUID, str]): The ID of the annotation queue.
+ run_ids (List[Union[UUID, str]]): The IDs of the runs to be added to the annotation
queue.
+
+ Returns:
+ None
"""
response = self.request_with_retries(
"POST",
@@ -5333,9 +5583,12 @@ def delete_run_from_annotation_queue(
"""Delete a run from an annotation queue with the specified queue ID and run ID.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue.
- run_id (ID_TYPE): The ID of the run to be added to the annotation
+ queue_id (Union[UUID, str]): The ID of the annotation queue.
+ run_id (Union[UUID, str]): The ID of the run to be removed from the annotation
queue.
+
+ Returns:
+ None
"""
response = self.request_with_retries(
"DELETE",
@@ -5349,15 +5602,15 @@ def get_run_from_annotation_queue(
"""Get a run from an annotation queue at the specified index.
Args:
- queue_id (ID_TYPE): The ID of the annotation queue.
+ queue_id (Union[UUID, str]): The ID of the annotation queue.
index (int): The index of the run to retrieve.
Returns:
- ls_schemas.RunWithAnnotationQueueInfo: The run at the specified index.
+ RunWithAnnotationQueueInfo: The run at the specified index.
Raises:
- ls_utils.LangSmithNotFoundError: If the run is not found at the given index.
- ls_utils.LangSmithError: For other API-related errors.
+ LangSmithNotFoundError: If the run is not found at the given index.
+ LangSmithError: For other API-related errors.
"""
base_url = f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}/run"
response = self.request_with_retries(
@@ -5384,15 +5637,16 @@ def create_comparative_experiment(
These experiments compare 2 or more experiment results over a shared dataset.
Args:
- name: The name of the comparative experiment.
- experiments: The IDs of the experiments to compare.
- reference_dataset: The ID of the dataset these experiments are compared on.
- description: The description of the comparative experiment.
- created_at: The creation time of the comparative experiment.
- metadata: Additional metadata for the comparative experiment.
+ name (str): The name of the comparative experiment.
+ experiments (Sequence[Union[UUID, str]]): The IDs of the experiments to compare.
+ reference_dataset (Optional[Union[UUID, str]]): The ID of the dataset these experiments are compared on.
+ description (Optional[str]): The description of the comparative experiment.
+ created_at (Optional[datetime.datetime]): The creation time of the comparative experiment.
+ metadata (Optional[Dict[str, Any]]): Additional metadata for the comparative experiment.
+ id (Optional[Union[UUID, str]]): The ID of the comparative experiment.
Returns:
- The created comparative experiment object.
+ ComparativeExperiment: The created comparative experiment object.
"""
if not experiments:
raise ValueError("At least one experiment is required.")
@@ -5445,7 +5699,6 @@ async def arun_on_dataset(
.. deprecated:: 0.1.0
This method is deprecated. Use :func:`langsmith.aevaluate` instead.
-
""" # noqa: E501
warnings.warn(
"The `arun_on_dataset` method is deprecated and"
@@ -5494,7 +5747,6 @@ def run_on_dataset(
.. deprecated:: 0.1.0
This method is deprecated. Use :func:`langsmith.aevaluate` instead.
-
""" # noqa: E501 # noqa: E501
warnings.warn(
"The `run_on_dataset` method is deprecated and"
@@ -5554,8 +5806,8 @@ def _get_latest_commit_hash(
Args:
prompt_owner_and_name (str): The owner and name of the prompt.
- limit (int): The maximum number of commits to fetch. Defaults to 1.
- offset (int): The number of commits to skip. Defaults to 0.
+ limit (int, default=1): The maximum number of commits to fetch. Defaults to 1.
+ offset (int, default=0): The number of commits to skip. Defaults to 0.
Returns:
Optional[str]: The latest commit hash, or None if no commits are found.
@@ -5633,7 +5885,7 @@ def like_prompt(self, prompt_identifier: str) -> Dict[str, int]:
prompt_identifier (str): The identifier of the prompt.
Returns:
- A dictionary with the key 'likes' and the count of likes as the value.
+ Dict[str, int]: A dictionary with the key 'likes' and the count of likes as the value.
"""
return self._like_or_unlike_prompt(prompt_identifier, like=True)
@@ -5645,7 +5897,7 @@ def unlike_prompt(self, prompt_identifier: str) -> Dict[str, int]:
prompt_identifier (str): The identifier of the prompt.
Returns:
- A dictionary with the key 'likes' and the count of likes as the value.
+ Dict[str, int]: A dictionary with the key 'likes' and the count of likes as the value.
"""
return self._like_or_unlike_prompt(prompt_identifier, like=False)
@@ -5664,18 +5916,18 @@ def list_prompts(
"""List prompts with pagination.
Args:
- limit (int): The maximum number of prompts to return. Defaults to 100.
- offset (int): The number of prompts to skip. Defaults to 0.
+ limit (int, default=100): The maximum number of prompts to return. Defaults to 100.
+ offset (int, default=0): The number of prompts to skip. Defaults to 0.
is_public (Optional[bool]): Filter prompts by if they are public.
is_archived (Optional[bool]): Filter prompts by if they are archived.
- sort_field (ls_schemas.PromptsSortField): The field to sort by.
+ sort_field (PromptSortField): The field to sort by.
Defaults to "updated_at".
- sort_direction (Literal["desc", "asc"]): The order to sort by.
+ sort_direction (Literal["desc", "asc"], default="desc"): The order to sort by.
Defaults to "desc".
query (Optional[str]): Filter prompts by a search query.
Returns:
- ls_schemas.ListPromptsResponse: A response object containing
+ ListPromptsResponse: A response object containing
the list of prompts.
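+
+ Examples:
+ A minimal sketch:
+
+ .. code-block:: python
+
+ from langsmith import Client
+
+ client = Client()
+ response = client.list_prompts(limit=10, is_public=True)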
"""
params = {
@@ -5699,14 +5951,14 @@ def get_prompt(self, prompt_identifier: str) -> Optional[ls_schemas.Prompt]:
Args:
prompt_identifier (str): The identifier of the prompt.
- The identifier should be in the format "prompt_name" or "owner/prompt_name".
+ The identifier should be in the format "prompt_name" or "owner/prompt_name".
Returns:
- Optional[ls_schemas.Prompt]: The prompt object.
+ Optional[Prompt]: The prompt object.
Raises:
requests.exceptions.HTTPError: If the prompt is not found or
- another error occurs.
+ another error occurs.
"""
owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
try:
@@ -5729,14 +5981,15 @@ def create_prompt(
Does not attach prompt object, just creates an empty prompt.
Args:
- prompt_name (str): The name of the prompt.
+ prompt_identifier (str): The identifier of the prompt.
+ The identifier should be in the format of owner/name:hash, name:hash, owner/name, or name.
description (Optional[str]): A description of the prompt.
readme (Optional[str]): A readme for the prompt.
tags (Optional[Sequence[str]]): A list of tags for the prompt.
is_public (bool): Whether the prompt should be public. Defaults to False.
Returns:
- ls_schemas.Prompt: The created prompt object.
+ Prompt: The created prompt object.
Raises:
ValueError: If the current tenant is not the owner.
@@ -5909,7 +6162,7 @@ def pull_prompt_commit(
prompt_identifier (str): The identifier of the prompt.
Returns:
- ls_schemas.PromptObject: The prompt object.
+ PromptCommit: The prompt object.
Raises:
ValueError: If no commits are found for the prompt.
@@ -5940,15 +6193,12 @@ def list_prompt_commits(
Args:
prompt_identifier (str): The identifier of the prompt in the format 'owner/repo_name'.
- limit (Optional[int], optional): The maximum number of commits to return. If None, returns all commits. Defaults to None.
- offset (int, optional): The number of commits to skip before starting to return results. Defaults to 0.
- include_model (bool, optional): Whether to include the model information in the commit data. Defaults to False.
-
- Returns:
- Iterator[ls_schemas.ListedPromptCommit]: An iterator of ListedPromptCommit objects representing the commits.
+ limit (Optional[int]): The maximum number of commits to return. If None, returns all commits. Defaults to None.
+ offset (int, default=0): The number of commits to skip before starting to return results. Defaults to 0.
+ include_model (bool, default=False): Whether to include the model information in the commit data. Defaults to False.
Yields:
- ls_schemas.ListedPromptCommit: A ListedPromptCommit object for each commit.
+ A ListedPromptCommit object for each commit.
Note:
This method uses pagination to retrieve commits. It will make multiple API calls if necessary to retrieve all commits
@@ -5996,6 +6246,7 @@ def pull_prompt(
Args:
prompt_identifier (str): The identifier of the prompt.
+ include_model (Optional[bool], default=False): Whether to include the model information in the prompt data.
Returns:
Any: The prompt object in the specified format.
@@ -6121,7 +6372,6 @@ def push_prompt(
Returns:
str: The URL of the prompt.
-
"""
# Create or update prompt metadata
if self._prompt_exists(prompt_identifier):
@@ -6221,41 +6471,38 @@ def evaluate(
r"""Evaluate a target system on a given dataset.
Args:
- target (TARGET_T | Runnable | EXPERIMENT_T | Tuple[EXPERIMENT_T, EXPERIMENT_T]):
+ target (Union[TARGET_T, Runnable, EXPERIMENT_T, Tuple[EXPERIMENT_T, EXPERIMENT_T]]):
The target system or experiment(s) to evaluate. Can be a function
that takes a dict and returns a dict, a langchain Runnable, an
existing experiment ID, or a two-tuple of experiment IDs.
data (DATA_T): The dataset to evaluate on. Can be a dataset name, a list of
examples, or a generator of examples.
- evaluators (Sequence[EVALUATOR_T] | Sequence[COMPARATIVE_EVALUATOR_T] | None):
+ evaluators (Optional[Union[Sequence[EVALUATOR_T], Sequence[COMPARATIVE_EVALUATOR_T]]]):
A list of evaluators to run on each example. The evaluator signature
depends on the target type. Default to None.
- summary_evaluators (Sequence[SUMMARY_EVALUATOR_T] | None): A list of summary
+ summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary
evaluators to run on the entire dataset. Should not be specified if
comparing two existing experiments. Defaults to None.
- metadata (dict | None): Metadata to attach to the experiment.
+ metadata (Optional[dict]): Metadata to attach to the experiment.
Defaults to None.
- experiment_prefix (str | None): A prefix to provide for your experiment name.
+ experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
Defaults to None.
- description (str | None): A free-form text description for the experiment.
- max_concurrency (int | None): The maximum number of concurrent
+ description (Optional[str]): A free-form text description for the experiment.
+ max_concurrency (Optional[int], default=0): The maximum number of concurrent
evaluations to run. If None then no limit is set. If 0 then no concurrency.
Defaults to 0.
- blocking (bool): Whether to block until the evaluation is complete.
+ blocking (bool, default=True): Whether to block until the evaluation is complete.
Defaults to True.
- num_repetitions (int): The number of times to run the evaluation.
+ num_repetitions (int, default=1): The number of times to run the evaluation.
Each item in the dataset will be run and evaluated this many times.
Defaults to 1.
- experiment (schemas.TracerSession | None): An existing experiment to
+ experiment (Optional[EXPERIMENT_T]): An existing experiment to
extend. If provided, experiment_prefix is ignored. For advanced
usage only. Should not be specified if target is an existing experiment or
two-tuple fo experiments.
- load_nested (bool): Whether to load all child runs for the experiment.
- Default is to only load the top-level root runs. Should only be specified
- when target is an existing experiment or two-tuple of experiments.
- randomize_order (bool): Whether to randomize the order of the outputs for each
- evaluation. Default is False. Should only be specified when target is a
- two-tuple of existing experiments.
+ upload_results (bool, default=True): Whether to upload the results to LangSmith.
+ Defaults to True.
+ **kwargs (Any): Additional keyword arguments to pass to the evaluator.
Returns:
ExperimentResults: If target is a function, Runnable, or existing experiment.
@@ -6264,124 +6511,157 @@ def evaluate(
Examples:
Prepare the dataset:
- >>> from langsmith import Client
- >>> client = Client()
- >>> dataset = client.clone_public_dataset(
- ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
- ... )
- >>> dataset_name = "Evaluate Examples"
+ .. code-block:: python
+
+ from langsmith import Client
+ client = Client()
+ dataset = client.clone_public_dataset(
+ "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
+ )
+ dataset_name = "Evaluate Examples"
Basic usage:
- >>> def accuracy(outputs: dict, reference_outputs: dict) -> dict:
- ... # Row-level evaluator for accuracy.
- ... pred = outputs["response"]
- ... expected = reference_outputs["answer"]
- ... return {"score": expected.lower() == pred.lower()}
-
- >>> def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
- ... # Experiment-level evaluator for precision.
- ... # TP / (TP + FP)
- ... predictions = [out["response"].lower() for out in outputs]
- ... expected = [ref["answer"].lower() for ref in reference_outputs]
- ... # yes and no are the only possible answers
- ... tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
- ... fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
- ... return {"score": tp / (tp + fp)}
- >>> def predict(inputs: dict) -> dict:
- ... # This can be any function or just an API call to your app.
- ... return {"response": "Yes"}
- >>> results = client.evaluate(
- ... predict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Experiment",
- ... description="Evaluating the accuracy of a simple prediction model.",
- ... metadata={
- ... "my-prompt-version": "abcd-1234",
- ... },
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+ .. code-block:: python
+
+ def accuracy(outputs: dict, reference_outputs: dict) -> dict:
+ # Row-level evaluator for accuracy.
+ pred = outputs["response"]
+ expected = reference_outputs["answer"]
+ return {"score": expected.lower() == pred.lower()}
+
+ .. code-block:: python
+
+ def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
+ # Experiment-level evaluator for precision.
+ # TP / (TP + FP)
+ predictions = [out["response"].lower() for out in outputs]
+ expected = [ref["answer"].lower() for ref in reference_outputs]
+ # yes and no are the only possible answers
+ tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
+ fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
+ return {"score": tp / (tp + fp)}
+
+ def predict(inputs: dict) -> dict:
+ # This can be any function or just an API call to your app.
+ return {"response": "Yes"}
+
+ results = client.evaluate(
+ predict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Experiment",
+ description="Evaluating the accuracy of a simple prediction model.",
+ metadata={
+ "my-prompt-version": "abcd-1234",
+ },
+ )
Evaluating over only a subset of the examples
- >>> experiment_name = results.experiment_name
- >>> examples = client.list_examples(dataset_name=dataset_name, limit=5)
- >>> results = client.evaluate(
- ... predict,
- ... data=examples,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Experiment",
- ... description="Just testing a subset synchronously.",
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+ .. code-block:: python
+
+ experiment_name = results.experiment_name
+ examples = client.list_examples(dataset_name=dataset_name, limit=5)
+ results = client.evaluate(
+ predict,
+ data=examples,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Experiment",
+ description="Just testing a subset synchronously.",
+ )
Streaming each prediction to more easily + eagerly debug.
- >>> results = client.evaluate(
- ... predict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... description="I don't even have to block!",
- ... blocking=False,
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
- >>> for i, result in enumerate(results): # doctest: +ELLIPSIS
- ... pass
+ .. code-block:: python
+
+ results = client.evaluate(
+ predict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ description="I don't even have to block!",
+ blocking=False,
+ )
+
+ for i, result in enumerate(results):
+ pass
Using the `evaluate` API with an off-the-shelf LangChain evaluator:
- >>> from langsmith.evaluation import LangChainStringEvaluator
- >>> from langchain.chat_models import init_chat_model
- >>> def prepare_criteria_data(run: Run, example: Example):
- ... return {
- ... "prediction": run.outputs["output"],
- ... "reference": example.outputs["answer"],
- ... "input": str(example.inputs),
- ... }
- >>> results = client.evaluate(
- ... predict,
- ... data=dataset_name,
- ... evaluators=[
- ... accuracy,
- ... LangChainStringEvaluator("embedding_distance"),
- ... LangChainStringEvaluator(
- ... "labeled_criteria",
- ... config={
- ... "criteria": {
- ... "usefulness": "The prediction is useful if it is correct"
- ... " and/or asks a useful followup question."
- ... },
- ... "llm": init_chat_model("gpt-4o"),
- ... },
- ... prepare_data=prepare_criteria_data,
- ... ),
- ... ],
- ... description="Evaluating with off-the-shelf LangChain evaluators.",
- ... summary_evaluators=[precision],
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+ .. code-block:: python
+
+ from langsmith.evaluation import LangChainStringEvaluator
+ from langchain.chat_models import init_chat_model
+
+ def prepare_criteria_data(run: Run, example: Example):
+ return {
+ "prediction": run.outputs["output"],
+ "reference": example.outputs["answer"],
+ "input": str(example.inputs),
+ }
+
+ results = client.evaluate(
+ predict,
+ data=dataset_name,
+ evaluators=[
+ accuracy,
+ LangChainStringEvaluator("embedding_distance"),
+ LangChainStringEvaluator(
+ "labeled_criteria",
+ config={
+ "criteria": {
+ "usefulness": "The prediction is useful if it is correct"
+ " and/or asks a useful followup question."
+ },
+ "llm": init_chat_model("gpt-4o"),
+ },
+ prepare_data=prepare_criteria_data,
+ ),
+ ],
+ description="Evaluating with off-the-shelf LangChain evaluators.",
+ summary_evaluators=[precision],
+ )
Evaluating a LangChain object:
- >>> from langchain_core.runnables import chain as as_runnable
- >>> @as_runnable
- ... def nested_predict(inputs):
- ... return {"response": "Yes"}
- >>> @as_runnable
- ... def lc_predict(inputs):
- ... return nested_predict.invoke(inputs)
- >>> results = client.evaluate(
- ... lc_predict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... description="This time we're evaluating a LangChain object.",
- ... summary_evaluators=[precision],
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ from langchain_core.runnables import chain as as_runnable
+ @as_runnable
+ def nested_predict(inputs):
+ return {"response": "Yes"}
+ @as_runnable
+ def lc_predict(inputs):
+ return nested_predict.invoke(inputs)
+ results = client.evaluate(
+ lc_predict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ description="This time we're evaluating a LangChain object.",
+ summary_evaluators=[precision],
+ )
+
+ Comparative evaluation:
+
+ .. code-block:: python
+
+ results = client.evaluate(
+ # The target is a tuple of the experiment IDs to compare
+ target=("12345678-1234-1234-1234-123456789012", "98765432-1234-1234-1234-123456789012"),
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ )
+
+ Evaluate an existing experiment:
+
+ .. code-block:: python
+
+ results = client.evaluate(
+ # The target is the ID of the experiment we are evaluating
+ target="12345678-1234-1234-1234-123456789012",
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ )
+
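+ Running evaluations sequentially (a minimal sketch; this assumes ``evaluate``
+ accepts the same ``max_concurrency`` argument documented for ``aevaluate``):
+
+ .. code-block:: python
+
+     results = client.evaluate(
+         predict,
+         data=dataset_name,
+         evaluators=[accuracy],
+         summary_evaluators=[precision],
+         description="Running each example one at a time.",
+         # Assumed to mirror aevaluate: 0 disables concurrent execution.
+         max_concurrency=0,
+     )
+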
.. versionadded:: 0.2.0
""" # noqa: E501
@@ -6435,11 +6715,11 @@ async def aevaluate(
r"""Evaluate an async target system on a given dataset.
Args:
- target (AsyncCallable[[dict], dict] | AsyncIterable[dict] | Runnable | EXPERIMENT_T | Tuple[EXPERIMENT_T, EXPERIMENT_T]):
+ target (Union[ATARGET_T, AsyncIterable[dict], Runnable, str, uuid.UUID, TracerSession]):
The target system or experiment(s) to evaluate. Can be an async function
that takes a dict and returns a dict, a langchain Runnable, an
existing experiment ID, or a two-tuple of experiment IDs.
- data (Union[DATA_T, AsyncIterable[schemas.Example]]): The dataset to evaluate on. Can be a dataset name, a list of
+ data (Union[DATA_T, AsyncIterable[Example]]): The dataset to evaluate on. Can be a dataset name, a list of
examples, an async generator of examples, or an async iterable of examples.
evaluators (Optional[Sequence[EVALUATOR_T]]): A list of evaluators to run
on each example. Defaults to None.
@@ -6450,20 +6730,20 @@ async def aevaluate(
experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
Defaults to None.
description (Optional[str]): A description of the experiment.
- max_concurrency (int | None): The maximum number of concurrent
+ max_concurrency (Optional[int], default=0): The maximum number of concurrent
evaluations to run. If None then no limit is set. If 0 then no concurrency.
Defaults to 0.
- num_repetitions (int): The number of times to run the evaluation.
+ num_repetitions (int, default=1): The number of times to run the evaluation.
Each item in the dataset will be run and evaluated this many times.
Defaults to 1.
- blocking (bool): Whether to block until the evaluation is complete.
+ blocking (bool, default=True): Whether to block until the evaluation is complete.
Defaults to True.
- experiment (Optional[schemas.TracerSession]): An existing experiment to
+ experiment (Optional[TracerSession]): An existing experiment to
extend. If provided, experiment_prefix is ignored. For advanced
usage only.
- load_nested: Whether to load all child runs for the experiment.
- Default is to only load the top-level root runs. Should only be specified
- when evaluating an existing experiment.
+ upload_results (bool, default=True): Whether to upload the results to LangSmith.
+ Defaults to True.
+ **kwargs (Any): Additional keyword arguments to pass to the evaluator.
Returns:
AsyncIterator[ExperimentResultRow]: An async iterator over the experiment results.
@@ -6475,122 +6755,144 @@ async def aevaluate(
Requires the 'langsmith[vcr]' package to be installed.
Examples:
- >>> import asyncio
- >>> from langsmith import Client
- >>> client = Client()
- >>> dataset = client.clone_public_dataset(
- ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
- ... )
- >>> dataset_name = "Evaluate Examples"
+ Prepare the dataset:
+
+ .. code-block:: python
+
+ import asyncio
+ from langsmith import Client
+ client = Client()
+ dataset = client.clone_public_dataset(
+ "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
+ )
+ dataset_name = "Evaluate Examples"
Basic usage:
- >>> def accuracy(outputs: dict, reference_outputs: dict) -> dict:
- ... # Row-level evaluator for accuracy.
- ... pred = outputs["resposen"]
- ... expected = reference_outputs["answer"]
- ... return {"score": expected.lower() == pred.lower()}
-
- >>> def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
- ... # Experiment-level evaluator for precision.
- ... # TP / (TP + FP)
- ... predictions = [out["response"].lower() for out in outputs]
- ... expected = [ref["answer"].lower() for ref in reference_outputs]
- ... # yes and no are the only possible answers
- ... tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
- ... fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
- ... return {"score": tp / (tp + fp)}
-
- >>> async def apredict(inputs: dict) -> dict:
- ... # This can be any async function or just an API call to your app.
- ... await asyncio.sleep(0.1)
- ... return {"response": "Yes"}
- >>> results = asyncio.run(
- ... client.aevaluate(
- ... apredict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Experiment",
- ... description="Evaluate the accuracy of the model asynchronously.",
- ... metadata={
- ... "my-prompt-version": "abcd-1234",
- ... },
- ... )
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ def accuracy(outputs: dict, reference_outputs: dict) -> dict:
+ # Row-level evaluator for accuracy.
+ pred = outputs["response"]
+ expected = reference_outputs["answer"]
+ return {"score": expected.lower() == pred.lower()}
+
+ def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
+ # Experiment-level evaluator for precision.
+ # TP / (TP + FP)
+ predictions = [out["response"].lower() for out in outputs]
+ expected = [ref["answer"].lower() for ref in reference_outputs]
+ # yes and no are the only possible answers
+ tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
+ fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
+ return {"score": tp / (tp + fp)}
+
+ async def apredict(inputs: dict) -> dict:
+ # This can be any async function or just an API call to your app.
+ await asyncio.sleep(0.1)
+ return {"response": "Yes"}
+ results = asyncio.run(
+ client.aevaluate(
+ apredict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Experiment",
+ description="Evaluate the accuracy of the model asynchronously.",
+ metadata={
+ "my-prompt-version": "abcd-1234",
+ },
+ )
+ )
Evaluating over only a subset of the examples using an async generator:
- >>> async def example_generator():
- ... examples = client.list_examples(dataset_name=dataset_name, limit=5)
- ... for example in examples:
- ... yield example
- >>> results = asyncio.run(
- ... client.aevaluate(
- ... apredict,
- ... data=example_generator(),
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Subset Experiment",
- ... description="Evaluate a subset of examples asynchronously.",
- ... )
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ async def example_generator():
+ examples = client.list_examples(dataset_name=dataset_name, limit=5)
+ for example in examples:
+ yield example
+ results = asyncio.run(
+ client.aevaluate(
+ apredict,
+ data=example_generator(),
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Subset Experiment",
+ description="Evaluate a subset of examples asynchronously.",
+ )
+ )
Streaming each prediction to more easily + eagerly debug.
- >>> results = asyncio.run(
- ... client.aevaluate(
- ... apredict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Streaming Experiment",
- ... description="Streaming predictions for debugging.",
- ... blocking=False,
- ... )
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ results = asyncio.run(
+ client.aevaluate(
+ apredict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Streaming Experiment",
+ description="Streaming predictions for debugging.",
+ blocking=False,
+ )
+ )
- >>> async def aenumerate(iterable):
- ... async for elem in iterable:
- ... print(elem)
- >>> asyncio.run(aenumerate(results))
+ async def aenumerate(iterable):
+ async for elem in iterable:
+ print(elem)
+ asyncio.run(aenumerate(results))
Running without concurrency:
- >>> results = asyncio.run(
- ... client.aevaluate(
- ... apredict,
- ... data=dataset_name,
- ... evaluators=[accuracy],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Experiment Without Concurrency",
- ... description="This was run without concurrency.",
- ... max_concurrency=0,
- ... )
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ results = asyncio.run(
+ client.aevaluate(
+ apredict,
+ data=dataset_name,
+ evaluators=[accuracy],
+ summary_evaluators=[precision],
+ experiment_prefix="My Experiment Without Concurrency",
+ description="This was run without concurrency.",
+ max_concurrency=0,
+ )
+ )
Using Async evaluators:
- >>> async def helpfulness(outputs: dict) -> dict:
- ... # Row-level evaluator for helpfulness.
- ... await asyncio.sleep(5) # Replace with your LLM API call
- ... return {"score": outputs["output"] == "Yes"}
-
- >>> results = asyncio.run(
- ... client.aevaluate(
- ... apredict,
- ... data=dataset_name,
- ... evaluators=[helpfulness],
- ... summary_evaluators=[precision],
- ... experiment_prefix="My Helpful Experiment",
- ... description="Applying async evaluators example.",
- ... )
- ... ) # doctest: +ELLIPSIS
- View the evaluation results for experiment:...
+
+ .. code-block:: python
+
+ async def helpfulness(outputs: dict) -> dict:
+ # Row-level evaluator for helpfulness.
+ await asyncio.sleep(5) # Replace with your LLM API call
+ return {"score": outputs["response"] == "Yes"}
+
+ results = asyncio.run(
+ client.aevaluate(
+ apredict,
+ data=dataset_name,
+ evaluators=[helpfulness],
+ summary_evaluators=[precision],
+ experiment_prefix="My Helpful Experiment",
+ description="Applying async evaluators example.",
+ )
+ )
+
+ Evaluate an existing experiment:
+
+ .. code-block:: python
+
+ results = asyncio.run(
+ client.aevaluate(
+ # The target is the ID of the experiment we are evaluating
+ target="419dcab2-1d66-4b94-8901-0357ead390df",
+ evaluators=[accuracy, helpfulness],
+ summary_evaluators=[precision],
+ )
+ )
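+
+ Keeping results local instead of uploading them (a minimal sketch using the
+ ``upload_results`` flag documented above):
+
+ .. code-block:: python
+
+     results = asyncio.run(
+         client.aevaluate(
+             apredict,
+             data=dataset_name,
+             evaluators=[accuracy],
+             summary_evaluators=[precision],
+             description="A dry run whose results are not uploaded to LangSmith.",
+             upload_results=False,
+         )
+     )
+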
.. versionadded:: 0.2.0