From 5f1b4ef6ea21bd344d9676ee2fc67c130d91b71f Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Thu, 22 Aug 2024 12:35:10 -0700
Subject: [PATCH] python[patch]: Return cloned dataset (#930)

Co-authored-by: William FH <13333726+hinthornw@users.noreply.github.com>
---
 python/langsmith/client.py                 | 15 ++++++++++++---
 python/langsmith/evaluation/_arunner.py    |  2 +-
 python/langsmith/evaluation/_runner.py     |  4 ++--
 python/tests/evaluation/test_evaluation.py |  4 ++--
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 9c3852666..82fff681c 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -2944,7 +2944,7 @@ def clone_public_dataset(
         *,
         source_api_url: Optional[str] = None,
         dataset_name: Optional[str] = None,
-    ) -> None:
+    ) -> ls_schemas.Dataset:
         """Clone a public dataset to your own langsmith tenant.
 
         This operation is idempotent. If you already have a dataset with the given name,
@@ -2957,6 +2957,10 @@ def clone_public_dataset(
             dataset_name (str): The name of the dataset to create in your tenant.
                 Defaults to the name of the public dataset.
 
+        Returns:
+            Dataset: The cloned dataset. If a dataset with the given name
+                already exists in your tenant, that existing dataset is
+                returned instead.
         """
         source_api_url = source_api_url or self.api_url
         source_api_url, token_uuid = _parse_token_or_url(token_or_url, source_api_url)
@@ -2969,11 +2973,15 @@ def clone_public_dataset(
         )
         ds = source_client.read_shared_dataset(token_uuid)
         dataset_name = dataset_name or ds.name
-        if self.has_dataset(dataset_name=dataset_name):
+        try:
+            ds = self.read_dataset(dataset_name=dataset_name)
             logger.info(
                 f"Dataset {dataset_name} already exists in your tenant. Skipping."
             )
-            return
+            return ds
+        except ls_utils.LangSmithNotFoundError:
+            pass
+
         try:
             # Fetch examples first
             examples = list(source_client.list_shared_examples(token_uuid))
@@ -3001,6 +3009,7 @@ def clone_public_dataset(
                 raise e
         finally:
             del source_client
+        return dataset
 
     def _get_data_type(self, dataset_id: ID_TYPE) -> ls_schemas.DataType:
         dataset = self.read_dataset(dataset_id=dataset_id)
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 7cc50bffa..e155d3599 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -105,7 +105,7 @@ async def aevaluate(
         >>> from langsmith.evaluation import evaluate
         >>> from langsmith.schemas import Example, Run
         >>> client = Client()
-        >>> client.clone_public_dataset(
+        >>> dataset = client.clone_public_dataset(
         ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
         ... )
         >>> dataset_name = "Evaluate Examples"
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index d1870e989..000d516ed 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -130,7 +130,7 @@ def evaluate(
         >>> from langsmith.evaluation import evaluate
         >>> from langsmith.schemas import Example, Run
         >>> client = Client()
-        >>> client.clone_public_dataset(
+        >>> dataset = client.clone_public_dataset(
         ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
         ... )
         >>> dataset_name = "Evaluate Examples"
@@ -480,7 +480,7 @@ def evaluate_comparative(
         >>> from langsmith.evaluation import evaluate
         >>> from langsmith.schemas import Example, Run
         >>> client = Client()
-        >>> client.clone_public_dataset(
+        >>> dataset = client.clone_public_dataset(
         ...     "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
         ... )
         >>> dataset_name = "Evaluate Examples"
diff --git a/python/tests/evaluation/test_evaluation.py b/python/tests/evaluation/test_evaluation.py
index e05f9e920..c654a2b58 100644
--- a/python/tests/evaluation/test_evaluation.py
+++ b/python/tests/evaluation/test_evaluation.py
@@ -9,7 +9,7 @@
 
 def test_evaluate():
     client = Client()
-    client.clone_public_dataset(
+    _ = client.clone_public_dataset(
         "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
     )
     dataset_name = "Evaluate Examples"
@@ -49,7 +49,7 @@ def predict(inputs: dict) -> dict:
 
 async def test_aevaluate():
     client = Client()
-    client.clone_public_dataset(
+    _ = client.clone_public_dataset(
         "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
     )
     dataset_name = "Evaluate Examples"