From 5f1b4ef6ea21bd344d9676ee2fc67c130d91b71f Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:35:10 -0700 Subject: [PATCH] python[patch]: Return cloned dataset (#930) Co-authored-by: William FH <13333726+hinthornw@users.noreply.github.com> --- python/langsmith/client.py | 15 ++++++++++++--- python/langsmith/evaluation/_arunner.py | 2 +- python/langsmith/evaluation/_runner.py | 4 ++-- python/tests/evaluation/test_evaluation.py | 4 ++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 9c3852666..82fff681c 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -2944,7 +2944,7 @@ def clone_public_dataset( *, source_api_url: Optional[str] = None, dataset_name: Optional[str] = None, - ) -> None: + ) -> ls_schemas.Dataset: """Clone a public dataset to your own langsmith tenant. This operation is idempotent. If you already have a dataset with the given name, @@ -2957,6 +2957,10 @@ def clone_public_dataset( dataset_name (str): The name of the dataset to create in your tenant. Defaults to the name of the public dataset. + Returns: + ------- + Dataset + The created dataset. """ source_api_url = source_api_url or self.api_url source_api_url, token_uuid = _parse_token_or_url(token_or_url, source_api_url) @@ -2969,11 +2973,15 @@ def clone_public_dataset( ) ds = source_client.read_shared_dataset(token_uuid) dataset_name = dataset_name or ds.name - if self.has_dataset(dataset_name=dataset_name): + try: + ds = self.read_dataset(dataset_name=dataset_name) logger.info( f"Dataset {dataset_name} already exists in your tenant. Skipping." ) - return + return ds + except ls_utils.LangSmithNotFoundError: + pass + try: # Fetch examples first examples = list(source_client.list_shared_examples(token_uuid)) @@ -3001,6 +3009,7 @@ def clone_public_dataset( raise e finally: del source_client + return dataset def _get_data_type(self, dataset_id: ID_TYPE) -> ls_schemas.DataType: dataset = self.read_dataset(dataset_id=dataset_id) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 7cc50bffa..e155d3599 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -105,7 +105,7 @@ async def aevaluate( >>> from langsmith.evaluation import evaluate >>> from langsmith.schemas import Example, Run >>> client = Client() - >>> client.clone_public_dataset( + >>> dataset = client.clone_public_dataset( ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" ... ) >>> dataset_name = "Evaluate Examples" diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index d1870e989..000d516ed 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -130,7 +130,7 @@ def evaluate( >>> from langsmith.evaluation import evaluate >>> from langsmith.schemas import Example, Run >>> client = Client() - >>> client.clone_public_dataset( + >>> dataset = client.clone_public_dataset( ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" ... ) >>> dataset_name = "Evaluate Examples" @@ -480,7 +480,7 @@ def evaluate_comparative( >>> from langsmith.evaluation import evaluate >>> from langsmith.schemas import Example, Run >>> client = Client() - >>> client.clone_public_dataset( + >>> dataset = client.clone_public_dataset( ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" ... ) >>> dataset_name = "Evaluate Examples" diff --git a/python/tests/evaluation/test_evaluation.py b/python/tests/evaluation/test_evaluation.py index e05f9e920..c654a2b58 100644 --- a/python/tests/evaluation/test_evaluation.py +++ b/python/tests/evaluation/test_evaluation.py @@ -9,7 +9,7 @@ def test_evaluate(): client = Client() - client.clone_public_dataset( + _ = client.clone_public_dataset( "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" ) dataset_name = "Evaluate Examples" @@ -49,7 +49,7 @@ def predict(inputs: dict) -> dict: async def test_aevaluate(): client = Client() - client.clone_public_dataset( + _ = client.clone_public_dataset( "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" ) dataset_name = "Evaluate Examples"