Commit d5ecef0

gs

hinthornw committed Apr 11, 2024
1 parent 75e4c05 · commit d5ecef0

Showing 5 changed files with 312 additions and 41 deletions.
94 changes: 55 additions & 39 deletions python/langsmith/_expect.py
@@ -24,7 +24,7 @@ def test_output_semantically_close():
         ).to_be_less_than(0.5)
         # Score the test case
-        matcher = expect.string_distance(
+        matcher = expect.edit_distance(
             prediction=response.choices[0].message.content,
             reference="Hello!",
         )
@@ -36,29 +36,15 @@ def test_output_semantically_close():
 
 import atexit
 import concurrent.futures
-from typing import Any, Callable, Optional, Protocol
+from typing import TYPE_CHECKING, Any, Optional
 
 from langsmith import client as ls_client
 from langsmith import run_helpers as rh
 from langsmith import utils as ls_utils
 
-
-def _import_load_evaluators() -> Callable[..., _StringEvaluator]:
-    try:
-        from langchain.evaluation import load_evaluator  # noqa: F811
-    except ImportError:
-        raise ImportError("Please install langchain to use this feature")
-
-    return load_evaluator  # type: ignore
-
-
-class _StringEvaluator(Protocol):
-    def evaluate_strings(
-        self,
-        prediction: str,
-        reference: Optional[str] = None,
-        inputs: Optional[str] = None,
-    ) -> dict: ...
+if TYPE_CHECKING:
+    from langsmith._internal._edit_distance import EditDistanceConfig
+    from langsmith._internal._embedding_distance import EmbeddingConfig
 
 
 class _Matcher:
@@ -191,34 +177,56 @@ def __init__(self, client: Optional[ls_client.Client] = None):
         atexit.register(self.executor.shutdown, wait=True)
 
     def embedding_distance(
-        self, prediction: str, reference: str, *, config: Optional[dict] = None
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EmbeddingConfig] = None,
     ) -> _Matcher:
         """Compute the embedding distance between the prediction and reference.
 
         This logs the embedding distance to LangSmith and returns a `_Matcher` instance
         for making assertions on the distance value.
 
-        This depends on the `langchain` package and whichever embedding
-        model is configured. (OpenAI by default)
+        By default, this uses the OpenAI API for computing embeddings.
 
         Args:
             prediction: The predicted string to compare.
             reference: The reference string to compare against.
             config: Optional configuration for the embedding distance evaluator.
                 Use this to configure the embedding model, distance metrics, etc.
+                Supported options:
+                - `encoder`: A custom encoder function to encode the list of input
+                    strings to embeddings. Defaults to the OpenAI API.
+                - `metric`: The distance metric to use for comparison.
+                    Supported values: "cosine", "euclidean", "manhattan",
+                    "chebyshev", "hamming".
 
         Returns:
             A `_Matcher` instance for the embedding distance value.
         """
-        evaluator = self._load_evaluator(config)
-        results = evaluator.evaluate_strings(prediction=prediction, reference=reference)
-        self._submit_feedback("embedding_distance", results)
+        from langsmith._internal._embedding_distance import EmbeddingDistance
+
+        config = config or {}
+        encoder_func = "custom" if config.get("encoder") else "openai"
+        evaluator = EmbeddingDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        self._submit_feedback(
+            "embedding_distance",
+            {
+                "score": score,
+                "source_info": {"encoder": encoder_func, "metric": evaluator.distance},
+            },
+        )
         return _Matcher(
-            self.client, "embedding_distance", results["score"], _executor=self.executor
+            self.client, "embedding_distance", score, _executor=self.executor
         )
 
-    def string_distance(
-        self, prediction: str, reference: str, *, config: Optional[dict] = None
+    def edit_distance(
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EditDistanceConfig] = None,
    ) -> _Matcher:
         """Compute the string distance between the prediction and reference.
@@ -231,18 +239,30 @@ def string_distance(
             prediction: The predicted string to compare.
             reference: The reference string to compare against.
             config: Optional configuration for the string distance evaluator.
                 Use this to configure the distance metric, normalization, etc.
+                Supported options:
+                - `metric`: The distance metric to use for comparison.
+                    Supported values: "damerau_levenshtein", "levenshtein",
+                    "jaro", "jaro_winkler", "hamming", "indel".
+                - `normalize_score`: Whether to normalize the score between 0 and 1.
 
         Returns:
             A `_Matcher` instance for the string distance value.
         """
-        evaluator = self._load_evaluator(config)
-        results = evaluator.evaluate_strings(prediction=prediction, reference=reference)
-        self._submit_feedback("string_distance", results)
+        from langsmith._internal._edit_distance import EditDistance
+
+        config = config or {}
+        metric = config.get("metric") or "damerau_levenshtein"
+        normalize = config.get("normalize_score", True)
+        evaluator = EditDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        self._submit_feedback(
+            "edit_distance",
+            {"score": score, "source_info": {"metric": metric, "normalize": normalize}},
+        )
         return _Matcher(
             self.client,
-            "string_distance",
-            results["score"],
+            "edit_distance",
+            score,
             _executor=self.executor,
         )

@@ -261,9 +281,5 @@ def _submit_feedback(self, key: str, results: dict):
                 self.client.create_feedback, run_id=run_id, key=key, **results
             )
 
-    def _load_evaluator(self, config: Optional[dict] = None) -> _StringEvaluator:
-        load_evaluator = _import_load_evaluators()
-        return load_evaluator("embedding_distance", **(config or {}))
-
 
 expect = _Expect()
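
For orientation, here is a hypothetical pytest-style sketch (not part of the commit) of how the renamed API reads after this change. It assumes `expect` is importable from the package root, as in the module docstring, that `rapidfuzz` is installed, and that `embedding_distance` has OpenAI credentials available at run time:

# Hypothetical usage sketch; assumes a langsmith build that includes this commit.
from langsmith import expect

def test_greeting_close_enough():
    prediction = "Hello."

    # Renamed from expect.string_distance; feedback now logs under the
    # "edit_distance" key. Defaults: damerau_levenshtein, normalized to [0, 1].
    expect.edit_distance(
        prediction=prediction,
        reference="Hello!",
        config={"metric": "levenshtein", "normalize_score": True},
    ).to_be_less_than(0.5)  # 1 edit over 6 chars, roughly 0.17

    # Same name as before, but now routed through the internal
    # EmbeddingDistance helper (OpenAI embeddings by default).
    expect.embedding_distance(
        prediction=prediction,
        reference="Hello!",
    ).to_be_less_than(0.5)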
63 changes: 63 additions & 0 deletions python/langsmith/_internal/_edit_distance.py
@@ -0,0 +1,63 @@
+from typing import Any, Callable, Dict, Literal, Optional
+
+from typing_extensions import TypedDict
+
+METRICS = Literal[
+    "damerau_levenshtein",
+    "levenshtein",
+    "jaro",
+    "jaro_winkler",
+    "hamming",
+    "indel",
+]
+
+
+class EditDistanceConfig(TypedDict, total=False):
+    metric: METRICS
+    normalize_score: bool
+
+
+class EditDistance:
+    def __init__(
+        self,
+        config: Optional[EditDistanceConfig] = None,
+    ):
+        config = config or {}
+        metric = config.get("metric") or "damerau_levenshtein"
+        self.metric = self._get_metric(metric, config.get("normalize_score", True))
+
+    def evaluate(
+        self,
+        prediction: str,
+        reference: Optional[str] = None,
+    ) -> float:
+        return self.metric(prediction, reference)
+
+    @staticmethod
+    def _get_metric(distance: str, normalize_score: bool = False) -> Callable:
+        try:
+            from rapidfuzz import distance as rf_distance
+        except ImportError:
+            raise ImportError(
+                "This operation requires the rapidfuzz library. "
+                "Please install it with `pip install -U rapidfuzz`."
+            )
+
+        module_map: Dict[str, Any] = {
+            "damerau_levenshtein": rf_distance.DamerauLevenshtein,
+            "levenshtein": rf_distance.Levenshtein,
+            "jaro": rf_distance.Jaro,
+            "jaro_winkler": rf_distance.JaroWinkler,
+            "hamming": rf_distance.Hamming,
+            "indel": rf_distance.Indel,
+        }
+        if distance not in module_map:
+            raise ValueError(
+                f"Invalid distance metric: {distance}"
+                f"\nMust be one of: {list(module_map)}"
+            )
+        module = module_map[distance]
+        if normalize_score:
+            return module.normalized_distance
+        else:
+            return module.distance
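
Since `rapidfuzz` stays an optional dependency, a quick standalone check of what the class above computes may be useful. A minimal sketch (not part of the commit) mirroring `EditDistance`'s default normalized Damerau-Levenshtein path:

# Requires `pip install -U rapidfuzz`.
from rapidfuzz import distance as rf_distance

pred, ref = "Hello.", "Hello!"

# normalize_score=True (the default) maps the edit count into [0, 1],
# where 0.0 means the strings are identical.
print(rf_distance.DamerauLevenshtein.normalized_distance(pred, ref))  # ~0.1667

# normalize_score=False returns the raw number of edit operations.
print(rf_distance.DamerauLevenshtein.distance(pred, ref))  # 1

Resolving the metric to a bound rapidfuzz callable once in `__init__` keeps `evaluate` to a single call and defers the import until the feature is actually used.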
