Commit d5ecef0

gs

hinthornw committed Apr 11, 2024
1 parent 75e4c05 · commit d5ecef0

Showing 5 changed files with 312 additions and 41 deletions.
94 changes: 55 additions & 39 deletions python/langsmith/_expect.py
@@ -24,7 +24,7 @@ def test_output_semantically_close():
         ).to_be_less_than(0.5)
         # Score the test case
-        matcher = expect.string_distance(
+        matcher = expect.edit_distance(
             prediction=response.choices[0].message.content,
             reference="Hello!",
         )
@@ -36,29 +36,15 @@ def test_output_semantically_close():
 
 import atexit
 import concurrent.futures
-from typing import Any, Callable, Optional, Protocol
+from typing import TYPE_CHECKING, Any, Optional
 
 from langsmith import client as ls_client
 from langsmith import run_helpers as rh
 from langsmith import utils as ls_utils
 
-
-def _import_load_evaluators() -> Callable[..., _StringEvaluator]:
-    try:
-        from langchain.evaluation import load_evaluator  # noqa: F811
-    except ImportError:
-        raise ImportError("Please install langchain to use this feature")
-
-    return load_evaluator  # type: ignore
-
-
-class _StringEvaluator(Protocol):
-    def evaluate_strings(
-        self,
-        prediction: str,
-        reference: Optional[str] = None,
-        inputs: Optional[str] = None,
-    ) -> dict: ...
+if TYPE_CHECKING:
+    from langsmith._internal._edit_distance import EditDistanceConfig
+    from langsmith._internal._embedding_distance import EmbeddingConfig
 
 
 class _Matcher:
@@ -191,34 +177,56 @@ def __init__(self, client: Optional[ls_client.Client] = None):
         atexit.register(self.executor.shutdown, wait=True)
 
     def embedding_distance(
-        self, prediction: str, reference: str, *, config: Optional[dict] = None
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EmbeddingConfig] = None,
     ) -> _Matcher:
         """Compute the embedding distance between the prediction and reference.
 
         This logs the embedding distance to LangSmith and returns a `_Matcher` instance
         for making assertions on the distance value.
 
-        This depends on the `langchain` package and whichever embedding
-        model is configured. (OpenAI by default)
+        By default, this uses the OpenAI API for computing embeddings.
 
         Args:
             prediction: The predicted string to compare.
             reference: The reference string to compare against.
             config: Optional configuration for the embedding distance evaluator.
                 Use this to configure the embedding model, distance metrics, etc.
+                Supported options:
+                - `encoder`: A custom encoder function to encode the list of input
+                    strings to embeddings. Defaults to the OpenAI API.
+                - `metric`: The distance metric to use for comparison.
+                    Supported values: "cosine", "euclidean", "manhattan",
+                    "chebyshev", "hamming".
 
         Returns:
             A `_Matcher` instance for the embedding distance value.
         """
-        evaluator = self._load_evaluator(config)
-        results = evaluator.evaluate_strings(prediction=prediction, reference=reference)
-        self._submit_feedback("embedding_distance", results)
+        from langsmith._internal._embedding_distance import EmbeddingDistance
+
+        config = config or {}
+        encoder_func = "custom" if config.get("encoder") else "openai"
+        evaluator = EmbeddingDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        self._submit_feedback(
+            "embedding_distance",
+            {
+                "score": score,
+                "source_info": {"encoder": encoder_func, "metric": evaluator.distance},
+            },
+        )
         return _Matcher(
-            self.client, "embedding_distance", results["score"], _executor=self.executor
+            self.client, "embedding_distance", score, _executor=self.executor
         )
 
-    def string_distance(
-        self, prediction: str, reference: str, *, config: Optional[dict] = None
+    def edit_distance(
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EditDistanceConfig] = None,
    ) -> _Matcher:
         """Compute the string distance between the prediction and reference.
@@ -231,18 +239,30 @@ def string_distance(
             prediction: The predicted string to compare.
             reference: The reference string to compare against.
             config: Optional configuration for the string distance evaluator.
                 Use this to configure the distance metric, normalization, etc.
+                Supported options:
+                - `metric`: The distance metric to use for comparison.
+                    Supported values: "damerau_levenshtein", "levenshtein",
+                    "jaro", "jaro_winkler", "hamming", "indel".
+                - `normalize_score`: Whether to normalize the score between 0 and 1.
 
         Returns:
             A `_Matcher` instance for the string distance value.
         """
-        evaluator = self._load_evaluator(config)
-        results = evaluator.evaluate_strings(prediction=prediction, reference=reference)
-        self._submit_feedback("string_distance", results)
+        from langsmith._internal._edit_distance import EditDistance
+
+        config = config or {}
+        metric = config.get("metric") or "damerau_levenshtein"
+        normalize = config.get("normalize_score", True)
+        evaluator = EditDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        self._submit_feedback(
+            "edit_distance",
+            {"score": score, "source_info": {"metric": metric, "normalize": normalize}},
+        )
         return _Matcher(
             self.client,
-            "string_distance",
-            results["score"],
+            "edit_distance",
+            score,
             _executor=self.executor,
         )

@@ -261,9 +281,5 @@ def _submit_feedback(self, key: str, results: dict):
                 self.client.create_feedback, run_id=run_id, key=key, **results
             )
 
-    def _load_evaluator(self, config: Optional[dict] = None) -> _StringEvaluator:
-        load_evaluator = _import_load_evaluators()
-        return load_evaluator("embedding_distance", **(config or {}))
-
 
 expect = _Expect()
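
For orientation, here is a hypothetical pytest-style sketch (not part of the commit) of how the renamed API reads after this change. It assumes `expect` is importable from the package root, as in the module docstring, that `rapidfuzz` is installed, and that `embedding_distance` has OpenAI credentials available at run time:

# Hypothetical usage sketch; assumes a langsmith build that includes this commit.
from langsmith import expect

def test_greeting_close_enough():
    prediction = "Hello."

    # Renamed from expect.string_distance; feedback now logs under the
    # "edit_distance" key. Defaults: damerau_levenshtein, normalized to [0, 1].
    expect.edit_distance(
        prediction=prediction,
        reference="Hello!",
        config={"metric": "levenshtein", "normalize_score": True},
    ).to_be_less_than(0.5)  # 1 edit over 6 chars, roughly 0.17

    # Same name as before, but now routed through the internal
    # EmbeddingDistance helper (OpenAI embeddings by default).
    expect.embedding_distance(
        prediction=prediction,
        reference="Hello!",
    ).to_be_less_than(0.5)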
63 changes: 63 additions & 0 deletions python/langsmith/_internal/_edit_distance.py
@@ -0,0 +1,63 @@
+from typing import Any, Callable, Dict, Literal, Optional
+
+from typing_extensions import TypedDict
+
+METRICS = Literal[
+    "damerau_levenshtein",
+    "levenshtein",
+    "jaro",
+    "jaro_winkler",
+    "hamming",
+    "indel",
+]
+
+
+class EditDistanceConfig(TypedDict, total=False):
+    metric: METRICS
+    normalize_score: bool
+
+
+class EditDistance:
+    def __init__(
+        self,
+        config: Optional[EditDistanceConfig] = None,
+    ):
+        config = config or {}
+        metric = config.get("metric") or "damerau_levenshtein"
+        self.metric = self._get_metric(metric, config.get("normalize_score", True))
+
+    def evaluate(
+        self,
+        prediction: str,
+        reference: Optional[str] = None,
+    ) -> float:
+        return self.metric(prediction, reference)
+
+    @staticmethod
+    def _get_metric(distance: str, normalize_score: bool = False) -> Callable:
+        try:
+            from rapidfuzz import distance as rf_distance
+        except ImportError:
+            raise ImportError(
+                "This operation requires the rapidfuzz library. "
+                "Please install it with `pip install -U rapidfuzz`."
+            )
+
+        module_map: Dict[str, Any] = {
+            "damerau_levenshtein": rf_distance.DamerauLevenshtein,
+            "levenshtein": rf_distance.Levenshtein,
+            "jaro": rf_distance.Jaro,
+            "jaro_winkler": rf_distance.JaroWinkler,
+            "hamming": rf_distance.Hamming,
+            "indel": rf_distance.Indel,
+        }
+        if distance not in module_map:
+            raise ValueError(
+                f"Invalid distance metric: {distance}"
+                f"\nMust be one of: {list(module_map)}"
+            )
+        module = module_map[distance]
+        if normalize_score:
+            return module.normalized_distance
+        else:
+            return module.distance
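
Since `rapidfuzz` stays an optional dependency, a quick standalone check of what the class above computes may be useful. A minimal sketch (not part of the commit) mirroring `EditDistance`'s default normalized Damerau-Levenshtein path:

# Requires `pip install -U rapidfuzz`.
from rapidfuzz import distance as rf_distance

pred, ref = "Hello.", "Hello!"

# normalize_score=True (the default) maps the edit count into [0, 1],
# where 0.0 means the strings are identical.
print(rf_distance.DamerauLevenshtein.normalized_distance(pred, ref))  # ~0.1667

# normalize_score=False returns the raw number of edit operations.
print(rf_distance.DamerauLevenshtein.distance(pred, ref))  # 1

Resolving the metric to a bound rapidfuzz callable once in `__init__` keeps `evaluate` to a single call and defers the import until the feature is actually used.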
