Skip to content

Commit

Permalink
track evaluator errors from sdk
Browse files Browse the repository at this point in the history
  • Loading branch information
isahers1 committed Oct 8, 2024
1 parent f034c38 commit 2cfa36e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 0 deletions.
5 changes: 5 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4245,6 +4245,7 @@ def _submit_feedback(**kwargs):
),
feedback_source_type=ls_schemas.FeedbackSourceType.MODEL,
project_id=project_id,
extra=res.extra,
)
return results

Expand Down Expand Up @@ -4315,6 +4316,7 @@ def create_feedback(
project_id: Optional[ID_TYPE] = None,
comparative_experiment_id: Optional[ID_TYPE] = None,
feedback_group_id: Optional[ID_TYPE] = None,
extra: Optional[Dict] = None,
**kwargs: Any,
) -> ls_schemas.Feedback:
"""Create a feedback in the LangSmith API.
Expand Down Expand Up @@ -4360,6 +4362,8 @@ def create_feedback(
feedback_group_id : str or UUID
When logging preferences, ranking runs, or other comparative feedback,
this is used to group feedback together.
extra : dict
Metadata for the feedback.
"""
if run_id is None and project_id is None:
raise ValueError("One of run_id and project_id must be provided")
Expand Down Expand Up @@ -4419,6 +4423,7 @@ def create_feedback(
comparative_experiment_id, accept_null=True
),
feedback_group_id=_ensure_uuid(feedback_group_id, accept_null=True),
extra=extra,
)
feedback_block = _dumps_json(feedback.dict(exclude_none=True))
self.request_with_retries(
Expand Down
32 changes: 32 additions & 0 deletions python/langsmith/evaluation/_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import random
import threading
import uuid
import inspect
import re
from contextvars import copy_context
from typing import (
Awaitable,
Expand Down Expand Up @@ -82,6 +84,27 @@
],
]

# Matches the first ``return {...}`` whose braces contain no nested ``}``.
_FEEDBACK_RETURN_DICT_RE = re.compile(r"return\s*({[^}]+})")
# Matches a quoted string (single or double quotes) that is used as a dict key.
_FEEDBACK_DICT_KEY_RE = re.compile(r"""(["'])((?:(?!\1).)+)\1\s*:""", re.DOTALL)
# Matches the quoted string value assigned to the literal ``key`` entry.
_FEEDBACK_KEY_VALUE_RE = re.compile(
    r"""(["'])key\1\s*:\s*(["'])((?:(?!\2).)+)\2""", re.DOTALL
)


def extract_code_evaluator_feedback_keys(python_code: str) -> list[str]:
    """Guess the feedback keys an evaluator emits from its source text.

    This is a regex heuristic, not a parser. It looks at the first
    ``return {...}`` dict literal found in ``python_code`` and collects:

    * every quoted dict key other than ``key`` and ``score``, in source order;
    * followed by the string value of the ``key`` entry, if one is present.

    Both single- and double-quoted strings are recognised.

    Limitations: only the first matching ``return`` statement is inspected,
    the literal is cut at the first ``}`` (so nested dicts/sets are truncated),
    and keys or values that are not plain string literals are not detected.

    Args:
        python_code: Source code of the evaluator function.

    Returns:
        The list of feedback keys found, or an empty list when no
        ``return {...}`` literal is present.
    """
    return_match = _FEEDBACK_RETURN_DICT_RE.search(python_code)
    if return_match is None:
        return []

    dict_str = return_match.group(1)

    # ``key`` and ``score`` are structural fields of a result dict, not
    # feedback names, so they are excluded from the collected keys.
    feedback_keys = [
        match.group(2)
        for match in _FEEDBACK_DICT_KEY_RE.finditer(dict_str)
        if match.group(2) not in ("key", "score")
    ]

    # When the dict uses the {"key": "<name>", ...} form, the feedback name is
    # the *value* of the ``key`` entry.
    key_value_match = _FEEDBACK_KEY_VALUE_RE.search(dict_str)
    if key_value_match is not None:
        feedback_keys.append(key_value_match.group(3))

    return feedback_keys

def evaluate(
target: TARGET_T,
Expand Down Expand Up @@ -1353,6 +1376,15 @@ def _run_evaluators(
)
)
except Exception as e:
feedback_keys = extract_code_evaluator_feedback_keys(inspect.getsource(evaluator.func))
error_response = EvaluationResults(results=[EvaluationResult(key=key,source_run_id=run.id,
comment=repr(e),extra={"error":True}) for key in feedback_keys])
eval_results["results"].extend(
# TODO: This is a hack
self.client._log_evaluation_feedback(
error_response, run=run, _executor=executor
)
)
logger.error(
f"Error running evaluator {repr(evaluator)} on"
f" run {run.id}: {repr(e)}",
Expand Down
2 changes: 2 additions & 0 deletions python/langsmith/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class EvaluationResult(BaseModel):
If none provided, the evaluation feedback is applied to the
root trace being."""
extra: Optional[Dict] = None
"""Metadata for the evaluator run."""

class Config:
"""Pydantic model configuration."""
Expand Down
2 changes: 2 additions & 0 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,8 @@ class FeedbackBase(BaseModel):
"""For preference scoring, this group ID is shared across feedbacks for each
run in the group that was being compared."""
extra: Optional[Dict] = None
"""The metadata of the feedback."""

class Config:
"""Configuration class for the schema."""
Expand Down

0 comments on commit 2cfa36e

Please sign in to comment.