Skip to content

Commit

Permalink
gs
Browse files (browse the repository at this point in the history)
  • Loading branch information
hinthornw committed Dec 13, 2024
1 parent f684f2c commit 0a2f1d5
Showing 1 changed file with 34 additions and 20 deletions.
54 changes: 34 additions & 20 deletions python/langsmith/evaluation/_runner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -651,6 +651,7 @@ def evaluate_comparative(
metadata: Optional[dict] = None,
load_nested: bool = False,
randomize_order: bool = False,
upload_results: bool = True,
) -> ComparativeExperimentResults:
r"""Evaluate existing experiment runs against each other.
Expand All @@ -675,6 +676,8 @@ def evaluate_comparative(
Default is to only load the top-level root runs.
randomize_order (bool): Whether to randomize the order of the outputs for each evaluation.
Default is False.
upload_results (bool): Whether to upload the results to LangSmith.
Default is True.
Returns:
ComparativeExperimentResults: The results of the comparative evaluation.
Expand Down Expand Up @@ -910,6 +913,7 @@ def evaluate_comparative(

comparators = [comparison_evaluator(evaluator) for evaluator in evaluators or []]
results: dict = {}
tracing_mode = "local" if not upload_results else True

def evaluate_and_submit_feedback(
runs_list: list[schemas.Run],
Expand All @@ -920,10 +924,26 @@ def evaluate_and_submit_feedback(
feedback_group_id = uuid.uuid4()
if randomize_order:
random.shuffle(runs_list)
with rh.tracing_context(project_name="evaluators", client=client):
current_context = rh.get_tracing_context()
metadata = (current_context["metadata"] or {}) | {
"experiment": comparative_experiment.name,
"experiment_id": comparative_experiment.id,
"reference_example_id": example.id,
"reference_run_ids": [r.id for r in runs_list],
}
with rh.tracing_context(
**(
current_context
| {
"project_name": "evaluators",
"metadata": metadata,
"enabled": tracing_mode,
"client": client,
}
)
):
result = comparator.compare_runs(runs_list, example)
if client is None:
raise ValueError("Client is required to submit feedback.")

comments = (
{str(rid): result.comment for rid in result.scores}
if isinstance(result.comment, str)
Expand Down Expand Up @@ -1548,17 +1568,14 @@ def _run_evaluators(
executor: cf.ThreadPoolExecutor,
) -> ExperimentResultRow:
current_context = rh.get_tracing_context()
metadata = {
**(current_context["metadata"] or {}),
**{
"experiment": self.experiment_name,
"reference_example_id": current_results["example"].id,
"reference_run_id": current_results["run"].id,
},
metadata = (current_context["metadata"] or {}) | {
"experiment": self.experiment_name,
"reference_example_id": current_results["example"].id,
"reference_run_id": current_results["run"].id,
}
with rh.tracing_context(
**{
**current_context,
current_context
| {
"project_name": "evaluators",
"metadata": metadata,
"enabled": "local" if not self._upload_results else True,
Expand Down Expand Up @@ -1681,16 +1698,13 @@ def _apply_summary_evaluators(
with ls_utils.ContextThreadPoolExecutor() as executor:
project_id = self._get_experiment().id if self._upload_results else None
current_context = rh.get_tracing_context()
metadata = {
**(current_context["metadata"] or {}),
**{
"experiment": self.experiment_name,
"experiment_id": project_id,
},
metadata = (current_context["metadata"] or {}) | {
"experiment": self.experiment_name,
"experiment_id": project_id,
}
with rh.tracing_context(
**{
**current_context,
current_context
| {
"project_name": "evaluators",
"metadata": metadata,
"client": self.client,
Expand Down

0 comments on commit 0a2f1d5

Please sign in to comment.