
feat: added option to provide experiment_name to evaluate() #1843

Merged: 3 commits, merged on Jan 13, 2025
3 changes: 3 additions & 0 deletions .readthedocs.yml
@@ -1,5 +1,8 @@
version: 2

+mkdocs:
+  configuration: mkdocs.yml
+
build:
os: ubuntu-22.04
tools:
8 changes: 4 additions & 4 deletions src/ragas/dataset_schema.py
@@ -13,21 +13,21 @@
from datasets import Dataset as HFDataset
from pydantic import BaseModel, field_validator

+from ragas._version import __version__
from ragas.callbacks import ChainRunEncoder, parse_run_traces
from ragas.cost import CostCallbackHandler
from ragas.exceptions import UploadException
from ragas.messages import AIMessage, HumanMessage, ToolCall, ToolMessage
from ragas.sdk import (
-    upload_packet,
    RAGAS_API_SOURCE,
-    get_app_token,
-    check_api_response,
    build_evaluation_app_url,
+    check_api_response,
    get_api_url,
+    get_app_token,
    get_app_url,
+    upload_packet,
)
from ragas.utils import safe_nanmean
-from ragas._version import __version__

if t.TYPE_CHECKING:
from pathlib import Path
5 changes: 4 additions & 1 deletion src/ragas/evaluation.py
@@ -58,6 +58,7 @@ def evaluate(
metrics: t.Optional[t.Sequence[Metric]] = None,
llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
+experiment_name: t.Optional[str] = None,
callbacks: Callbacks = None,
run_config: t.Optional[RunConfig] = None,
token_usage_parser: t.Optional[TokenUsageParser] = None,
@@ -87,6 +88,8 @@ def evaluate(
The embeddings to use for the metrics. If not provided then ragas will use
the default embeddings for metrics which require embeddings. This can be overridden by the embeddings specified at
the metric level with `metric.embeddings`.
+experiment_name: str, optional
+    The name of the experiment to track. This is used to identify the evaluation run in tracing tools.
callbacks: Callbacks, optional
Lifecycle Langchain Callbacks to run during evaluation. Check the
[langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
@@ -246,7 +249,7 @@ def evaluate(
# new evaluation chain
row_run_managers = []
evaluation_rm, evaluation_group_cm = new_group(
-name=RAGAS_EVALUATION_CHAIN_NAME,
+name=experiment_name or RAGAS_EVALUATION_CHAIN_NAME,
inputs={},
callbacks=callbacks,
metadata={"type": ChainType.EVALUATION},
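With this change, callers can label an evaluation run directly from `evaluate()`. A minimal usage sketch follows; the dataset variable and experiment name are illustrative, not taken from this PR:

```python
from ragas import evaluate

# `eval_dataset` is assumed to be an already-built ragas EvaluationDataset
# (or a Hugging Face Dataset); metrics, LLM, and embeddings fall back to
# the ragas defaults when not passed explicitly.
result = evaluate(
    eval_dataset,
    experiment_name="faithfulness-baseline",  # hypothetical label for tracing
)
print(result)
```

When `experiment_name` is omitted, the run keeps the default `RAGAS_EVALUATION_CHAIN_NAME`, so existing callers are unaffected.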
4 changes: 3 additions & 1 deletion src/ragas/metrics/_domain_specific_rubrics.py
@@ -113,7 +113,9 @@ def __init__(
}

# Add rubrics to the scoring prompts
-rubrics_text = "\n".join(f"{key}: {value}" for key, value in self.rubrics.items())
+rubrics_text = "\n".join(
+    f"{key}: {value}" for key, value in self.rubrics.items()
+)
self.single_turn_scoring_prompt.instruction = f"{self.single_turn_scoring_prompt.instruction}\n\nScoring Rubrics:\n{rubrics_text}\n"
self.multi_turn_scoring_prompt.instruction = f"{self.multi_turn_scoring_prompt.instruction}\n\nScoring Rubrics:\n{rubrics_text}\n"

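This hunk only reflows the join for readability. For reference, a small standalone sketch of the string it produces; the rubric keys and descriptions below are made up, not taken from the ragas codebase:

```python
# Hypothetical rubrics mapping; real metrics define their own score descriptions.
rubrics = {
    "score1_description": "The response does not address the question.",
    "score5_description": "The response fully and accurately addresses the question.",
}

rubrics_text = "\n".join(f"{key}: {value}" for key, value in rubrics.items())
print(rubrics_text)
# score1_description: The response does not address the question.
# score5_description: The response fully and accurately addresses the question.
```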
6 changes: 1 addition & 5 deletions src/ragas/metrics/base.py
@@ -13,11 +13,7 @@

from ragas._analytics import EvaluationEvent, _analytics_batcher
from ragas.callbacks import ChainType, new_group
-from ragas.dataset_schema import (
-    MetricAnnotation,
-    MultiTurnSample,
-    SingleTurnSample,
-)
+from ragas.dataset_schema import MetricAnnotation, MultiTurnSample, SingleTurnSample
from ragas.executor import is_event_loop_running
from ragas.losses import BinaryMetricLoss, MSELoss
from ragas.prompt import FewShotPydanticPrompt, PromptMixin