Skip to content

Commit

Permalink
Chore: add language to event tracking (#592)
Browse files Browse the repository at this point in the history
Co-authored-by: jjmachan <[email protected]>
  • Loading branch information
shahules786 and jjmachan authored Feb 15, 2024
1 parent b667668 commit 3834fe5
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 6 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
ragas: ${{ steps.filter.outputs.ragas }}
docs: ${{ steps.filter.outputs.docs }}
steps:
- uses: actions/checkout@v3
- uses: dorny/paths-filter@v2
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: filter
with:
base: "main"
Expand Down Expand Up @@ -59,7 +59,7 @@ jobs:
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0 # fetch all tags and branches

Expand Down Expand Up @@ -108,7 +108,7 @@ jobs:
if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Setup python
uses: actions/setup-python@v4
Expand Down
2 changes: 2 additions & 0 deletions src/ragas/_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,14 @@ class EvaluationEvent(BaseEvent):
metrics: t.List[str]
evaluation_mode: str
num_rows: int
language: str


# Analytics event payload describing one test-set generation run.
# Field meanings below follow the generate() call site shown in this commit.
class TesetGenerationEvent(BaseEvent):
# Lower-cased class name of each evolution in the distributions mapping.
evolution_names: t.List[str]
# The value stored in the distributions mapping for each evolution, in the
# same order as evolution_names.
evolution_percentages: t.List[float]
# Number of rows in the generated test dataset.
num_rows: int
# Language reported by the first evolution that exposes one, or "" when no
# evolution reports a language.
language: str


@silent
Expand Down
4 changes: 4 additions & 0 deletions src/ragas/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM
from ragas.metrics.critique import AspectCritique
from ragas.run_config import RunConfig
from ragas.utils import get_feature_language

# from ragas.metrics.critique import AspectCritique
from ragas.validation import (
Expand Down Expand Up @@ -249,12 +250,15 @@ def evaluate(

# log the evaluation event
metrics_names = [m.name for m in metrics]
metric_lang = [get_feature_language(m) for m in metrics]
metric_lang = np.unique([m for m in metric_lang if m is not None])
track(
EvaluationEvent(
event_type="evaluation",
metrics=metrics_names,
evaluation_mode="",
num_rows=dataset.shape[0],
language=metric_lang[0] if len(metric_lang) > 0 else "",
)
)
return result
Expand Down
5 changes: 4 additions & 1 deletion src/ragas/testset/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
)
from ragas.testset.extractor import KeyphraseExtractor
from ragas.testset.filters import EvolutionFilter, NodeFilter, QuestionFilter
from ragas.utils import check_if_sum_is_close, is_nan
from ragas.utils import check_if_sum_is_close, get_feature_language, is_nan

if t.TYPE_CHECKING:
from langchain_core.documents import Document as LCDocument
Expand Down Expand Up @@ -251,12 +251,15 @@ def generate(
# due to failed evolutions. MaxRetriesExceeded is a common reason
test_data_rows = [r for r in test_data_rows if not is_nan(r)]
test_dataset = TestDataset(test_data=test_data_rows)
evol_lang = [get_feature_language(e) for e in distributions]
evol_lang = [e for e in evol_lang if e is not None]
track(
TesetGenerationEvent(
event_type="testset_generation",
evolution_names=[e.__class__.__name__.lower() for e in distributions],
evolution_percentages=[distributions[e] for e in distributions],
num_rows=len(test_dataset.test_data),
language=evol_lang[0] if len(evol_lang) > 0 else "",
)
)

Expand Down
15 changes: 15 additions & 0 deletions src/ragas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@

import numpy as np

if t.TYPE_CHECKING:
from ragas.metrics.base import Metric
from ragas.testset.evolutions import Evolution

DEBUG_ENV_VAR = "RAGAS_DEBUG"


Expand Down Expand Up @@ -57,3 +61,14 @@ def is_nan(x):
return np.isnan(x)
except TypeError:
return False


def get_feature_language(feature: t.Union[Metric, Evolution]) -> t.Optional[str]:
    """Return the language of the first ``Prompt`` attribute held by *feature*.

    The instance attributes of ``feature`` (as exposed by ``vars``) are
    scanned in their stored order, and the ``language`` of every attribute
    that is a ``Prompt`` instance is collected.

    Returns:
        The ``language`` of the first ``Prompt`` attribute found, or ``None``
        when ``feature`` has no ``Prompt`` attributes.
    """
    # Function-scope import kept from the original; presumably it avoids an
    # import cycle between ragas.utils and ragas.llms — confirm before moving.
    from ragas.llms.prompt import Prompt

    prompt_languages = []
    for attribute in vars(feature).values():
        if isinstance(attribute, Prompt):
            prompt_languages.append(attribute.language)

    if not prompt_languages:
        return None
    return prompt_languages[0]
8 changes: 7 additions & 1 deletion tests/unit/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,18 @@ def test_evaluation_event():
from ragas._analytics import EvaluationEvent

evaluation_event = EvaluationEvent(
event_type="evaluation", metrics=["harmfulness"], num_rows=1, evaluation_mode=""
event_type="evaluation",
metrics=["harmfulness"],
num_rows=1,
evaluation_mode="",
language="english",
)

payload = dict(evaluation_event)
assert isinstance(payload.get("user_id"), str)
assert isinstance(payload.get("evaluation_mode"), str)
assert isinstance(payload.get("metrics"), list)
assert isinstance(payload.get("language"), str)


def setup_user_id_filepath(tmp_path, monkeypatch):
Expand Down Expand Up @@ -101,6 +106,7 @@ def test_testset_generation_tracking(monkeypatch):
evolution_names=[e.__class__.__name__.lower() for e in distributions],
evolution_percentages=[distributions[e] for e in distributions],
num_rows=10,
language="english",
)

assert dict(testset_event_payload)["evolution_names"] == [
Expand Down

0 comments on commit 3834fe5

Please sign in to comment.