Add Ragas integration #404

Merged
13 commits merged on Feb 16, 2024
4 changes: 4 additions & 0 deletions .github/workflows/ragas.yml
@@ -52,5 +52,9 @@ jobs:
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run lint:all

- name: Generate docs
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run docs

- name: Run tests
run: hatch run cov
4 changes: 2 additions & 2 deletions integrations/ragas/README.md
@@ -1,7 +1,7 @@
# ragas-haystack

[![PyPI - Version](https://img.shields.io/pypi/v/uptrain-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/uptrain-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack)

---

@@ -1,7 +1,6 @@
# A valid OpenAI API key is required to run this example.
# A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.

from haystack import Pipeline
from haystack.utils import Secret

from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

@@ -28,15 +27,26 @@
]

pipeline = Pipeline()
evaluator = RagasEvaluator(
evaluator_context = RagasEvaluator(
    metric=RagasMetric.CONTEXT_PRECISION,
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)
evaluator_aspect = RagasEvaluator(
    metric=RagasMetric.ASPECT_CRITIQUE,
    metric_params={"name": "custom", "definition": "Is this answer problematic for children?", "strictness": 3},
)
pipeline.add_component("evaluator_context", evaluator_context)
pipeline.add_component("evaluator_aspect", evaluator_aspect)

# Each metric expects a specific set of parameters as input. Refer to the
# Ragas class' documentation for more details.
results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "ground_truths": GROUND_TRUTHS}})
results = pipeline.run(
    {
        "evaluator_context": {"questions": QUESTIONS, "contexts": CONTEXTS, "ground_truths": GROUND_TRUTHS},
        "evaluator_aspect": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES},
    }
)


for output in results["evaluator"]["results"]:
    print(output)
for component in ["evaluator_context", "evaluator_aspect"]:
    for output in results[component]["results"]:
        print(output)
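Not part of the diff: since the component's inputs and outputs are visible above, a pipeline is not strictly required to try the evaluator out. The following is a minimal sketch that calls a single evaluator's run() method directly; the sample data values and shapes are illustrative assumptions, and a valid OPENAI_API_KEY environment variable is still required.

from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

# Hypothetical sample data; shapes follow the updated example's usage
# (one list of context strings per question), not a confirmed specification.
QUESTIONS = ["Which is the most popular global sport?"]
CONTEXTS = [["Football is followed by billions of fans worldwide."]]
GROUND_TRUTHS = ["Football is the most popular sport in the world."]

evaluator = RagasEvaluator(metric=RagasMetric.CONTEXT_PRECISION)
# Haystack components can be invoked directly; the output mirrors the pipeline result above.
output = evaluator.run(questions=QUESTIONS, contexts=CONTEXTS, ground_truths=GROUND_TRUTHS)
for result in output["results"]:
    print(result)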
38 changes: 0 additions & 38 deletions integrations/ragas/example/example_aspect_critique_custom.py

This file was deleted.

38 changes: 0 additions & 38 deletions integrations/ragas/example/example_aspect_critique_harmfulness.py

This file was deleted.

5 changes: 3 additions & 2 deletions integrations/ragas/pyproject.toml
@@ -21,7 +21,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.0.0b6", "ragas>=0.1.0"]
dependencies = ["haystack-ai>=2.0.0b6", "ragas>=0.1.0rc1"]

[project.urls]
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ragas"
@@ -40,12 +40,13 @@ root = "../.."
git_describe_command = 'git describe --tags --match="integrations/ragas-v[0-9]*"'

[tool.hatch.envs.default]
dependencies = ["coverage[toml]>=6.5", "pytest"]
dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
test-cov = "coverage run -m pytest {args:tests}"
cov-report = ["- coverage combine", "coverage report"]
cov = ["test-cov", "cov-report"]
docs = ["pydoc-markdown pydoc/config.yml"]

[[tool.hatch.envs.all.matrix]]
python = ["3.8", "3.9", "3.10", "3.11"]
@@ -1,4 +1,4 @@
from .evaluator import RagasEvaluator
from .metrics import RagasMetric, RagasMetricAspect
from .metrics import RagasMetric

__all__ = ("RagasEvaluator", "RagasMetric", "RagasMetricAspect")
__all__ = ("RagasEvaluator", "RagasMetric")
@@ -1,17 +1,14 @@
import json
from typing import Any, Callable, Dict, List, Optional, Union

from datasets import Dataset
from datasets import Dataset # type: ignore
from haystack import DeserializationError, component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace

from ragas import evaluate
from ragas.evaluation import Result
from ragas.metrics import AspectCritique
from ragas.metrics.base import Metric
from ragas import evaluate # type: ignore
from ragas.evaluation import Result # type: ignore
from ragas.metrics.base import Metric # type: ignore

from .metrics import (
METRIC_ASPECTS,
METRIC_DESCRIPTORS,
InputConverters,
OutputConverters,
@@ -24,20 +21,20 @@ class RagasEvaluator:
"""
A component that uses the Ragas framework to evaluate inputs against a specific metric.

The supported metrics are defined by :class:`RagasMetric`. The inputs of the component are
metric-dependent. The output is a nested list of evaluation results where each inner list
contains the results for a single input.
The supported metrics are defined by `RagasMetric`.
Most of them require an OpenAI API key to be provided as an environment variable "OPENAI_API_KEY".
The inputs of the component are metric-dependent.
The output is a nested list of evaluation results where each inner list contains the results for a single input.
"""

# Wrapped for easy mocking.
_backend_callable: Callable
__backend_metric: Metric

def __init__(
self,
metric: Union[str, RagasMetric],
metric_params: Optional[Dict[str, Any]] = None,
*,
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
):
"""
Construct a new Ragas evaluator.
@@ -46,60 +43,31 @@ def __init__(
The metric to use for evaluation.
:param metric_params:
Parameters to pass to the metric's constructor.
:param api_key:
The API key to use.
"""
self.metric = metric if isinstance(metric, RagasMetric) else RagasMetric.from_str(metric)
self.metric_params = metric_params
self.metric_params = metric_params or {}
self.descriptor = METRIC_DESCRIPTORS[self.metric]
self.api_key = api_key

self._init_backend()
self._init_metric()

expected_inputs = self.descriptor.input_parameters
component.set_input_types(self, **expected_inputs)

@staticmethod
def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
return evaluate(dataset, [metric])

def _init_backend(self):
"""
Initialize the Ragas backend and validate inputs.
"""
if self.metric == RagasMetric.ASPECT_CRITIQUE:
if not self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"Expected metric parameters describing the aspect to critique but got none."
)
raise ValueError(msg)
if "aspect" in self.metric_params and ("name" in self.metric_params or "definition" in self.metric_params):
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a predefined aspect is selected, no additional metric parameters are allowed."
)
raise ValueError(msg)
elif "name" in self.metric_params and "definition" not in self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a name of a custom aspect is provided, a definition must be provided as well."
)
raise ValueError(msg)
elif "definition" in self.metric_params and "name" not in self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a definition of a custom aspect is provided, a name must be provided as well."
)
raise ValueError(msg)
elif self.metric_params:
msg = (
f"Unexpected init parameters for Ragas metric '{self.metric}'. "
f"Additional parameters only supported for AspectCritique."
)
raise ValueError(msg)
self._backend_callable = RagasEvaluator._invoke_evaluate

def _init_metric(self):
self.descriptor.input_validator(self.metric, self.metric_params)
self.__backend_metric = self.descriptor.backend(**self.metric_params)

@staticmethod
def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
return evaluate(dataset, [metric])

@component.output_types(results=List[List[Dict[str, Any]]])
def run(self, **inputs) -> Dict[str, Any]:
"""
@@ -110,7 +78,6 @@ def run(self, **inputs) -> Dict[str, Any]:
p = Pipeline()
evaluator = RagasEvaluator(
metric=RagasMetric.CONTEXT_PRECISION,
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
p.add_component("evaluator", evaluator)

@@ -132,19 +99,11 @@ def run(self, **inputs) -> Dict[str, Any]:
converted_inputs: List[Dict[str, str]] = list(self.descriptor.input_converter(**inputs)) # type: ignore

dataset = Dataset.from_list(converted_inputs)
metric = None
if self.metric == RagasMetric.ASPECT_CRITIQUE and self.metric_params:
if "aspect" in self.metric_params:
metric = METRIC_ASPECTS[self.metric_params["aspect"]]
else:
metric = AspectCritique(**self.metric_params)
else:
metric = self.descriptor.backend
results = self._backend_callable(dataset=dataset, metric=metric)
results = self._backend_callable(dataset=dataset, metric=self.__backend_metric)

OutputConverters.validate_outputs(results)
converted_results = [
[result.to_dict()] for result in OutputConverters.extract_results(results, self.metric, self.metric_params)
[result.to_dict()] for result in self.descriptor.output_converter(results, self.metric, self.metric_params)
]

return {"results": converted_results}
@@ -169,7 +128,6 @@ def check_serializable(obj: Any):
self,
metric=self.metric,
metric_params=self.metric_params,
api_key=self.api_key.to_dict(),
)

@classmethod
@@ -180,5 +138,4 @@ def from_dict(cls, data: Dict[str, Any]) -> "RagasEvaluator":
:param data:
The dictionary to deserialize from.
"""
deserialize_secrets_inplace(data["init_parameters"], ["api_key"])
return default_from_dict(cls, data)
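metrics.py itself is not shown in this diff, but the refactor above moves per-metric knowledge out of the evaluator and into METRIC_DESCRIPTORS. A rough sketch of the descriptor shape implied by the attribute accesses in evaluator.py; only the field names are taken from that usage, and the actual types and layout are assumptions:

from dataclasses import dataclass
from typing import Any, Callable, Dict, Type

# Hypothetical reconstruction of a METRIC_DESCRIPTORS entry, inferred from how
# RagasEvaluator uses it; the real class in metrics.py may differ.
@dataclass(frozen=True)
class MetricDescriptor:
    # Constructs the backing Ragas metric from metric_params (see _init_metric above).
    backend: Callable[..., Any]
    # Passed to component.set_input_types(...) in __init__.
    input_parameters: Dict[str, Type]
    # Called as input_validator(metric, metric_params); expected to raise on bad params.
    input_validator: Callable[..., None]
    # Maps the component's inputs to rows for Dataset.from_list(...) in run().
    input_converter: Callable[..., Any]
    # Called as output_converter(results, metric, metric_params) in run().
    output_converter: Callable[..., Any]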