Add Ragas integration #404

Merged
13 commits merged on Feb 16, 2024
4 changes: 4 additions & 0 deletions .github/workflows/ragas.yml
@@ -52,5 +52,9 @@ jobs:
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run lint:all

- name: Generate docs
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run docs

- name: Run tests
run: hatch run cov
4 changes: 2 additions & 2 deletions integrations/ragas/README.md
@@ -1,7 +1,7 @@
# ragas-haystack

[![PyPI - Version](https://img.shields.io/pypi/v/uptrain-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/uptrain-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack)

---

@@ -1,7 +1,6 @@
# A valid OpenAI API key is required to run this example.
# A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.

from haystack import Pipeline
from haystack.utils import Secret

from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

@@ -28,15 +27,26 @@
]

pipeline = Pipeline()
evaluator = RagasEvaluator(
evaluator_context = RagasEvaluator(
    metric=RagasMetric.CONTEXT_PRECISION,
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)
evaluator_aspect = RagasEvaluator(
    metric=RagasMetric.ASPECT_CRITIQUE,
    metric_params={"name": "custom", "definition": "Is this answer problematic for children?", "strictness": 3},
)
pipeline.add_component("evaluator_context", evaluator_context)
pipeline.add_component("evaluator_aspect", evaluator_aspect)

# Each metric expects a specific set of parameters as input. Refer to the
# Ragas class' documentation for more details.
results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "ground_truths": GROUND_TRUTHS}})
results = pipeline.run(
    {
        "evaluator_context": {"questions": QUESTIONS, "contexts": CONTEXTS, "ground_truths": GROUND_TRUTHS},
        "evaluator_aspect": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES},
    }
)


for output in results["evaluator"]["results"]:
    print(output)
for component in ["evaluator_context", "evaluator_aspect"]:
    for output in results[component]["results"]:
        print(output)
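Not part of the diff: since the component's inputs and outputs are visible above, a pipeline is not strictly required to try the evaluator out. The following is a minimal sketch that calls a single evaluator's run() method directly; the sample data values and shapes are illustrative assumptions, and a valid OPENAI_API_KEY environment variable is still required.

from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

# Hypothetical sample data; shapes follow the updated example's usage
# (one list of context strings per question), not a confirmed specification.
QUESTIONS = ["Which is the most popular global sport?"]
CONTEXTS = [["Football is followed by billions of fans worldwide."]]
GROUND_TRUTHS = ["Football is the most popular sport in the world."]

evaluator = RagasEvaluator(metric=RagasMetric.CONTEXT_PRECISION)
# Haystack components can be invoked directly; the output mirrors the pipeline result above.
output = evaluator.run(questions=QUESTIONS, contexts=CONTEXTS, ground_truths=GROUND_TRUTHS)
for result in output["results"]:
    print(result)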
38 changes: 0 additions & 38 deletions integrations/ragas/example/example_aspect_critique_custom.py

This file was deleted.

38 changes: 0 additions & 38 deletions integrations/ragas/example/example_aspect_critique_harmfulness.py

This file was deleted.

5 changes: 3 additions & 2 deletions integrations/ragas/pyproject.toml
@@ -21,7 +21,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.0.0b6", "ragas>=0.1.0"]
dependencies = ["haystack-ai>=2.0.0b6", "ragas>=0.1.0rc1"]

[project.urls]
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ragas"
@@ -40,12 +40,13 @@ root = "../.."
git_describe_command = 'git describe --tags --match="integrations/ragas-v[0-9]*"'

[tool.hatch.envs.default]
dependencies = ["coverage[toml]>=6.5", "pytest"]
dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
test-cov = "coverage run -m pytest {args:tests}"
cov-report = ["- coverage combine", "coverage report"]
cov = ["test-cov", "cov-report"]
docs = ["pydoc-markdown pydoc/config.yml"]

[[tool.hatch.envs.all.matrix]]
python = ["3.8", "3.9", "3.10", "3.11"]
@@ -1,4 +1,4 @@
from .evaluator import RagasEvaluator
from .metrics import RagasMetric, RagasMetricAspect
from .metrics import RagasMetric

__all__ = ("RagasEvaluator", "RagasMetric", "RagasMetricAspect")
__all__ = ("RagasEvaluator", "RagasMetric")
@@ -1,17 +1,14 @@
import json
from typing import Any, Callable, Dict, List, Optional, Union

from datasets import Dataset
from datasets import Dataset # type: ignore
from haystack import DeserializationError, component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace

from ragas import evaluate
from ragas.evaluation import Result
from ragas.metrics import AspectCritique
from ragas.metrics.base import Metric
from ragas import evaluate # type: ignore
from ragas.evaluation import Result # type: ignore
from ragas.metrics.base import Metric # type: ignore

from .metrics import (
METRIC_ASPECTS,
METRIC_DESCRIPTORS,
InputConverters,
OutputConverters,
@@ -24,20 +21,20 @@ class RagasEvaluator:
"""
A component that uses the Ragas framework to evaluate inputs against a specific metric.

The supported metrics are defined by :class:`RagasMetric`. The inputs of the component are
metric-dependent. The output is a nested list of evaluation results where each inner list
contains the results for a single input.
The supported metrics are defined by `RagasMetric`.
Most of them require an OpenAI API key to be provided as an environment variable "OPENAI_API_KEY".
The inputs of the component are metric-dependent.
The output is a nested list of evaluation results where each inner list contains the results for a single input.
"""

# Wrapped for easy mocking.
_backend_callable: Callable
__backend_metric: Metric

def __init__(
self,
metric: Union[str, RagasMetric],
metric_params: Optional[Dict[str, Any]] = None,
*,
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
):
"""
Construct a new Ragas evaluator.
@@ -46,60 +43,31 @@ def __init__(
The metric to use for evaluation.
:param metric_params:
Parameters to pass to the metric's constructor.
:param api_key:
The API key to use.
"""
self.metric = metric if isinstance(metric, RagasMetric) else RagasMetric.from_str(metric)
self.metric_params = metric_params
self.metric_params = metric_params or {}
self.descriptor = METRIC_DESCRIPTORS[self.metric]
self.api_key = api_key

self._init_backend()
self._init_metric()

expected_inputs = self.descriptor.input_parameters
component.set_input_types(self, **expected_inputs)

@staticmethod
def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
return evaluate(dataset, [metric])

def _init_backend(self):
"""
Initialize the Ragas backend and validate inputs.
"""
if self.metric == RagasMetric.ASPECT_CRITIQUE:
if not self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"Expected metric parameters describing the aspect to critique but got none."
)
raise ValueError(msg)
if "aspect" in self.metric_params and ("name" in self.metric_params or "definition" in self.metric_params):
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a predefined aspect is selected, no additional metric parameters are allowed."
)
raise ValueError(msg)
elif "name" in self.metric_params and "definition" not in self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a name of a custom aspect is provided, a definition must be provided as well."
)
raise ValueError(msg)
elif "definition" in self.metric_params and "name" not in self.metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{self.metric}'. "
f"If a definition of a custom aspect is provided, a name must be provided as well."
)
raise ValueError(msg)
elif self.metric_params:
msg = (
f"Unexpected init parameters for Ragas metric '{self.metric}'. "
f"Additional parameters only supported for AspectCritique."
)
raise ValueError(msg)
self._backend_callable = RagasEvaluator._invoke_evaluate

def _init_metric(self):
self.descriptor.input_validator(self.metric, self.metric_params)
self.__backend_metric = self.descriptor.backend(**self.metric_params)

@staticmethod
def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
return evaluate(dataset, [metric])

@component.output_types(results=List[List[Dict[str, Any]]])
def run(self, **inputs) -> Dict[str, Any]:
"""
@@ -110,7 +78,6 @@ def run(self, **inputs) -> Dict[str, Any]:
p = Pipeline()
evaluator = RagasEvaluator(
metric=RagasMetric.CONTEXT_PRECISION,
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
p.add_component("evaluator", evaluator)

@@ -132,19 +99,11 @@ def run(self, **inputs) -> Dict[str, Any]:
converted_inputs: List[Dict[str, str]] = list(self.descriptor.input_converter(**inputs)) # type: ignore

dataset = Dataset.from_list(converted_inputs)
metric = None
if self.metric == RagasMetric.ASPECT_CRITIQUE and self.metric_params:
if "aspect" in self.metric_params:
metric = METRIC_ASPECTS[self.metric_params["aspect"]]
else:
metric = AspectCritique(**self.metric_params)
else:
metric = self.descriptor.backend
results = self._backend_callable(dataset=dataset, metric=metric)
results = self._backend_callable(dataset=dataset, metric=self.__backend_metric)

OutputConverters.validate_outputs(results)
converted_results = [
[result.to_dict()] for result in OutputConverters.extract_results(results, self.metric, self.metric_params)
[result.to_dict()] for result in self.descriptor.output_converter(results, self.metric, self.metric_params)
]

return {"results": converted_results}
@@ -169,7 +128,6 @@ def check_serializable(obj: Any):
self,
metric=self.metric,
metric_params=self.metric_params,
api_key=self.api_key.to_dict(),
)

@classmethod
@@ -180,5 +138,4 @@ def from_dict(cls, data: Dict[str, Any]) -> "RagasEvaluator":
:param data:
The dictionary to deserialize from.
"""
deserialize_secrets_inplace(data["init_parameters"], ["api_key"])
return default_from_dict(cls, data)
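metrics.py itself is not shown in this diff, but the refactor above moves per-metric knowledge out of the evaluator and into METRIC_DESCRIPTORS. A rough sketch of the descriptor shape implied by the attribute accesses in evaluator.py; only the field names are taken from that usage, and the actual types and layout are assumptions:

from dataclasses import dataclass
from typing import Any, Callable, Dict, Type

# Hypothetical reconstruction of a METRIC_DESCRIPTORS entry, inferred from how
# RagasEvaluator uses it; the real class in metrics.py may differ.
@dataclass(frozen=True)
class MetricDescriptor:
    # Constructs the backing Ragas metric from metric_params (see _init_metric above).
    backend: Callable[..., Any]
    # Passed to component.set_input_types(...) in __init__.
    input_parameters: Dict[str, Type]
    # Called as input_validator(metric, metric_params); expected to raise on bad params.
    input_validator: Callable[..., None]
    # Maps the component's inputs to rows for Dataset.from_list(...) in run().
    input_converter: Callable[..., Any]
    # Called as output_converter(results, metric, metric_params) in run().
    output_converter: Callable[..., Any]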