Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Ragas integration #404

Merged
merged 13 commits into from
Feb 16, 2024
2 changes: 1 addition & 1 deletion integrations/ragas/pydoc/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ processors:
do_not_filter_modules: false
skip_empty_modules: true
- type: filter
expression: "name not in ['MetricResult', 'MetricDescriptor', 'OutputConverters', 'InputConverters', 'METRIC_ASPECTS', 'METRIC_DESCRIPTORS']"
expression: "name not in ['InputConverters', 'MetricDescriptor', 'MetricParamsValidators', 'OutputConverters', 'METRIC_DESCRIPTORS']"
- type: smart
- type: crossref
renderer:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class RagasEvaluator:

# Wrapped for easy mocking.
_backend_callable: Callable
__backend_metric: Metric
_backend_metric: Metric

def __init__(
self,
Expand Down Expand Up @@ -61,8 +61,8 @@ def _init_backend(self):
self._backend_callable = RagasEvaluator._invoke_evaluate

def _init_metric(self):
self.descriptor.input_validator(self.metric, self.metric_params)
self.__backend_metric = self.descriptor.backend(**self.metric_params)
self.descriptor.metric_params_validator(self.metric, self.descriptor.init_parameters, self.metric_params)
self._backend_metric = self.descriptor.backend(**self.metric_params)

@staticmethod
def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
Expand Down Expand Up @@ -99,7 +99,7 @@ def run(self, **inputs) -> Dict[str, Any]:
converted_inputs: List[Dict[str, str]] = list(self.descriptor.input_converter(**inputs)) # type: ignore

dataset = Dataset.from_list(converted_inputs)
results = self._backend_callable(dataset=dataset, metric=self.__backend_metric)
results = self._backend_callable(dataset=dataset, metric=self._backend_metric)

OutputConverters.validate_outputs(results)
converted_results = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class MetricDescriptor:
The metric.
:param backend:
The associated Ragas metric class.
:param metric_params_validator:
Callable that validates metric parameters.
:param input_parameters:
Parameters accepted by the metric. This is used
to set the input types of the evaluator component.
Expand All @@ -123,27 +125,27 @@ class MetricDescriptor:
Callable that converts the Ragas output format to our output format.
Accepts a single output parameter and returns a list of results derived from it.
:param init_parameters:
Additional parameters that need to be passed to the metric class during initialization.
Additional parameters that are allowed to be passed to the metric class during initialization.
"""

metric: RagasMetric
backend: Type[Metric]
input_validator: Callable[[Any], None]
input_parameters: Dict[str, Type]
input_converter: Callable[[Any], Iterable[Dict[str, str]]]
output_converter: Callable[[Result, RagasMetric, Dict[str, Any]], List[MetricResult]]
init_parameters: Optional[Dict[str, Type[Any]]] = None
metric_params_validator: Callable[[RagasMetric, List[str], Dict[str, Any]], None]
julian-risch marked this conversation as resolved.
Show resolved Hide resolved
init_parameters: Optional[List[str]] = None

@classmethod
def new(
cls,
metric: RagasMetric,
backend: Type[Metric],
input_validator,
input_converter: Callable[[Any], Iterable[Dict[str, str]]],
output_converter: Optional[Callable[[Result, RagasMetric, Dict[str, Any]], List[MetricResult]]] = None,
metric_params_validator: Optional[Callable[[RagasMetric, List[str], Dict[str, Any]], None]] = None,
*,
init_parameters: Optional[Dict[str, Type]] = None,
init_parameters: Optional[List[str]] = None,
) -> "MetricDescriptor":
input_converter_signature = inspect.signature(input_converter)
input_parameters = {}
Expand All @@ -157,47 +159,32 @@ def new(
return cls(
metric=metric,
backend=backend,
input_validator=input_validator,
input_parameters=input_parameters,
input_converter=input_converter,
output_converter=output_converter if output_converter is not None else OutputConverters.default,
metric_params_validator=(
metric_params_validator
if metric_params_validator is not None
else MetricParamsValidators.validate_metric_parameters
),
init_parameters=init_parameters,
)


class InputValidators:
class MetricParamsValidators:
"""
Validates for input parameters.
Validates metric parameters.

The signature of the converter functions serves as the ground-truth of the
expected input parameters of a given metric. They are also responsible for validating
the input parameters and converting them to the format expected by Ragas.
Depending on the metric type, different metric parameters are allowed.
The validator functions are responsible for validating the parameters and raising an error if they are invalid.
"""

@staticmethod
def validate_empty_metric_parameters(metric: RagasMetric, metric_params: Dict[str, Any]) -> None:
if metric_params:
msg = f"Unexpected init parameters '{metric_params}' for metric '{metric}'."
raise ValueError(msg)

@staticmethod
def validate_aspect_critique_parameters(metric: RagasMetric, metric_params: Dict[str, Any]) -> None:
if not metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{metric}'. "
f"Expected metric parameters describing the aspect to critique but got none."
)
raise ValueError(msg)
if "name" in metric_params and "definition" not in metric_params:
msg = (
f"Invalid init parameters for Ragas metric '{metric}'. "
f"If a name of a custom aspect is provided, a definition must be provided as well."
)
raise ValueError(msg)
elif "definition" in metric_params and "name" not in metric_params:
def validate_metric_parameters(metric: RagasMetric, allowed: List[str], received: Dict[str, Any]) -> None:
if not set(received).issubset(allowed):
msg = (
f"Invalid init parameters for Ragas metric '{metric}'. "
f"If a definition of a custom aspect is provided, a name must be provided as well."
f"Allowed metric parameters {allowed} but got '{received}'"
)
raise ValueError(msg)

Expand Down Expand Up @@ -324,56 +311,56 @@ def aspect_critique(output: Result, _: RagasMetric, metric_params: Dict[str, Any
RagasMetric.ANSWER_CORRECTNESS: MetricDescriptor.new(
RagasMetric.ANSWER_CORRECTNESS,
AnswerCorrectness,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_response_ground_truth, # type: ignore
init_parameters=["name", "weights", "answer_similarity"],
),
RagasMetric.FAITHFULNESS: MetricDescriptor.new(
RagasMetric.FAITHFULNESS,
Faithfulness,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context_response, # type: ignore
init_parameters=["name"],
),
RagasMetric.ANSWER_SIMILARITY: MetricDescriptor.new(
RagasMetric.ANSWER_SIMILARITY,
AnswerSimilarity,
InputValidators.validate_empty_metric_parameters,
InputConverters.response_ground_truth, # type: ignore
init_parameters=["name", "model_name", "threshold"],
),
RagasMetric.CONTEXT_PRECISION: MetricDescriptor.new(
RagasMetric.CONTEXT_PRECISION,
ContextPrecision,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context_ground_truth, # type: ignore
init_parameters=["name"],
),
RagasMetric.CONTEXT_UTILIZATION: MetricDescriptor.new(
RagasMetric.CONTEXT_UTILIZATION,
ContextUtilization,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context_response, # type: ignore
init_parameters=["name"],
),
RagasMetric.CONTEXT_RECALL: MetricDescriptor.new(
RagasMetric.CONTEXT_RECALL,
ContextRecall,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context_ground_truth, # type: ignore
init_parameters=["name"],
),
RagasMetric.ASPECT_CRITIQUE: MetricDescriptor.new(
RagasMetric.ASPECT_CRITIQUE,
AspectCritique,
InputValidators.validate_aspect_critique_parameters,
InputConverters.question_context_response, # type: ignore
OutputConverters.aspect_critique,
init_parameters=["name", "definition", "strictness", "llm"],
),
RagasMetric.CONTEXT_RELEVANCY: MetricDescriptor.new(
RagasMetric.CONTEXT_RELEVANCY,
ContextRelevancy,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context, # type: ignore
init_parameters=["name"],
),
RagasMetric.ANSWER_RELEVANCY: MetricDescriptor.new(
RagasMetric.ANSWER_RELEVANCY,
AnswerRelevancy,
InputValidators.validate_empty_metric_parameters,
InputConverters.question_context_response, # type: ignore
init_parameters=["name", "strictness", "embeddings"],
),
}
10 changes: 5 additions & 5 deletions integrations/ragas/tests/test_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,25 +74,25 @@ def test_evaluator_metric_init_params():
'name': 'harmfulness',
}

with pytest.raises(ValueError, match="Invalid init parameters"):
with pytest.raises(ValueError, match="Expects a name"):
RagasEvaluator(RagasMetric.ASPECT_CRITIQUE, metric_params=None)

with pytest.raises(ValueError, match="Invalid init parameters"):
with pytest.raises(ValueError, match="Expects a name"):
RagasEvaluator(RagasMetric.ASPECT_CRITIQUE, metric_params={})

with pytest.raises(ValueError, match="Invalid init parameters"):
with pytest.raises(ValueError, match="Expects a name"):
RagasEvaluator(
RagasMetric.ASPECT_CRITIQUE,
metric_params={"definition": "custom definition"},
)

with pytest.raises(ValueError, match="Invalid init parameters"):
with pytest.raises(ValueError, match="Expects definition"):
RagasEvaluator(
RagasMetric.ASPECT_CRITIQUE,
metric_params={"name": "custom name"},
)

with pytest.raises(ValueError, match="Unexpected init parameters"):
with pytest.raises(ValueError, match="Invalid init parameters"):
RagasEvaluator(
RagasMetric.FAITHFULNESS,
metric_params={"check_numbers": True},
Expand Down