chore: Ragas - remove context relevancy metric (#917)
* ragas: remove context relevancy

* try removing rerun-failures

* add rerun-failures back, introduce pytest-asyncio

* add asyncio marker

* lower-bound pin
anakin87 authored Jul 24, 2024
1 parent 282ccc4 commit cd521cf
Showing 3 changed files with 3 additions and 23 deletions.
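For users of the integration, code that referenced `RagasMetric.CONTEXT_RELEVANCY` must switch to one of the remaining metrics (for example `CONTEXT_UTILIZATION` or `ANSWER_RELEVANCY`). Below is a minimal, hedged sketch of evaluator usage after this change; the import path, constructor, and `run` keyword names are assumptions inferred from the test parametrization in this diff rather than a verified API reference, and the sample data is invented for illustration.

```python
from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

# Sketch only: signatures are inferred from the test inputs in this commit,
# not confirmed against the component's documentation.
evaluator = RagasEvaluator(
    metric=RagasMetric.ANSWER_RELEVANCY,   # CONTEXT_RELEVANCY is no longer available
    metric_params={"strictness": 2},
)
result = evaluator.run(
    questions=["Which is the most popular global sport?"],
    contexts=[["Football is undoubtedly the world's most popular sport."]],
    responses=["Football is the most popular sport."],
)
print(result)
```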
integrations/ragas/pyproject.toml — 4 changes: 2 additions & 2 deletions
@@ -22,7 +22,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai", "ragas"]
+dependencies = ["haystack-ai", "ragas>=0.1.11"]
 
 [project.urls]
 Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ragas"
@@ -41,7 +41,7 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/ragas-v[0-9]*"'
 
 [tool.hatch.envs.default]
-dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
+dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "pytest-asyncio"]
 [tool.hatch.envs.default.scripts]
 test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
 test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
Expand Up @@ -12,7 +12,6 @@
AspectCritique, # type: ignore
ContextPrecision, # type: ignore
ContextRecall, # type: ignore
ContextRelevancy, # type: ignore
ContextUtilization, # type: ignore
Faithfulness, # type: ignore
)
@@ -81,10 +80,6 @@ class RagasMetric(RagasBaseEnum):
     #: Parameters - `name: str, definition: str, strictness: int`
     ASPECT_CRITIQUE = "aspect_critique"
 
-    #: Context relevancy.\
-    #: Inputs - `questions: List[str], contexts: List[List[str]]`
-    CONTEXT_RELEVANCY = "context_relevancy"
-
     #: Answer relevancy.\
     #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`\
     #: Parameters - `strictness: int`
@@ -329,11 +324,6 @@ def aspect_critique(output: Result, _: RagasMetric, metric_params: Optional[Dict
         OutputConverters.aspect_critique,
         init_parameters=["name", "definition", "strictness"],
     ),
-    RagasMetric.CONTEXT_RELEVANCY: MetricDescriptor.new(
-        RagasMetric.CONTEXT_RELEVANCY,
-        ContextRelevancy,
-        InputConverters.question_context,  # type: ignore
-    ),
     RagasMetric.ANSWER_RELEVANCY: MetricDescriptor.new(
         RagasMetric.ANSWER_RELEVANCY,
         AnswerRelevancy,
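The block removed above is an entry in a metric-descriptor registry: each `RagasMetric` member maps to a descriptor that bundles the underlying Ragas metric class with its input/output converters, so removing a metric means deleting its import, its enum member, and its registry entry. A simplified, hypothetical sketch of that pattern follows; `MetricDescriptor` and `InputConverters` appear in the diff, but the constructor and fields shown here are invented for illustration.

```python
from dataclasses import dataclass
from enum import Enum
from typing import Any, Callable, Dict, List, Optional


class RagasMetric(Enum):
    # Subset of the members that remain after this commit.
    CONTEXT_PRECISION = "context_precision"
    ANSWER_RELEVANCY = "answer_relevancy"


@dataclass
class MetricDescriptor:
    metric: RagasMetric
    backend_cls: type
    input_converter: Callable[..., Any]
    init_parameters: Optional[List[str]] = None


# Registry pattern: the evaluator looks up the descriptor for the selected
# metric; a removed metric simply has no entry here.
METRIC_DESCRIPTORS: Dict[RagasMetric, MetricDescriptor] = {
    RagasMetric.ANSWER_RELEVANCY: MetricDescriptor(
        RagasMetric.ANSWER_RELEVANCY,
        backend_cls=object,            # placeholder for ragas.metrics.AnswerRelevancy
        input_converter=lambda **kw: kw,
        init_parameters=["strictness"],
    ),
}
```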
integrations/ragas/tests/test_evaluator.py — 12 changes: 1 addition & 11 deletions
@@ -51,7 +51,6 @@ def evaluate(self, _, metric: Metric, **kwargs):
             RagasMetric.CONTEXT_UTILIZATION: Result(scores=Dataset.from_list([{"context_utilization": 1.0}])),
             RagasMetric.CONTEXT_RECALL: Result(scores=Dataset.from_list([{"context_recall": 0.9}])),
             RagasMetric.ASPECT_CRITIQUE: Result(scores=Dataset.from_list([{"harmfulness": 1.0}])),
-            RagasMetric.CONTEXT_RELEVANCY: Result(scores=Dataset.from_list([{"context_relevancy": 1.0}])),
             RagasMetric.ANSWER_RELEVANCY: Result(scores=Dataset.from_list([{"answer_relevancy": 0.4}])),
         }
         assert isinstance(metric, Metric)
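The mocked backend above returns canned `Result` objects whose scores are Hugging Face `datasets.Dataset` tables with one column per metric. A small sketch of how such a scores table can be built (the Ragas `Result` wrapper is omitted and the value is hypothetical):

```python
from datasets import Dataset

# One-row scores table shaped like the mocked evaluator output above.
scores = Dataset.from_list([{"answer_relevancy": 0.4}])
print(scores.column_names)  # ['answer_relevancy']
print(scores[0])            # {'answer_relevancy': 0.4}
```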
@@ -76,7 +75,6 @@ def evaluate(self, _, metric: Metric, **kwargs):
                 "large?",
             },
         ),
-        (RagasMetric.CONTEXT_RELEVANCY, None),
         (RagasMetric.ANSWER_RELEVANCY, {"strictness": 2}),
     ],
 )
@@ -160,7 +158,6 @@ def test_evaluator_serde():
                 "large?",
             },
         ),
-        (RagasMetric.CONTEXT_RELEVANCY, {"questions": [], "contexts": []}, None),
         (RagasMetric.ANSWER_RELEVANCY, {"questions": [], "contexts": [], "responses": []}, {"strictness": 2}),
     ],
 )
@@ -177,7 +174,6 @@ def test_evaluator_valid_inputs(current_metric, inputs, params):
 @pytest.mark.parametrize(
     "current_metric, inputs, error_string, params",
     [
-        (RagasMetric.CONTEXT_RELEVANCY, {"questions": {}, "contexts": []}, "to be a collection of type 'list'", None),
         (
             RagasMetric.FAITHFULNESS,
             {"questions": [1], "contexts": [2], "responses": [3]},
@@ -256,12 +252,6 @@ def test_evaluator_invalid_inputs(current_metric, inputs, error_string, params):
                 "large?",
             },
         ),
-        (
-            RagasMetric.CONTEXT_RELEVANCY,
-            {"questions": ["q8"], "contexts": [["c8"]]},
-            [[(None, 1.0)]],
-            None,
-        ),
         (
             RagasMetric.ANSWER_RELEVANCY,
             {"questions": ["q9"], "contexts": [["c9"]], "responses": ["r9"]},
Expand Down Expand Up @@ -293,6 +283,7 @@ def test_evaluator_outputs(current_metric, inputs, expected_outputs, metric_para
# This integration test validates the evaluator by running it against the
# OpenAI API. It is parameterized by the metric, the inputs to the evaluator
# and the metric parameters.
@pytest.mark.asyncio
@pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set")
@pytest.mark.parametrize(
"metric, inputs, metric_params",
@@ -337,7 +328,6 @@ def test_evaluator_outputs(current_metric, inputs, expected_outputs, metric_para
                 "large?",
             },
         ),
-        (RagasMetric.CONTEXT_RELEVANCY, {"questions": DEFAULT_QUESTIONS, "contexts": DEFAULT_CONTEXTS}, None),
         (
             RagasMetric.ANSWER_RELEVANCY,
             {"questions": DEFAULT_QUESTIONS, "contexts": DEFAULT_CONTEXTS, "responses": DEFAULT_RESPONSES},
