diff --git a/src/ragas/metrics/bleu_score.py b/src/ragas/metrics/_bleu_score.py similarity index 67% rename from src/ragas/metrics/bleu_score.py rename to src/ragas/metrics/_bleu_score.py index 818cf53f42..e28206b538 100644 --- a/src/ragas/metrics/bleu_score.py +++ b/src/ragas/metrics/_bleu_score.py @@ -2,8 +2,6 @@ from dataclasses import dataclass, field from langchain_core.callbacks import Callbacks -from nltk.tokenize import word_tokenize -from nltk.translate.bleu_score import corpus_bleu from ragas.dataset_schema import SingleTurnSample from ragas.metrics._faithfulness import HasSegmentMethod @@ -21,7 +19,16 @@ class BleuScore(SingleTurnMetric): sentence_segmenter: t.Optional[HasSegmentMethod] = None def __post_init__(self): + try: + from nltk.tokenize import word_tokenize + from nltk.translate.bleu_score import corpus_bleu + except ImportError: + raise ImportError( + "nltk is required for bleu score. Please install it using `pip install nltk`" + ) self.segmenter = get_segmenter() + self.word_tokenizer = word_tokenize + self.corpus_bleu = corpus_bleu def init(self, run_config: RunConfig): pass @@ -32,9 +39,11 @@ async def _single_turn_ascore( reference_sentences = self.segmenter.segment(sample.reference) response_sentences = self.segmenter.segment(sample.response) - reference = [[word_tokenize(reference)] for reference in reference_sentences] - response = [word_tokenize(response) for response in response_sentences] - score = corpus_bleu(reference, response, weights=self.weights) + reference = [ + [self.word_tokenizer(reference)] for reference in reference_sentences + ] + response = [self.word_tokenizer(response) for response in response_sentences] + score = self.corpus_bleu(reference, response, weights=self.weights) assert isinstance(score, float), "Expecting a float" return score diff --git a/src/ragas/metrics/_string.py b/src/ragas/metrics/_string.py index 9198d9c7cc..045d510a87 100644 --- a/src/ragas/metrics/_string.py +++ b/src/ragas/metrics/_string.py @@ -3,7 +3,6 @@ from enum import Enum from langchain_core.callbacks import Callbacks -from rapidfuzz import distance from ragas.dataset_schema import SingleTurnSample from ragas.metrics.base import MetricType, SingleTurnMetric @@ -16,13 +15,6 @@ class DistanceMeasure(Enum): JARO = "jaro" -DISTANCE_MEASURE_MAP = { - DistanceMeasure.LEVENSHTEIN: distance.Levenshtein, - DistanceMeasure.HAMMING: distance.Hamming, - DistanceMeasure.JARO: distance.Jaro, -} - - @dataclass class ExactMatch(SingleTurnMetric): name: str = "exact_match" # type: ignore @@ -42,6 +34,7 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: return await self._single_turn_ascore(SingleTurnSample(**row), callbacks) +@dataclass class StringPresent(SingleTurnMetric): name: str = "string_present" # type: ignore _required_columns: t.Dict[MetricType, t.Set[str]] = field( @@ -64,13 +57,28 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: return await self._single_turn_ascore(SingleTurnSample(**row), callbacks) -class StringDistance(SingleTurnMetric): - name: str = "string_distance" # type: ignore +@dataclass +class NonLLMStringSimilarity(SingleTurnMetric): + name: str = "non_llm_string_similarity" # type: ignore _required_columns: t.Dict[MetricType, t.Set[str]] = field( default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}} ) distance_measure: DistanceMeasure = DistanceMeasure.LEVENSHTEIN + def __post_init__(self): + try: + from rapidfuzz import distance + except ImportError: + raise ImportError( + "rapidfuzz is required for string distance. Please install it using `pip install rapidfuzz`" + ) + + self.distance_measure_map = { + DistanceMeasure.LEVENSHTEIN: distance.Levenshtein, + DistanceMeasure.HAMMING: distance.Hamming, + DistanceMeasure.JARO: distance.Jaro, + } + def init(self, run_config: RunConfig): pass @@ -81,7 +89,7 @@ async def _single_turn_ascore( response = sample.response assert isinstance(reference, str), "Expecting a string" assert isinstance(response, str), "Expecting a string" - return 1 - DISTANCE_MEASURE_MAP[self.distance_measure].normalized_distance( + return 1 - self.distance_measure_map[self.distance_measure].normalized_distance( reference, response )