Skip to content

Commit

Permalink
fix: non llm based metrics (#1268)
Browse files Browse the repository at this point in the history
1) rename metrics
2) delay import of optional dependencies
  • Loading branch information
shahules786 authored Sep 11, 2024
1 parent c615a9f commit 3076f50
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
from dataclasses import dataclass, field

from langchain_core.callbacks import Callbacks
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._faithfulness import HasSegmentMethod
Expand All @@ -21,7 +19,16 @@ class BleuScore(SingleTurnMetric):
sentence_segmenter: t.Optional[HasSegmentMethod] = None

def __post_init__(self):
    """Import nltk on demand and cache the helpers this metric uses.

    nltk is imported inside this method, so it is only needed when the
    method runs. On success the instance gets three attributes:
    ``segmenter`` (the result of ``get_segmenter()``), ``word_tokenizer``
    (nltk's ``word_tokenize``) and ``corpus_bleu`` (nltk's ``corpus_bleu``).

    Raises:
        ImportError: If the nltk imports fail. The original import error
            is attached as the explicit cause.
    """
    try:
        from nltk.tokenize import word_tokenize
        from nltk.translate.bleu_score import corpus_bleu
    except ImportError as err:
        # Chain explicitly so the underlying failure (for example a broken
        # nltk installation rather than a missing one) is reported as the
        # direct cause instead of incidental context.
        raise ImportError(
            "nltk is required for bleu score. Please install it using `pip install nltk`"
        ) from err
    self.segmenter = get_segmenter()
    self.word_tokenizer = word_tokenize
    self.corpus_bleu = corpus_bleu

def init(self, run_config: RunConfig):
    """Accept ``run_config`` and do nothing with it; always returns None."""
    return None
Expand All @@ -32,9 +39,11 @@ async def _single_turn_ascore(
reference_sentences = self.segmenter.segment(sample.reference)
response_sentences = self.segmenter.segment(sample.response)

reference = [[word_tokenize(reference)] for reference in reference_sentences]
response = [word_tokenize(response) for response in response_sentences]
score = corpus_bleu(reference, response, weights=self.weights)
reference = [
[self.word_tokenizer(reference)] for reference in reference_sentences
]
response = [self.word_tokenizer(response) for response in response_sentences]
score = self.corpus_bleu(reference, response, weights=self.weights)
assert isinstance(score, float), "Expecting a float"
return score

Expand Down
30 changes: 19 additions & 11 deletions src/ragas/metrics/_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from enum import Enum

from langchain_core.callbacks import Callbacks
from rapidfuzz import distance

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics.base import MetricType, SingleTurnMetric
Expand All @@ -16,13 +15,6 @@ class DistanceMeasure(Enum):
JARO = "jaro"


# Lookup table from each DistanceMeasure member to the same-named attribute of
# the `distance` name imported from rapidfuzz. The selected entry is used via
# its `normalized_distance(reference, response)` call when scoring.
DISTANCE_MEASURE_MAP = {
DistanceMeasure.LEVENSHTEIN: distance.Levenshtein,
DistanceMeasure.HAMMING: distance.Hamming,
DistanceMeasure.JARO: distance.Jaro,
}


@dataclass
class ExactMatch(SingleTurnMetric):
name: str = "exact_match" # type: ignore
Expand All @@ -42,6 +34,7 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
return await self._single_turn_ascore(SingleTurnSample(**row), callbacks)


@dataclass
class StringPresent(SingleTurnMetric):
name: str = "string_present" # type: ignore
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
Expand All @@ -64,13 +57,28 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
return await self._single_turn_ascore(SingleTurnSample(**row), callbacks)


class StringDistance(SingleTurnMetric):
name: str = "string_distance" # type: ignore
@dataclass
class NonLLMStringSimilarity(SingleTurnMetric):
name: str = "non_llm_string_similarity" # type: ignore
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
)
distance_measure: DistanceMeasure = DistanceMeasure.LEVENSHTEIN

def __post_init__(self):
    """Import rapidfuzz on demand and build the distance-measure lookup.

    rapidfuzz is imported inside this method, so it is only needed when
    the method runs. On success ``self.distance_measure_map`` maps each
    ``DistanceMeasure`` member to the same-named attribute of
    ``rapidfuzz.distance``.

    Raises:
        ImportError: If rapidfuzz cannot be imported. The original import
            error is attached as the explicit cause.
    """
    try:
        from rapidfuzz import distance
    except ImportError as err:
        # Chain explicitly so the underlying import failure is reported as
        # the direct cause instead of incidental context.
        raise ImportError(
            "rapidfuzz is required for string distance. Please install it using `pip install rapidfuzz`"
        ) from err

    self.distance_measure_map = {
        DistanceMeasure.LEVENSHTEIN: distance.Levenshtein,
        DistanceMeasure.HAMMING: distance.Hamming,
        DistanceMeasure.JARO: distance.Jaro,
    }

def init(self, run_config: RunConfig):
    """Accept ``run_config`` and do nothing with it; always returns None."""
    return None

Expand All @@ -81,7 +89,7 @@ async def _single_turn_ascore(
response = sample.response
assert isinstance(reference, str), "Expecting a string"
assert isinstance(response, str), "Expecting a string"
return 1 - DISTANCE_MEASURE_MAP[self.distance_measure].normalized_distance(
return 1 - self.distance_measure_map[self.distance_measure].normalized_distance(
reference, response
)

Expand Down

0 comments on commit 3076f50

Please sign in to comment.