Skip to content

Commit

Permalink
fix: remove pysbd and sentence segmenting (#1826)
Browse files Browse the repository at this point in the history
Fixes #1783 and #1736.
  • Loading branch information
shahules786 authored Jan 9, 2025
1 parent 2a96e6f commit 6478a6e
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 265 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dependencies = [
"appdirs",
"pydantic>=2",
"openai>1",
"pysbd>=0.3.4",
"diskcache>=5.6.3",
]
dynamic = ["version", "readme"]
Expand Down
47 changes: 14 additions & 33 deletions src/ragas/metrics/_answer_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,16 @@
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._answer_similarity import AnswerSimilarity
from ragas.metrics._faithfulness import (
FaithfulnessStatements,
HasSegmentMethod,
LongFormAnswerPrompt,
StatementGeneratorInput,
StatementGeneratorOutput,
StatementGeneratorPrompt,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithEmbeddings,
MetricWithLLM,
SingleTurnMetric,
get_segmenter,
)
from ragas.metrics.utils import fbeta_score
from ragas.prompt import PydanticPrompt
Expand All @@ -29,9 +28,6 @@
if t.TYPE_CHECKING:
from langchain_core.callbacks import Callbacks

from ragas.metrics._faithfulness import SentencesSimplified


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -166,13 +162,12 @@ class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
)
output_type = MetricOutputType.CONTINUOUS
correctness_prompt: PydanticPrompt = field(default_factory=CorrectnessClassifier)
long_form_answer_prompt: PydanticPrompt = field(
default_factory=LongFormAnswerPrompt
statement_generator_prompt: PydanticPrompt = field(
default_factory=StatementGeneratorPrompt
)
weights: list[float] = field(default_factory=lambda: [0.75, 0.25])
beta: float = 1.0
answer_similarity: t.Optional[AnswerSimilarity] = None
sentence_segmenter: t.Optional[HasSegmentMethod] = None
max_retries: int = 1

def __post_init__(self):
Expand All @@ -185,10 +180,6 @@ def __post_init__(self):
if not all([w >= 0 for w in self.weights]):
raise ValueError("Weights must be non-negative")

if self.sentence_segmenter is None:
language = self.long_form_answer_prompt.language
self.sentence_segmenter = get_segmenter(language=language, clean=False)

if type(self.beta) is not float:
raise ValueError(
"Beta must be a float. A beta > 1 gives more weight to recall, while beta < 1 favors precision."
Expand All @@ -210,25 +201,17 @@ def _compute_statement_presence(

async def _create_simplified_statements(
self, question: str, text: str, callbacks: Callbacks
) -> SentencesSimplified:
assert self.sentence_segmenter is not None, "sentence_segmenter is not set"
) -> StatementGeneratorOutput:
assert self.llm is not None, "llm is not set"

sentences = self.sentence_segmenter.segment(text)
sentences_with_index = {
i: sentence
for i, sentence in enumerate(sentences)
if sentence.strip().endswith(".")
}

statements_simplified = await self.long_form_answer_prompt.generate(
prompt_input = StatementGeneratorInput(question=question, answer=text)
statements = await self.statement_generator_prompt.generate(
llm=self.llm,
data=FaithfulnessStatements(
question=question, answer=text, sentences=sentences_with_index
),
data=prompt_input,
callbacks=callbacks,
)
return statements_simplified

return statements

async def _single_turn_ascore(
self, sample: SingleTurnSample, callbacks: Callbacks
Expand All @@ -244,13 +227,11 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
question = row["user_input"]
statements: t.Dict[str, t.List[str]] = {}
for item in ["response", "reference"]:
simplified_statements = await self._create_simplified_statements(
statements_x = await self._create_simplified_statements(
question, row[item], callbacks
)
_statements_unwrapped = []
for component in simplified_statements.sentences:
_statements_unwrapped.extend(component.simpler_statements)
statements[item] = _statements_unwrapped
statements_x = statements_x.statements
statements[item] = statements_x

if not all([val == [] for val in statements.values()]):
ground_truth = [statement for statement in statements["reference"]]
Expand Down
17 changes: 7 additions & 10 deletions src/ragas/metrics/_bleu_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from langchain_core.callbacks import Callbacks

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._faithfulness import HasSegmentMethod
from ragas.metrics.base import MetricType, SingleTurnMetric, get_segmenter
from ragas.metrics.base import MetricType, SingleTurnMetric
from ragas.run_config import RunConfig


Expand All @@ -15,7 +14,6 @@ class BleuScore(SingleTurnMetric):
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
)
sentence_segmenter: t.Optional[HasSegmentMethod] = None
language: str = "english"

def __post_init__(self):
Expand All @@ -25,8 +23,6 @@ def __post_init__(self):
raise ImportError(
"sacrebleu is required for bleu score. Please install it using `pip install sacrebleu`"
)
if not self.sentence_segmenter:
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
self.corpus_bleu = corpus_bleu

def init(self, run_config: RunConfig):
Expand All @@ -35,12 +31,13 @@ def init(self, run_config: RunConfig):
async def _single_turn_ascore(
self, sample: SingleTurnSample, callbacks: Callbacks
) -> float:
assert (
self.sentence_segmenter is not None
), "Sentence segmenter is not initialized"

reference_sentences = self.sentence_segmenter.segment(sample.reference)
response_sentences = self.sentence_segmenter.segment(sample.response)
reference, response = sample.reference, sample.response
assert isinstance(reference, str), "BleuScore expects a valid reference string"
assert isinstance(response, str), "BleuScore expects a valid response string"

reference_sentences = reference.split(". ")
response_sentences = response.split(". ")

reference = [[reference] for reference in reference_sentences]
response = response_sentences
Expand Down
96 changes: 31 additions & 65 deletions src/ragas/metrics/_factual_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,12 @@
from numpy.typing import NDArray
from pydantic import BaseModel, Field

from ragas.metrics._faithfulness import (
HasSegmentMethod,
NLIStatementInput,
NLIStatementPrompt,
)
from ragas.metrics._faithfulness import NLIStatementInput, NLIStatementPrompt
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
get_segmenter,
)
from ragas.metrics.utils import fbeta_score
from ragas.prompt import PydanticPrompt
Expand All @@ -35,11 +30,10 @@

class ClaimDecompositionInput(BaseModel):
response: str = Field(..., title="Response")
sentences: t.List[str] = Field(..., title="Sentences from response")


class ClaimDecompositionOutput(BaseModel):
decomposed_claims: t.List[t.List[str]] = Field(..., title="Decomposed Claims")
claims: t.List[str] = Field(..., title="Decomposed Claims")


# Define an enum for decomposition types
Expand All @@ -52,32 +46,25 @@ class DecompositionType(Enum):

# Example input data
example1_input = ClaimDecompositionInput(
response="Charles Babbage was a French mathematician, philosopher, and food critic.",
sentences=[
"Charles Babbage was a French mathematician, philosopher, and food critic."
],
response="Charles Babbage was a French mathematician, philosopher, and food critic."
)

# Define the examples using the new structure
# Define the examples using the Pydantic structure
claim_decomposition_examples = {
DecompositionType.LOW_ATOMICITY_LOW_COVERAGE: [
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician and philosopher."]
]
claims=["Charles Babbage was a mathematician and philosopher."]
),
)
],
DecompositionType.LOW_ATOMICITY_HIGH_COVERAGE: [
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
[
"Charles Babbage was a French mathematician, philosopher, and food critic."
]
claims=[
"Charles Babbage was a French mathematician, philosopher, and food critic."
]
),
)
Expand All @@ -86,9 +73,9 @@ class DecompositionType(Enum):
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician."],
["Charles Babbage was a philosopher."],
claims=[
"Charles Babbage was a mathematician.",
"Charles Babbage was a philosopher.",
]
),
)
Expand All @@ -97,11 +84,11 @@ class DecompositionType(Enum):
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician."],
["Charles Babbage was a philosopher."],
["Charles Babbage was a food critic."],
["Charles Babbage was French."],
claims=[
"Charles Babbage was a mathematician.",
"Charles Babbage was a philosopher.",
"Charles Babbage was a food critic.",
"Charles Babbage was French.",
]
),
)
Expand All @@ -110,23 +97,17 @@ class DecompositionType(Enum):

# Example input data with two sentences
example2_input = ClaimDecompositionInput(
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics.",
sentences=[
"Albert Einstein was a German theoretical physicist.",
"He developed the theory of relativity and also contributed to the development of quantum mechanics.",
],
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics."
)

# Adding examples to the dictionary with different decomposition types
claim_decomposition_examples[DecompositionType.LOW_ATOMICITY_LOW_COVERAGE].append(
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German physicist."],
[
"Albert Einstein developed relativity and contributed to quantum mechanics."
],
claims=[
"Albert Einstein was a German physicist.",
"Albert Einstein developed relativity and contributed to quantum mechanics.",
]
),
)
Expand All @@ -136,11 +117,9 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
[
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics."
],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics.",
]
),
)
Expand All @@ -150,9 +129,9 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
["Albert Einstein developed the theory of relativity."],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity.",
]
),
)
Expand All @@ -162,12 +141,10 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
[
"Albert Einstein developed the theory of relativity.",
"Albert Einstein contributed to the development of quantum mechanics.",
],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity.",
"Albert Einstein contributed to the development of quantum mechanics.",
]
),
)
Expand Down Expand Up @@ -218,7 +195,6 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
coverage: t.Literal["low", "high"] = "low"
claim_decomposition_prompt: PydanticPrompt = ClaimDecompositionPrompt()
nli_prompt: PydanticPrompt = NLIStatementPrompt()
sentence_segmenter: t.Optional[HasSegmentMethod] = None
language: str = "english"

def __post_init__(self):
Expand All @@ -232,8 +208,6 @@ def __post_init__(self):
logger.warning(
f"No examples found for the atomicity and coverage level: {value}"
)
if not self.sentence_segmenter:
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)

if type(self.beta) is not float:
raise ValueError(
Expand All @@ -244,20 +218,12 @@ async def decompose_claims(
self, response: str, callbacks: Callbacks
) -> t.List[str]:
assert self.llm is not None, "LLM must be set"
assert (
self.sentence_segmenter is not None
), "Sentence segmenter is not initialized"

sentences = self.sentence_segmenter.segment(response)
assert isinstance(sentences, list), "Segmenter must return a list of sentences"
prompt_input = ClaimDecompositionInput(response=response, sentences=sentences)
prompt_input = ClaimDecompositionInput(response=response)
result = await self.claim_decomposition_prompt.generate(
data=prompt_input, llm=self.llm, callbacks=callbacks
)
claims_list = [
claim for claim_list in result.decomposed_claims for claim in claim_list
]
return claims_list
return result.claims

async def verify_claims(
self, premise: str, hypothesis_list: t.List[str], callbacks: Callbacks
Expand Down
Loading

0 comments on commit 6478a6e

Please sign in to comment.