Skip to content

Commit

Permalink
fix: remove pysbd and sentence segmenting (#1826)
Browse files Browse the repository at this point in the history
Fixes #1783 and #1736.
  • Loading branch information
shahules786 authored Jan 9, 2025
1 parent 2a96e6f commit 6478a6e
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 265 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dependencies = [
"appdirs",
"pydantic>=2",
"openai>1",
"pysbd>=0.3.4",
"diskcache>=5.6.3",
]
dynamic = ["version", "readme"]
Expand Down
47 changes: 14 additions & 33 deletions src/ragas/metrics/_answer_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,16 @@
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._answer_similarity import AnswerSimilarity
from ragas.metrics._faithfulness import (
FaithfulnessStatements,
HasSegmentMethod,
LongFormAnswerPrompt,
StatementGeneratorInput,
StatementGeneratorOutput,
StatementGeneratorPrompt,
)
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithEmbeddings,
MetricWithLLM,
SingleTurnMetric,
get_segmenter,
)
from ragas.metrics.utils import fbeta_score
from ragas.prompt import PydanticPrompt
Expand All @@ -29,9 +28,6 @@
if t.TYPE_CHECKING:
from langchain_core.callbacks import Callbacks

from ragas.metrics._faithfulness import SentencesSimplified


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -166,13 +162,12 @@ class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
)
output_type = MetricOutputType.CONTINUOUS
correctness_prompt: PydanticPrompt = field(default_factory=CorrectnessClassifier)
long_form_answer_prompt: PydanticPrompt = field(
default_factory=LongFormAnswerPrompt
statement_generator_prompt: PydanticPrompt = field(
default_factory=StatementGeneratorPrompt
)
weights: list[float] = field(default_factory=lambda: [0.75, 0.25])
beta: float = 1.0
answer_similarity: t.Optional[AnswerSimilarity] = None
sentence_segmenter: t.Optional[HasSegmentMethod] = None
max_retries: int = 1

def __post_init__(self):
Expand All @@ -185,10 +180,6 @@ def __post_init__(self):
if not all([w >= 0 for w in self.weights]):
raise ValueError("Weights must be non-negative")

if self.sentence_segmenter is None:
language = self.long_form_answer_prompt.language
self.sentence_segmenter = get_segmenter(language=language, clean=False)

if type(self.beta) is not float:
raise ValueError(
"Beta must be a float. A beta > 1 gives more weight to recall, while beta < 1 favors precision."
Expand All @@ -210,25 +201,17 @@ def _compute_statement_presence(

async def _create_simplified_statements(
self, question: str, text: str, callbacks: Callbacks
) -> SentencesSimplified:
assert self.sentence_segmenter is not None, "sentence_segmenter is not set"
) -> StatementGeneratorOutput:
assert self.llm is not None, "llm is not set"

sentences = self.sentence_segmenter.segment(text)
sentences_with_index = {
i: sentence
for i, sentence in enumerate(sentences)
if sentence.strip().endswith(".")
}

statements_simplified = await self.long_form_answer_prompt.generate(
prompt_input = StatementGeneratorInput(question=question, answer=text)
statements = await self.statement_generator_prompt.generate(
llm=self.llm,
data=FaithfulnessStatements(
question=question, answer=text, sentences=sentences_with_index
),
data=prompt_input,
callbacks=callbacks,
)
return statements_simplified

return statements

async def _single_turn_ascore(
self, sample: SingleTurnSample, callbacks: Callbacks
Expand All @@ -244,13 +227,11 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
question = row["user_input"]
statements: t.Dict[str, t.List[str]] = {}
for item in ["response", "reference"]:
simplified_statements = await self._create_simplified_statements(
statements_x = await self._create_simplified_statements(
question, row[item], callbacks
)
_statements_unwrapped = []
for component in simplified_statements.sentences:
_statements_unwrapped.extend(component.simpler_statements)
statements[item] = _statements_unwrapped
statements_x = statements_x.statements
statements[item] = statements_x

if not all([val == [] for val in statements.values()]):
ground_truth = [statement for statement in statements["reference"]]
Expand Down
17 changes: 7 additions & 10 deletions src/ragas/metrics/_bleu_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from langchain_core.callbacks import Callbacks

from ragas.dataset_schema import SingleTurnSample
from ragas.metrics._faithfulness import HasSegmentMethod
from ragas.metrics.base import MetricType, SingleTurnMetric, get_segmenter
from ragas.metrics.base import MetricType, SingleTurnMetric
from ragas.run_config import RunConfig


Expand All @@ -15,7 +14,6 @@ class BleuScore(SingleTurnMetric):
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
)
sentence_segmenter: t.Optional[HasSegmentMethod] = None
language: str = "english"

def __post_init__(self):
Expand All @@ -25,8 +23,6 @@ def __post_init__(self):
raise ImportError(
"sacrebleu is required for bleu score. Please install it using `pip install sacrebleu`"
)
if not self.sentence_segmenter:
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
self.corpus_bleu = corpus_bleu

def init(self, run_config: RunConfig):
Expand All @@ -35,12 +31,13 @@ def init(self, run_config: RunConfig):
async def _single_turn_ascore(
self, sample: SingleTurnSample, callbacks: Callbacks
) -> float:
assert (
self.sentence_segmenter is not None
), "Sentence segmenter is not initialized"

reference_sentences = self.sentence_segmenter.segment(sample.reference)
response_sentences = self.sentence_segmenter.segment(sample.response)
reference, response = sample.reference, sample.response
assert isinstance(reference, str), "BleuScore expects a valid reference string"
assert isinstance(response, str), "BleuScore expects a valid response string"

reference_sentences = reference.split(". ")
response_sentences = response.split(". ")

reference = [[reference] for reference in reference_sentences]
response = response_sentences
Expand Down
96 changes: 31 additions & 65 deletions src/ragas/metrics/_factual_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,12 @@
from numpy.typing import NDArray
from pydantic import BaseModel, Field

from ragas.metrics._faithfulness import (
HasSegmentMethod,
NLIStatementInput,
NLIStatementPrompt,
)
from ragas.metrics._faithfulness import NLIStatementInput, NLIStatementPrompt
from ragas.metrics.base import (
MetricOutputType,
MetricType,
MetricWithLLM,
SingleTurnMetric,
get_segmenter,
)
from ragas.metrics.utils import fbeta_score
from ragas.prompt import PydanticPrompt
Expand All @@ -35,11 +30,10 @@

class ClaimDecompositionInput(BaseModel):
response: str = Field(..., title="Response")
sentences: t.List[str] = Field(..., title="Sentences from response")


class ClaimDecompositionOutput(BaseModel):
decomposed_claims: t.List[t.List[str]] = Field(..., title="Decomposed Claims")
claims: t.List[str] = Field(..., title="Decomposed Claims")


# Define an enum for decomposition types
Expand All @@ -52,32 +46,25 @@ class DecompositionType(Enum):

# Example input data
example1_input = ClaimDecompositionInput(
response="Charles Babbage was a French mathematician, philosopher, and food critic.",
sentences=[
"Charles Babbage was a French mathematician, philosopher, and food critic."
],
response="Charles Babbage was a French mathematician, philosopher, and food critic."
)

# Define the examples using the new structure
# Define the examples using the Pydantic structure
claim_decomposition_examples = {
DecompositionType.LOW_ATOMICITY_LOW_COVERAGE: [
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician and philosopher."]
]
claims=["Charles Babbage was a mathematician and philosopher."]
),
)
],
DecompositionType.LOW_ATOMICITY_HIGH_COVERAGE: [
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
[
"Charles Babbage was a French mathematician, philosopher, and food critic."
]
claims=[
"Charles Babbage was a French mathematician, philosopher, and food critic."
]
),
)
Expand All @@ -86,9 +73,9 @@ class DecompositionType(Enum):
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician."],
["Charles Babbage was a philosopher."],
claims=[
"Charles Babbage was a mathematician.",
"Charles Babbage was a philosopher.",
]
),
)
Expand All @@ -97,11 +84,11 @@ class DecompositionType(Enum):
(
example1_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Charles Babbage was a mathematician."],
["Charles Babbage was a philosopher."],
["Charles Babbage was a food critic."],
["Charles Babbage was French."],
claims=[
"Charles Babbage was a mathematician.",
"Charles Babbage was a philosopher.",
"Charles Babbage was a food critic.",
"Charles Babbage was French.",
]
),
)
Expand All @@ -110,23 +97,17 @@ class DecompositionType(Enum):

# Example input data with two sentences
example2_input = ClaimDecompositionInput(
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics.",
sentences=[
"Albert Einstein was a German theoretical physicist.",
"He developed the theory of relativity and also contributed to the development of quantum mechanics.",
],
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics."
)

# Adding examples to the dictionary with different decomposition types
claim_decomposition_examples[DecompositionType.LOW_ATOMICITY_LOW_COVERAGE].append(
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German physicist."],
[
"Albert Einstein developed relativity and contributed to quantum mechanics."
],
claims=[
"Albert Einstein was a German physicist.",
"Albert Einstein developed relativity and contributed to quantum mechanics.",
]
),
)
Expand All @@ -136,11 +117,9 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
[
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics."
],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics.",
]
),
)
Expand All @@ -150,9 +129,9 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
["Albert Einstein developed the theory of relativity."],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity.",
]
),
)
Expand All @@ -162,12 +141,10 @@ class DecompositionType(Enum):
(
example2_input,
ClaimDecompositionOutput(
decomposed_claims=[
["Albert Einstein was a German theoretical physicist."],
[
"Albert Einstein developed the theory of relativity.",
"Albert Einstein contributed to the development of quantum mechanics.",
],
claims=[
"Albert Einstein was a German theoretical physicist.",
"Albert Einstein developed the theory of relativity.",
"Albert Einstein contributed to the development of quantum mechanics.",
]
),
)
Expand Down Expand Up @@ -218,7 +195,6 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
coverage: t.Literal["low", "high"] = "low"
claim_decomposition_prompt: PydanticPrompt = ClaimDecompositionPrompt()
nli_prompt: PydanticPrompt = NLIStatementPrompt()
sentence_segmenter: t.Optional[HasSegmentMethod] = None
language: str = "english"

def __post_init__(self):
Expand All @@ -232,8 +208,6 @@ def __post_init__(self):
logger.warning(
f"No examples found for the atomicity and coverage level: {value}"
)
if not self.sentence_segmenter:
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)

if type(self.beta) is not float:
raise ValueError(
Expand All @@ -244,20 +218,12 @@ async def decompose_claims(
self, response: str, callbacks: Callbacks
) -> t.List[str]:
assert self.llm is not None, "LLM must be set"
assert (
self.sentence_segmenter is not None
), "Sentence segmenter is not initialized"

sentences = self.sentence_segmenter.segment(response)
assert isinstance(sentences, list), "Segmenter must return a list of sentences"
prompt_input = ClaimDecompositionInput(response=response, sentences=sentences)
prompt_input = ClaimDecompositionInput(response=response)
result = await self.claim_decomposition_prompt.generate(
data=prompt_input, llm=self.llm, callbacks=callbacks
)
claims_list = [
claim for claim_list in result.decomposed_claims for claim in claim_list
]
return claims_list
return result.claims

async def verify_claims(
self, premise: str, hypothesis_list: t.List[str], callbacks: Callbacks
Expand Down
Loading

0 comments on commit 6478a6e

Please sign in to comment.