fix: remove pysbd and sentence segmenting #1826

Merged · 9 commits · Jan 9, 2025
Changes from 8 commits
1 change: 0 additions & 1 deletion pyproject.toml
@@ -13,7 +13,6 @@ dependencies = [
     "appdirs",
     "pydantic>=2",
     "openai>1",
-    "pysbd>=0.3.4",
     "diskcache>=5.6.3",
 ]
 dynamic = ["version", "readme"]
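(Note: for context, the dropped dependency was the rule-based sentence splitter these metrics relied on. Below is a minimal sketch of pysbd's documented API, not code from this repository, for comparison with the LLM-prompt and naive-split replacements in the files that follow.)

# Sketch of the segmenter this PR removes, using pysbd's documented API.
import pysbd

segmenter = pysbd.Segmenter(language="en", clean=False)
sentences = segmenter.segment(
    "Charles Babbage was a mathematician. He was also a philosopher."
)
# `sentences` is a list of sentence strings, split on rule-based boundaries.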
47 changes: 14 additions & 33 deletions src/ragas/metrics/_answer_correctness.py
@@ -10,17 +10,16 @@
 from ragas.dataset_schema import SingleTurnSample
 from ragas.metrics._answer_similarity import AnswerSimilarity
 from ragas.metrics._faithfulness import (
-    FaithfulnessStatements,
-    HasSegmentMethod,
-    LongFormAnswerPrompt,
+    StatementGeneratorInput,
+    StatementGeneratorOutput,
+    StatementGeneratorPrompt,
 )
 from ragas.metrics.base import (
     MetricOutputType,
     MetricType,
     MetricWithEmbeddings,
     MetricWithLLM,
     SingleTurnMetric,
-    get_segmenter,
 )
 from ragas.metrics.utils import fbeta_score
 from ragas.prompt import PydanticPrompt
@@ -29,9 +28,6 @@
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
 
-    from ragas.metrics._faithfulness import SentencesSimplified
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -166,13 +162,12 @@ class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
     )
     output_type = MetricOutputType.CONTINUOUS
     correctness_prompt: PydanticPrompt = field(default_factory=CorrectnessClassifier)
-    long_form_answer_prompt: PydanticPrompt = field(
-        default_factory=LongFormAnswerPrompt
+    statement_generator_prompt: PydanticPrompt = field(
+        default_factory=StatementGeneratorPrompt
     )
     weights: list[float] = field(default_factory=lambda: [0.75, 0.25])
     beta: float = 1.0
     answer_similarity: t.Optional[AnswerSimilarity] = None
-    sentence_segmenter: t.Optional[HasSegmentMethod] = None
     max_retries: int = 1
 
     def __post_init__(self):
@@ -185,10 +180,6 @@ def __post_init__(self):
         if not all([w >= 0 for w in self.weights]):
             raise ValueError("Weights must be non-negative")
 
-        if self.sentence_segmenter is None:
-            language = self.long_form_answer_prompt.language
-            self.sentence_segmenter = get_segmenter(language=language, clean=False)
-
         if type(self.beta) is not float:
             raise ValueError(
                 "Beta must be a float. A beta > 1 gives more weight to recall, while beta < 1 favors precision."
@@ -210,25 +201,17 @@ def _compute_statement_presence(
 
     async def _create_simplified_statements(
         self, question: str, text: str, callbacks: Callbacks
-    ) -> SentencesSimplified:
-        assert self.sentence_segmenter is not None, "sentence_segmenter is not set"
+    ) -> StatementGeneratorOutput:
         assert self.llm is not None, "llm is not set"
 
-        sentences = self.sentence_segmenter.segment(text)
-        sentences_with_index = {
-            i: sentence
-            for i, sentence in enumerate(sentences)
-            if sentence.strip().endswith(".")
-        }
-
-        statements_simplified = await self.long_form_answer_prompt.generate(
+        prompt_input = StatementGeneratorInput(question=question, answer=text)
+        statements = await self.statement_generator_prompt.generate(
             llm=self.llm,
-            data=FaithfulnessStatements(
-                question=question, answer=text, sentences=sentences_with_index
-            ),
+            data=prompt_input,
             callbacks=callbacks,
         )
-        return statements_simplified
-
+        return statements
 
     async def _single_turn_ascore(
         self, sample: SingleTurnSample, callbacks: Callbacks
@@ -244,13 +227,11 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         question = row["user_input"]
         statements: t.Dict[str, t.List[str]] = {}
         for item in ["response", "reference"]:
-            simplified_statements = await self._create_simplified_statements(
+            statements_x = await self._create_simplified_statements(
                 question, row[item], callbacks
             )
-            _statements_unwrapped = []
-            for component in simplified_statements.sentences:
-                _statements_unwrapped.extend(component.simpler_statements)
-            statements[item] = _statements_unwrapped
+            statements_x = statements_x.statements
+            statements[item] = statements_x
 
         if not all([val == [] for val in statements.values()]):
             ground_truth = [statement for statement in statements["reference"]]
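(Note: the net effect of this file's changes is a shape change in the prompt output. The old long-form prompt returned statements nested per sentence, which required the unwrapping loop deleted above; the new StatementGeneratorOutput is flat. A minimal sketch with hypothetical stand-in models, not the real ragas classes:)

from typing import List

from pydantic import BaseModel

class SimplifiedSentence(BaseModel):   # stand-in for the old per-sentence shape
    simpler_statements: List[str]

class OldOutput(BaseModel):            # stand-in for SentencesSimplified
    sentences: List[SimplifiedSentence]

class NewOutput(BaseModel):            # stand-in for StatementGeneratorOutput
    statements: List[str]

old = OldOutput(
    sentences=[
        SimplifiedSentence(simpler_statements=["Babbage was a mathematician."]),
        SimplifiedSentence(simpler_statements=["Babbage was a philosopher."]),
    ]
)
# The deleted unwrapping loop, for comparison:
flat = []
for component in old.sentences:
    flat.extend(component.simpler_statements)

new = NewOutput(statements=["Babbage was a mathematician.", "Babbage was a philosopher."])
assert flat == new.statements  # same flat list, with no unwrapping step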
17 changes: 7 additions & 10 deletions src/ragas/metrics/_bleu_score.py
@@ -4,8 +4,7 @@
 from langchain_core.callbacks import Callbacks
 
 from ragas.dataset_schema import SingleTurnSample
-from ragas.metrics._faithfulness import HasSegmentMethod
-from ragas.metrics.base import MetricType, SingleTurnMetric, get_segmenter
+from ragas.metrics.base import MetricType, SingleTurnMetric
 from ragas.run_config import RunConfig
 
 
@@ -15,7 +14,6 @@ class BleuScore(SingleTurnMetric):
     _required_columns: t.Dict[MetricType, t.Set[str]] = field(
         default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
     )
-    sentence_segmenter: t.Optional[HasSegmentMethod] = None
     language: str = "english"
 
     def __post_init__(self):
@@ -25,8 +23,6 @@ def __post_init__(self):
             raise ImportError(
                 "sacrebleu is required for bleu score. Please install it using `pip install sacrebleu`"
             )
-        if not self.sentence_segmenter:
-            self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
         self.corpus_bleu = corpus_bleu
 
     def init(self, run_config: RunConfig):
@@ -35,12 +31,13 @@ def init(self, run_config: RunConfig):
     async def _single_turn_ascore(
         self, sample: SingleTurnSample, callbacks: Callbacks
     ) -> float:
-        assert (
-            self.sentence_segmenter is not None
-        ), "Sentence segmenter is not initialized"
-
-        reference_sentences = self.sentence_segmenter.segment(sample.reference)
-        response_sentences = self.sentence_segmenter.segment(sample.response)
+        reference, response = sample.reference, sample.response
+        assert isinstance(reference, str), "Expecting a string"
+        assert isinstance(response, str), "Expecting a string"
Review comment (Member): Having to use an assert here doesn't feel good. The other problem is the error message: ideally it should tell users what to do about it, though I'm not sure that is easy to define here.

Reply (Member, PR author): I have edited the message to make it clearer, but this is an API design problem. The metric expects a valid response and reference, yet in SingleTurnSample they are optional.

+        reference_sentences = reference.split(". ")
+        response_sentences = response.split(". ")
 
         reference = [[reference] for reference in reference_sentences]
         response = response_sentences
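(Note: putting this hunk together, the scoring path now looks roughly like the sketch below. The explicit ValueError follows the reviewer's suggestion and is illustrative, not the merged code; the reference nesting here uses sacrebleu's documented single-stream convention rather than the exact list shape above, and equal sentence counts plus the score/100 normalization are assumptions.)

from typing import Optional

from sacrebleu import corpus_bleu

def bleu_sketch(reference: Optional[str], response: Optional[str]) -> float:
    # Explicit guard with an actionable message, instead of a bare assert.
    if not isinstance(reference, str) or not isinstance(response, str):
        raise ValueError(
            "BleuScore requires both `reference` and `response` to be set "
            "as strings on the sample before scoring."
        )
    # The naive segmentation that replaces pysbd.
    reference_sentences = reference.split(". ")
    response_sentences = response.split(". ")
    # One reference stream, parallel to the hypotheses (assumes equal counts).
    refs = [reference_sentences]
    return corpus_bleu(response_sentences, refs).score / 100

print(bleu_sketch("The quick brown fox jumped over the dog. It then ran away.",
                  "The quick brown fox jumped over the dog. It then ran away."))
# Identical strings score 1.0.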
96 changes: 31 additions & 65 deletions src/ragas/metrics/_factual_correctness.py
@@ -9,17 +9,12 @@
 from numpy.typing import NDArray
 from pydantic import BaseModel, Field
 
-from ragas.metrics._faithfulness import (
-    HasSegmentMethod,
-    NLIStatementInput,
-    NLIStatementPrompt,
-)
+from ragas.metrics._faithfulness import NLIStatementInput, NLIStatementPrompt
 from ragas.metrics.base import (
     MetricOutputType,
     MetricType,
     MetricWithLLM,
     SingleTurnMetric,
-    get_segmenter,
 )
 from ragas.metrics.utils import fbeta_score
 from ragas.prompt import PydanticPrompt
@@ -35,11 +30,10 @@
 
 class ClaimDecompositionInput(BaseModel):
     response: str = Field(..., title="Response")
-    sentences: t.List[str] = Field(..., title="Sentences from response")
 
 
 class ClaimDecompositionOutput(BaseModel):
-    decomposed_claims: t.List[t.List[str]] = Field(..., title="Decomposed Claims")
+    claims: t.List[str] = Field(..., title="Decomposed Claims")
 
 
 # Define an enum for decomposition types
@@ -52,32 +46,25 @@ class DecompositionType(Enum):
 
 # Example input data
 example1_input = ClaimDecompositionInput(
-    response="Charles Babbage was a French mathematician, philosopher, and food critic.",
-    sentences=[
-        "Charles Babbage was a French mathematician, philosopher, and food critic."
-    ],
+    response="Charles Babbage was a French mathematician, philosopher, and food critic."
 )
 
-# Define the examples using the new structure
+# Define the examples using the Pydantic structure
 claim_decomposition_examples = {
     DecompositionType.LOW_ATOMICITY_LOW_COVERAGE: [
         (
             example1_input,
             ClaimDecompositionOutput(
-                decomposed_claims=[
-                    ["Charles Babbage was a mathematician and philosopher."]
-                ]
+                claims=["Charles Babbage was a mathematician and philosopher."]
            ),
        )
    ],
    DecompositionType.LOW_ATOMICITY_HIGH_COVERAGE: [
        (
            example1_input,
            ClaimDecompositionOutput(
-                decomposed_claims=[
-                    [
-                        "Charles Babbage was a French mathematician, philosopher, and food critic."
-                    ]
+                claims=[
+                    "Charles Babbage was a French mathematician, philosopher, and food critic."
                 ]
             ),
         )
@@ -86,9 +73,9 @@ class DecompositionType(Enum):
         (
             example1_input,
             ClaimDecompositionOutput(
-                decomposed_claims=[
-                    ["Charles Babbage was a mathematician."],
-                    ["Charles Babbage was a philosopher."],
+                claims=[
+                    "Charles Babbage was a mathematician.",
+                    "Charles Babbage was a philosopher.",
                 ]
             ),
         )
@@ -97,11 +84,11 @@
         (
             example1_input,
             ClaimDecompositionOutput(
-                decomposed_claims=[
-                    ["Charles Babbage was a mathematician."],
-                    ["Charles Babbage was a philosopher."],
-                    ["Charles Babbage was a food critic."],
-                    ["Charles Babbage was French."],
+                claims=[
+                    "Charles Babbage was a mathematician.",
+                    "Charles Babbage was a philosopher.",
+                    "Charles Babbage was a food critic.",
+                    "Charles Babbage was French.",
                 ]
             ),
         )
@@ -110,23 +97,17 @@
 
 # Example input data with two sentences
 example2_input = ClaimDecompositionInput(
-    response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics.",
-    sentences=[
-        "Albert Einstein was a German theoretical physicist.",
-        "He developed the theory of relativity and also contributed to the development of quantum mechanics.",
-    ],
+    response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics."
 )
 
 # Adding examples to the dictionary with different decomposition types
 claim_decomposition_examples[DecompositionType.LOW_ATOMICITY_LOW_COVERAGE].append(
     (
         example2_input,
         ClaimDecompositionOutput(
-            decomposed_claims=[
-                ["Albert Einstein was a German physicist."],
-                [
-                    "Albert Einstein developed relativity and contributed to quantum mechanics."
-                ],
+            claims=[
+                "Albert Einstein was a German physicist.",
+                "Albert Einstein developed relativity and contributed to quantum mechanics.",
             ]
         ),
     )
@@ -136,11 +117,9 @@
     (
         example2_input,
         ClaimDecompositionOutput(
-            decomposed_claims=[
-                ["Albert Einstein was a German theoretical physicist."],
-                [
-                    "Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics."
-                ],
+            claims=[
+                "Albert Einstein was a German theoretical physicist.",
+                "Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics.",
             ]
         ),
     )
@@ -150,9 +129,9 @@
     (
         example2_input,
         ClaimDecompositionOutput(
-            decomposed_claims=[
-                ["Albert Einstein was a German theoretical physicist."],
-                ["Albert Einstein developed the theory of relativity."],
+            claims=[
+                "Albert Einstein was a German theoretical physicist.",
+                "Albert Einstein developed the theory of relativity.",
             ]
         ),
     )
@@ -162,12 +141,10 @@
     (
         example2_input,
         ClaimDecompositionOutput(
-            decomposed_claims=[
-                ["Albert Einstein was a German theoretical physicist."],
-                [
-                    "Albert Einstein developed the theory of relativity.",
-                    "Albert Einstein contributed to the development of quantum mechanics.",
-                ],
+            claims=[
+                "Albert Einstein was a German theoretical physicist.",
+                "Albert Einstein developed the theory of relativity.",
+                "Albert Einstein contributed to the development of quantum mechanics.",
             ]
         ),
     )
@@ -218,7 +195,6 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
     coverage: t.Literal["low", "high"] = "low"
     claim_decomposition_prompt: PydanticPrompt = ClaimDecompositionPrompt()
     nli_prompt: PydanticPrompt = NLIStatementPrompt()
-    sentence_segmenter: t.Optional[HasSegmentMethod] = None
     language: str = "english"
 
     def __post_init__(self):
@@ -232,8 +208,6 @@ def __post_init__(self):
             logger.warning(
                 f"No examples found for the atomicity and coverage level: {value}"
             )
-        if not self.sentence_segmenter:
-            self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
 
         if type(self.beta) is not float:
             raise ValueError(
@@ -244,20 +218,12 @@ async def decompose_claims(
         self, response: str, callbacks: Callbacks
     ) -> t.List[str]:
         assert self.llm is not None, "LLM must be set"
-        assert (
-            self.sentence_segmenter is not None
-        ), "Sentence segmenter is not initialized"
-
-        sentences = self.sentence_segmenter.segment(response)
-        assert isinstance(sentences, list), "Segmenter must return a list of sentences"
-        prompt_input = ClaimDecompositionInput(response=response, sentences=sentences)
+        prompt_input = ClaimDecompositionInput(response=response)
         result = await self.claim_decomposition_prompt.generate(
             data=prompt_input, llm=self.llm, callbacks=callbacks
         )
-        claims_list = [
-            claim for claim_list in result.decomposed_claims for claim in claim_list
-        ]
-        return claims_list
+        return result.claims
 
     async def verify_claims(
         self, premise: str, hypothesis_list: t.List[str], callbacks: Callbacks
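(Note: with the sentences field gone and decomposed_claims flattened into claims, decomposition reduces to the sketch below. The two models mirror this diff, while fake_decompose is a hypothetical stand-in for the async PydanticPrompt/LLM call.)

from typing import List

from pydantic import BaseModel, Field

class ClaimDecompositionInput(BaseModel):
    response: str = Field(..., title="Response")

class ClaimDecompositionOutput(BaseModel):
    claims: List[str] = Field(..., title="Decomposed Claims")

def fake_decompose(data: ClaimDecompositionInput) -> ClaimDecompositionOutput:
    # Hypothetical stand-in for claim_decomposition_prompt.generate(...),
    # which is an async LLM call in the real metric.
    return ClaimDecompositionOutput(
        claims=[
            "Charles Babbage was a mathematician.",
            "Charles Babbage was French.",
        ]
    )

result = fake_decompose(
    ClaimDecompositionInput(response="Charles Babbage was a French mathematician.")
)
claims = result.claims  # already flat; the old list-of-lists flattening is gone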