From d757f6a1262e974076b287afd73f6709740eb383 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Thu, 9 Jan 2025 16:50:20 +0530 Subject: [PATCH] remove pysbd requirement --- pyproject.toml | 1 - src/ragas/metrics/base.py | 26 +------------------------- src/ragas/prompt/base.py | 10 +--------- src/ragas/prompt/mixin.py | 2 -- src/ragas/prompt/pydantic_prompt.py | 5 +---- src/ragas/utils.py | 4 ---- 6 files changed, 3 insertions(+), 45 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 111927e852..76e57481a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ dependencies = [ "appdirs", "pydantic>=2", "openai>1", - "pysbd>=0.3.4", "diskcache>=5.6.3", ] dynamic = ["version", "readme"] diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py index daf7b8d03a..06e02608aa 100644 --- a/src/ragas/metrics/base.py +++ b/src/ragas/metrics/base.py @@ -9,7 +9,6 @@ from enum import Enum from pydantic import ValidationError -from pysbd import Segmenter from tqdm import tqdm from ragas._analytics import EvaluationEvent, _analytics_batcher @@ -19,12 +18,7 @@ from ragas.losses import BinaryMetricLoss, MSELoss from ragas.prompt import FewShotPydanticPrompt, PromptMixin from ragas.run_config import RunConfig -from ragas.utils import ( - RAGAS_SUPPORTED_LANGUAGE_CODES, - camel_to_snake, - deprecated, - get_metric_language, -) +from ragas.utils import camel_to_snake, deprecated, get_metric_language if t.TYPE_CHECKING: from langchain_core.callbacks import Callbacks @@ -694,22 +688,4 @@ def from_discrete( return verdict_agg -def get_segmenter( - language: str = "english", clean: bool = False, char_span: bool = False -): - """ - Get a sentence segmenter for a given language - """ - language = language.lower() - if language not in RAGAS_SUPPORTED_LANGUAGE_CODES: - raise ValueError( - f"Language '{language}' not supported. Supported languages: {RAGAS_SUPPORTED_LANGUAGE_CODES.keys()}" - ) - return Segmenter( - language=RAGAS_SUPPORTED_LANGUAGE_CODES[language], - clean=clean, - char_span=char_span, - ) - - ensembler = Ensember() diff --git a/src/ragas/prompt/base.py b/src/ragas/prompt/base.py index 36bd60bd2b..6e315a4b56 100644 --- a/src/ragas/prompt/base.py +++ b/src/ragas/prompt/base.py @@ -7,7 +7,7 @@ from langchain_core.prompt_values import StringPromptValue from pydantic import BaseModel -from ragas.utils import RAGAS_SUPPORTED_LANGUAGE_CODES, camel_to_snake +from ragas.utils import camel_to_snake if t.TYPE_CHECKING: from langchain_core.callbacks import Callbacks @@ -17,13 +17,6 @@ logger = logging.getLogger(__name__) -def _check_if_language_is_supported(language: str): - if language not in RAGAS_SUPPORTED_LANGUAGE_CODES: - raise ValueError( - f"Language '{language}' not supported. Supported languages: {RAGAS_SUPPORTED_LANGUAGE_CODES.keys()}" - ) - - class BasePrompt(ABC): def __init__( self, @@ -34,7 +27,6 @@ def __init__( if name is None: self.name = camel_to_snake(self.__class__.__name__) - _check_if_language_is_supported(language) self.language = language self.original_hash = original_hash diff --git a/src/ragas/prompt/mixin.py b/src/ragas/prompt/mixin.py index 66ba13740c..c354a8d9ec 100644 --- a/src/ragas/prompt/mixin.py +++ b/src/ragas/prompt/mixin.py @@ -5,7 +5,6 @@ import os import typing as t -from .base import _check_if_language_is_supported from .pydantic_prompt import PydanticPrompt if t.TYPE_CHECKING: @@ -111,7 +110,6 @@ def load_prompts(self, path: str, language: t.Optional[str] = None): "Language not specified, loading prompts for default language: %s", language, ) - _check_if_language_is_supported(language) loaded_prompts = {} for prompt_name, prompt in self.get_prompts().items(): diff --git a/src/ragas/prompt/pydantic_prompt.py b/src/ragas/prompt/pydantic_prompt.py index 938a53473f..fb14085332 100644 --- a/src/ragas/prompt/pydantic_prompt.py +++ b/src/ragas/prompt/pydantic_prompt.py @@ -15,7 +15,7 @@ from ragas.callbacks import ChainType, new_group from ragas.exceptions import RagasOutputParserException -from .base import BasePrompt, StringIO, _check_if_language_is_supported +from .base import BasePrompt, StringIO from .utils import extract_json, get_all_strings, update_strings if t.TYPE_CHECKING: @@ -227,9 +227,6 @@ async def adapt( Adapt the prompt to a new language. """ - # throws ValueError if language is not supported - _check_if_language_is_supported(target_language) - # set the original hash, this is used to # identify the original prompt object when loading from file if self.original_hash is None: diff --git a/src/ragas/utils.py b/src/ragas/utils.py index 06eb2ee36d..d761e52c02 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -11,15 +11,11 @@ import numpy as np import tiktoken from datasets import Dataset -from pysbd.languages import LANGUAGE_CODES if t.TYPE_CHECKING: from ragas.metrics.base import Metric DEBUG_ENV_VAR = "RAGAS_DEBUG" -RAGAS_SUPPORTED_LANGUAGE_CODES = { - v.__name__.lower(): k for k, v in LANGUAGE_CODES.items() -} @lru_cache(maxsize=1)