diff --git a/integrations/uptrain/example/example.py b/integrations/uptrain/example/example.py index ea3c8cc7e..fe332548d 100644 --- a/integrations/uptrain/example/example.py +++ b/integrations/uptrain/example/example.py @@ -2,6 +2,7 @@ from haystack import Pipeline from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric +from haystack.utils import Secret QUESTIONS = [ "Which is the most popular global sport?", @@ -24,13 +25,13 @@ evaluator = UpTrainEvaluator( metric=UpTrainMetric.FACTUAL_ACCURACY, api="openai", - api_key_env_var="OPENAI_API_KEY", + api_key=Secret.from_env_var("OPENAI_API_KEY"), ) pipeline.add_component("evaluator", evaluator) # Each metric expects a specific set of parameters as input. Refer to the # UpTrainMetric class' documentation for more details. -output = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) +results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) -for output in output["evaluator"]["results"]: +for output in results["evaluator"]["results"]: print(output) diff --git a/integrations/uptrain/pyproject.toml b/integrations/uptrain/pyproject.toml index 498772313..d86d12bd1 100644 --- a/integrations/uptrain/pyproject.toml +++ b/integrations/uptrain/pyproject.toml @@ -7,14 +7,13 @@ name = "uptrain-haystack" dynamic = ["version"] description = 'An integration of UpTrain LLM evaluation framework with Haystack' readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8" license = "Apache-2.0" keywords = [] authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -22,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai", "uptrain>=0.5"] +dependencies = ["haystack-ai>=2.0.0b6", "uptrain>=0.5"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain" @@ -49,7 +48,7 @@ cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] [[tool.hatch.envs.all.matrix]] -python = ["3.7", "3.8", "3.9", "3.10", "3.11"] +python = ["3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.lint] detached = true @@ -113,6 +112,7 @@ ignore = [ # Misc "S101", "TID252", + "B008", ] unfixable = [ # Don't touch unused imports diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py index e8366dfc0..b9a3d231f 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py @@ -1,5 +1,5 @@ -from .evaluator import UpTrainEvaluator -from .metrics import UpTrainMetric +from .uptrain_evaluator import UpTrainEvaluator +from .uptrain_metrics import UpTrainMetric __all__ = ( "UpTrainEvaluator", diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py similarity index 91% rename from integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py rename to integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py index d0d6a74bd..f9a9b863c 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py @@ -1,9 +1,9 @@ import json -import os from typing import Any, Dict, List, Optional, Union from haystack import DeserializationError, component, default_from_dict, default_to_dict -from haystack_integrations.components.evaluators.metrics import ( +from haystack.utils import Secret, deserialize_secrets_inplace +from haystack_integrations.components.evaluators.uptrain_metrics import ( METRIC_DESCRIPTORS, InputConverters, OutputConverters, @@ -32,7 +32,7 @@ def __init__( metric_params: Optional[Dict[str, Any]] = None, *, api: str = "openai", - api_key_env_var: Optional[str] = "OPENAI_API_KEY", + api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), api_params: Optional[Dict[str, Any]] = None, ): """ @@ -46,8 +46,8 @@ def __init__( The API to use for evaluation. Supported APIs: "openai", "uptrain". - :param api_key_env_var: - The name of the environment variable containing the API key. + :param api_key: + The API key to use. :param api_params: Additional parameters to pass to the API client. """ @@ -55,7 +55,7 @@ def __init__( self.metric_params = metric_params self.descriptor = METRIC_DESCRIPTORS[self.metric] self.api = api - self.api_key_env_var = api_key_env_var + self.api_key = api_key self.api_params = api_params self._init_backend() @@ -73,7 +73,7 @@ def run(self, **inputs) -> Dict[str, Any]: evaluator = UpTrainEvaluator( metric=UpTrainMetric.FACTUAL_ACCURACY, api="openai", - api_key_env_var="OPENAI_API_KEY", + api_key=Secret.from_env_var("OPENAI_API_KEY"), ) pipeline.add_component("evaluator", evaluator) @@ -140,7 +140,7 @@ def check_serializable(obj: Any): metric=self.metric, metric_params=self.metric_params, api=self.api, - api_key_env_var=self.api_key_env_var, + api_key=self.api_key.to_dict(), api_params=self.api_params, ) @@ -152,6 +152,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator": :param data: The dictionary to deserialize from. """ + deserialize_secrets_inplace(data["init_parameters"], ["api_key"]) return default_from_dict(cls, data) def _init_backend(self): @@ -185,11 +186,8 @@ def _init_backend(self): msg = f"Unsupported API '{self.api}' for UpTrain evaluator. Supported APIs: {supported_apis}" raise ValueError(msg) - api_key = os.environ.get(self.api_key_env_var) - if api_key is None: - msg = f"Missing API key environment variable '{self.api_key_env_var}' for UpTrain evaluator" - raise ValueError(msg) - + api_key = self.api_key.resolve_value() + assert api_key is not None if self.api == "openai": backend_client = EvalLLM(openai_api_key=api_key) elif self.api == "uptrain": diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py similarity index 98% rename from integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py rename to integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py index daf889058..1020f391e 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py @@ -257,7 +257,18 @@ def validate_outputs(outputs: List[Dict[str, Any]]): msg = "UpTrain evaluator expects outputs to be a list of `dict`s" elif not all(isinstance(y, str) for x in outputs for y in x.keys()): msg = "UpTrain evaluator expects keys in the output dicts to be `str`" - elif not all(isinstance(y, (float, str)) for x in outputs for y in x.values()): + elif not all( + y is None + or isinstance( + y, + ( + float, + str, + ), + ) + for x in outputs + for y in x.values() + ): msg = "UpTrain evaluator expects values in the output dicts to be either `str` or `float`" if msg is not None: diff --git a/integrations/uptrain/tests/test_evaluator.py b/integrations/uptrain/tests/test_evaluator.py index 0ecb57bd8..829734708 100644 --- a/integrations/uptrain/tests/test_evaluator.py +++ b/integrations/uptrain/tests/test_evaluator.py @@ -8,6 +8,7 @@ from haystack import DeserializationError from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric +from haystack.utils import Secret DEFAULT_QUESTIONS = [ "Which is the most popular global sport?", @@ -102,43 +103,47 @@ def log_and_evaluate(self, data, checks, **kwargs): return data -@patch("os.environ.get") -def test_evaluator_api(os_environ_get): - api_key_var = "test-api-key" - os_environ_get.return_value = api_key_var +def test_evaluator_api(monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + monkeypatch.setenv("UPTRAIN_API_KEY", "test-api-key") eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS) assert eval.api == "openai" - assert eval.api_key_env_var == "OPENAI_API_KEY" + assert eval.api_key == Secret.from_env_var("OPENAI_API_KEY") - eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key_env_var="UPTRAIN_API_KEY") + eval = UpTrainEvaluator( + UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key=Secret.from_env_var("UPTRAIN_API_KEY") + ) assert eval.api == "uptrain" - assert eval.api_key_env_var == "UPTRAIN_API_KEY" + assert eval.api_key == Secret.from_env_var("UPTRAIN_API_KEY") with pytest.raises(ValueError, match="Unsupported API"): UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="cohere") - os_environ_get.return_value = None - with pytest.raises(ValueError, match="Missing API key"): - UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain") - + with pytest.raises(ValueError, match="None of the following authentication environment variables are set"): + UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain", api_key=Secret.from_env_var("asd39920qqq")) -@patch("os.environ.get") -def test_evaluator_metric_init_params(os_environ_get): - api_key = "test-api-key" - os_environ_get.return_value = api_key - eval = UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"llm_persona": "village idiot"}) +def test_evaluator_metric_init_params(): + eval = UpTrainEvaluator( + UpTrainMetric.CRITIQUE_TONE, + metric_params={"llm_persona": "village idiot"}, + api_key=Secret.from_token("Aaa"), + ) assert eval._backend_metric.llm_persona == "village idiot" with pytest.raises(ValueError, match="Invalid init parameters"): - UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}) + UpTrainEvaluator( + UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}, api_key=Secret.from_token("Aaa") + ) with pytest.raises(ValueError, match="unexpected init parameters"): - UpTrainEvaluator(UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}) + UpTrainEvaluator( + UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}, api_key=Secret.from_token("Aaa") + ) with pytest.raises(ValueError, match="expected init parameters"): - UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING) + UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING, api_key=Secret.from_token("Aaa")) @patch("os.environ.get") @@ -149,7 +154,7 @@ def test_evaluator_serde(os_environ_get): "metric": UpTrainMetric.RESPONSE_MATCHING, "metric_params": {"method": "rouge"}, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_env_var("ENV_VAR", strict=False), "api_params": {"eval_name": "test"}, } eval = UpTrainEvaluator(**init_params) @@ -158,7 +163,7 @@ def test_evaluator_serde(os_environ_get): assert eval.metric == new_eval.metric assert eval.api == new_eval.api - assert eval.api_key_env_var == new_eval.api_key_env_var + assert eval.api_key == new_eval.api_key assert eval.metric_params == new_eval.metric_params assert eval.api_params == new_eval.api_params assert type(new_eval._backend_client) == type(eval._backend_client) @@ -191,14 +196,12 @@ def test_evaluator_serde(os_environ_get): (UpTrainMetric.RESPONSE_MATCHING, {"ground_truths": [], "responses": []}, {"method": "llm"}), ], ) -@patch("os.environ.get") -def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params): - os_environ_get.return_value = "abacab" +def test_evaluator_valid_inputs(metric, inputs, params): init_params = { "metric": metric, "metric_params": params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params) @@ -220,15 +223,13 @@ def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params): (UpTrainMetric.RESPONSE_RELEVANCE, {"responses": []}, "expected input parameter ", None), ], ) -@patch("os.environ.get") -def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, params): - os_environ_get.return_value = "abacab" +def test_evaluator_invalid_inputs(metric, inputs, error_string, params): with pytest.raises(ValueError, match=error_string): init_params = { "metric": metric, "metric_params": params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params) @@ -299,14 +300,12 @@ def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, ), ], ) -@patch("os.environ.get") -def test_evaluator_outputs(os_environ_get, metric, inputs, expected_outputs, metric_params): - os_environ_get.return_value = "abacab" +def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params): init_params = { "metric": metric, "metric_params": metric_params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params)