refactor!: Use Secret for API key in UpTrainEvaluator #347

Merged (3 commits) on Feb 7, 2024
7 changes: 4 additions & 3 deletions integrations/uptrain/example/example.py
@@ -2,6 +2,7 @@

from haystack import Pipeline
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric
+from haystack.utils import Secret

QUESTIONS = [
"Which is the most popular global sport?",
@@ -24,13 +25,13 @@
evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
-api_key_env_var="OPENAI_API_KEY",
+api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)

# Each metric expects a specific set of parameters as input. Refer to the
# UpTrainMetric class' documentation for more details.
-output = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}})
+results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}})

-for output in output["evaluator"]["results"]:
+for output in results["evaluator"]["results"]:
print(output)
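
For callers migrating across this breaking change, a minimal before/after sketch of the new constructor argument (assuming haystack-ai, uptrain and this integration are installed, and OPENAI_API_KEY is set in the environment):

from haystack.utils import Secret
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric

# Before this PR: the component received the *name* of an environment variable.
# evaluator = UpTrainEvaluator(
#     metric=UpTrainMetric.FACTUAL_ACCURACY,
#     api="openai",
#     api_key_env_var="OPENAI_API_KEY",
# )

# After this PR: the component receives a Secret and resolves it itself.
evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.FACTUAL_ACCURACY,
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)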
8 changes: 4 additions & 4 deletions integrations/uptrain/pyproject.toml
@@ -7,22 +7,21 @@ name = "uptrain-haystack"
dynamic = ["version"]
description = 'An integration of UpTrain LLM evaluation framework with Haystack'
readme = "README.md"
-requires-python = ">=3.7"
+requires-python = ">=3.8"
license = "Apache-2.0"
keywords = []
authors = [{ name = "deepset GmbH", email = "[email protected]" }]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
-"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
-dependencies = ["haystack-ai", "uptrain>=0.5"]
+dependencies = ["haystack-ai>=2.0.0b6", "uptrain>=0.5"]

[project.urls]
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain"
@@ -49,7 +48,7 @@ cov-report = ["- coverage combine", "coverage report"]
cov = ["test-cov", "cov-report"]

[[tool.hatch.envs.all.matrix]]
-python = ["3.7", "3.8", "3.9", "3.10", "3.11"]
+python = ["3.8", "3.9", "3.10", "3.11"]

[tool.hatch.envs.lint]
detached = true
@@ -113,6 +112,7 @@ ignore = [
# Misc
"S101",
"TID252",
+"B008",
]
unfixable = [
# Don't touch unused imports
@@ -1,5 +1,5 @@
-from .evaluator import UpTrainEvaluator
-from .metrics import UpTrainMetric
+from .uptrain_evaluator import UpTrainEvaluator
+from .uptrain_metrics import UpTrainMetric

__all__ = (
"UpTrainEvaluator",
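Because the package __init__ above still re-exports both names, the public import path is unchanged by the module rename; only code that imported the internal modules directly would need updating (hypothetical caller code, shown for illustration):

# Public import path, unaffected by the rename:
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric

# Direct imports of the internal modules are the only ones affected, e.g.:
# before: from haystack_integrations.components.evaluators.evaluator import UpTrainEvaluator
# after:  from haystack_integrations.components.evaluators.uptrain_evaluator import UpTrainEvaluator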
@@ -1,9 +1,9 @@
import json
-import os
from typing import Any, Dict, List, Optional, Union

from haystack import DeserializationError, component, default_from_dict, default_to_dict
-from haystack_integrations.components.evaluators.metrics import (
+from haystack.utils import Secret, deserialize_secrets_inplace
+from haystack_integrations.components.evaluators.uptrain_metrics import (
METRIC_DESCRIPTORS,
InputConverters,
OutputConverters,
@@ -32,7 +32,7 @@ def __init__(
metric_params: Optional[Dict[str, Any]] = None,
*,
api: str = "openai",
-api_key_env_var: Optional[str] = "OPENAI_API_KEY",
+api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
api_params: Optional[Dict[str, Any]] = None,
):
"""
@@ -46,16 +46,16 @@
The API to use for evaluation.

Supported APIs: "openai", "uptrain".
-:param api_key_env_var:
-The name of the environment variable containing the API key.
+:param api_key:
+The API key to use.
:param api_params:
Additional parameters to pass to the API client.
"""
self.metric = metric if isinstance(metric, UpTrainMetric) else UpTrainMetric.from_str(metric)
self.metric_params = metric_params
self.descriptor = METRIC_DESCRIPTORS[self.metric]
self.api = api
-self.api_key_env_var = api_key_env_var
+self.api_key = api_key
self.api_params = api_params

self._init_backend()
@@ -73,7 +73,7 @@ def run(self, **inputs) -> Dict[str, Any]:
evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
-api_key_env_var="OPENAI_API_KEY",
+api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)

@@ -140,7 +140,7 @@ def check_serializable(obj: Any):
metric=self.metric,
metric_params=self.metric_params,
api=self.api,
-api_key_env_var=self.api_key_env_var,
+api_key=self.api_key.to_dict(),
api_params=self.api_params,
)

@@ -152,6 +152,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator":
:param data:
The dictionary to deserialize from.
"""
+deserialize_secrets_inplace(data["init_parameters"], ["api_key"])
return default_from_dict(cls, data)

def _init_backend(self):
@@ -185,11 +186,8 @@ def _init_backend(self):
msg = f"Unsupported API '{self.api}' for UpTrain evaluator. Supported APIs: {supported_apis}"
raise ValueError(msg)

-api_key = os.environ.get(self.api_key_env_var)
-if api_key is None:
-msg = f"Missing API key environment variable '{self.api_key_env_var}' for UpTrain evaluator"
-raise ValueError(msg)
-
+api_key = self.api_key.resolve_value()
+assert api_key is not None
if self.api == "openai":
backend_client = EvalLLM(openai_api_key=api_key)
elif self.api == "uptrain":
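Taken together, the to_dict/from_dict/_init_backend changes above mean the component now serializes the secret's metadata rather than an environment-variable name, and resolves the key via Secret.resolve_value() at construction time. A minimal round-trip sketch (assuming haystack-ai and uptrain are installed and OPENAI_API_KEY is set, since the strict env-var secret resolves during construction):

from haystack.utils import Secret
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric

evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.FACTUAL_ACCURACY,
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)

data = evaluator.to_dict()
# The serialized init parameters carry only the env-var reference produced by
# Secret.to_dict(), never the resolved key itself.

restored = UpTrainEvaluator.from_dict(data)
assert restored.api_key == Secret.from_env_var("OPENAI_API_KEY")

This is presumably also why the updated serde test below switches to an env-var secret (with strict=False) rather than a token secret: token-based secrets are generally not meant to be written out during serialization.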
@@ -257,7 +257,18 @@ def validate_outputs(outputs: List[Dict[str, Any]]):
msg = "UpTrain evaluator expects outputs to be a list of `dict`s"
elif not all(isinstance(y, str) for x in outputs for y in x.keys()):
msg = "UpTrain evaluator expects keys in the output dicts to be `str`"
-elif not all(isinstance(y, (float, str)) for x in outputs for y in x.values()):
+elif not all(
+y is None
+or isinstance(
+y,
+(
+float,
+str,
+),
+)
+for x in outputs
+for y in x.values()
+):
msg = "UpTrain evaluator expects values in the output dicts to be either `str` or `float`"

if msg is not None:
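The expanded condition above is logically a one-liner that now also tolerates None scores. A compact equivalent, shown only for readability (the multi-line form above is what the PR ships):

from typing import Any, Dict, List

def outputs_have_valid_values(outputs: List[Dict[str, Any]]) -> bool:
    # Each score value may be a float, a str, or None.
    return all(y is None or isinstance(y, (float, str)) for x in outputs for y in x.values())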
65 changes: 32 additions & 33 deletions integrations/uptrain/tests/test_evaluator.py
@@ -8,6 +8,7 @@
from haystack import DeserializationError

from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric
+from haystack.utils import Secret

DEFAULT_QUESTIONS = [
"Which is the most popular global sport?",
@@ -102,43 +103,47 @@ def log_and_evaluate(self, data, checks, **kwargs):
return data


-@patch("os.environ.get")
-def test_evaluator_api(os_environ_get):
-api_key_var = "test-api-key"
-os_environ_get.return_value = api_key_var
+def test_evaluator_api(monkeypatch):
+monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
+monkeypatch.setenv("UPTRAIN_API_KEY", "test-api-key")

eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS)
assert eval.api == "openai"
-assert eval.api_key_env_var == "OPENAI_API_KEY"
+assert eval.api_key == Secret.from_env_var("OPENAI_API_KEY")

-eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key_env_var="UPTRAIN_API_KEY")
+eval = UpTrainEvaluator(
+UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key=Secret.from_env_var("UPTRAIN_API_KEY")
+)
assert eval.api == "uptrain"
-assert eval.api_key_env_var == "UPTRAIN_API_KEY"
+assert eval.api_key == Secret.from_env_var("UPTRAIN_API_KEY")

with pytest.raises(ValueError, match="Unsupported API"):
UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="cohere")

-os_environ_get.return_value = None
-with pytest.raises(ValueError, match="Missing API key"):
-UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain")

+with pytest.raises(ValueError, match="None of the following authentication environment variables are set"):
+UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain", api_key=Secret.from_env_var("asd39920qqq"))

-@patch("os.environ.get")
-def test_evaluator_metric_init_params(os_environ_get):
-api_key = "test-api-key"
-os_environ_get.return_value = api_key

-eval = UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"llm_persona": "village idiot"})
+def test_evaluator_metric_init_params():
+eval = UpTrainEvaluator(
+UpTrainMetric.CRITIQUE_TONE,
+metric_params={"llm_persona": "village idiot"},
+api_key=Secret.from_token("Aaa"),
+)
assert eval._backend_metric.llm_persona == "village idiot"

with pytest.raises(ValueError, match="Invalid init parameters"):
-UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"})
+UpTrainEvaluator(
+UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}, api_key=Secret.from_token("Aaa")
+)

with pytest.raises(ValueError, match="unexpected init parameters"):
-UpTrainEvaluator(UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True})
+UpTrainEvaluator(
+UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}, api_key=Secret.from_token("Aaa")
+)

with pytest.raises(ValueError, match="expected init parameters"):
-UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING)
+UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING, api_key=Secret.from_token("Aaa"))


@patch("os.environ.get")
@@ -149,7 +154,7 @@ def test_evaluator_serde(os_environ_get):
"metric": UpTrainMetric.RESPONSE_MATCHING,
"metric_params": {"method": "rouge"},
"api": "uptrain",
-"api_key_env_var": "abacab",
+"api_key": Secret.from_env_var("ENV_VAR", strict=False),
"api_params": {"eval_name": "test"},
}
eval = UpTrainEvaluator(**init_params)
@@ -158,7 +163,7 @@

assert eval.metric == new_eval.metric
assert eval.api == new_eval.api
-assert eval.api_key_env_var == new_eval.api_key_env_var
+assert eval.api_key == new_eval.api_key
assert eval.metric_params == new_eval.metric_params
assert eval.api_params == new_eval.api_params
assert type(new_eval._backend_client) == type(eval._backend_client)
@@ -191,14 +196,12 @@ def test_evaluator_serde(os_environ_get):
(UpTrainMetric.RESPONSE_MATCHING, {"ground_truths": [], "responses": []}, {"method": "llm"}),
],
)
-@patch("os.environ.get")
-def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params):
-os_environ_get.return_value = "abacab"
+def test_evaluator_valid_inputs(metric, inputs, params):
init_params = {
"metric": metric,
"metric_params": params,
"api": "uptrain",
-"api_key_env_var": "abacab",
+"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
@@ -220,15 +223,13 @@ def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params):
(UpTrainMetric.RESPONSE_RELEVANCE, {"responses": []}, "expected input parameter ", None),
],
)
-@patch("os.environ.get")
-def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, params):
-os_environ_get.return_value = "abacab"
+def test_evaluator_invalid_inputs(metric, inputs, error_string, params):
with pytest.raises(ValueError, match=error_string):
init_params = {
"metric": metric,
"metric_params": params,
"api": "uptrain",
-"api_key_env_var": "abacab",
+"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
@@ -299,14 +300,12 @@ def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string,
),
],
)
-@patch("os.environ.get")
-def test_evaluator_outputs(os_environ_get, metric, inputs, expected_outputs, metric_params):
-os_environ_get.return_value = "abacab"
+def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params):
init_params = {
"metric": metric,
"metric_params": metric_params,
"api": "uptrain",
-"api_key_env_var": "abacab",
+"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
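Finally, a short sketch of the behavioural shift the rewritten test_evaluator_api pins down (assuming pytest and this integration are installed; the test name here is illustrative): the missing-key failure now comes from Secret resolution itself rather than from a manual os.environ.get lookup.

import pytest

from haystack.utils import Secret
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric


def test_missing_env_var_raises(monkeypatch):
    # Previously this failed with "Missing API key ..."; a strict env-var Secret
    # raises its own error when the variable is unset.
    monkeypatch.delenv("UPTRAIN_API_KEY", raising=False)
    with pytest.raises(ValueError, match="None of the following authentication environment variables are set"):
        UpTrainEvaluator(
            UpTrainMetric.CONTEXT_RELEVANCE,
            api="uptrain",
            api_key=Secret.from_env_var("UPTRAIN_API_KEY"),
        )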