From 1fbad9a045546f7fe1ef9701fbfa933615076bfc Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Wed, 7 Feb 2024 08:15:21 +0100 Subject: [PATCH 1/7] Add script and workflow to label PRs that edit docstrings (#344) --- .github/utils/docstrings_checksum.py | 45 +++++++++++++++++ .github/workflows/CI_docstring_labeler.yml | 56 ++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 .github/utils/docstrings_checksum.py create mode 100644 .github/workflows/CI_docstring_labeler.yml diff --git a/.github/utils/docstrings_checksum.py b/.github/utils/docstrings_checksum.py new file mode 100644 index 000000000..6789d3bcd --- /dev/null +++ b/.github/utils/docstrings_checksum.py @@ -0,0 +1,45 @@ +import ast +import hashlib +from pathlib import Path +from typing import Iterator + + +def docstrings_checksum(python_files: Iterator[Path]): + files_content = (f.read_text() for f in python_files) + trees = (ast.parse(c) for c in files_content) + + # Get all docstrings from async functions, functions, + # classes and modules definitions + docstrings = [] + for tree in trees: + for node in ast.walk(tree): + if not isinstance( + node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module) + ): + # Skip all node types that can't have docstrings to prevent failures + continue + docstring = ast.get_docstring(node) + if docstring: + docstrings.append(docstring) + + # Sort them to be safe, since ast.walk() returns + # nodes in no specified order. + # See https://docs.python.org/3/library/ast.html#ast.walk + docstrings.sort() + + return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest() + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--root", help="Project root folder", required=True, type=Path) + args = parser.parse_args() + + # Get all Python files + root: Path = args.root.absolute() + python_files = root.glob("integrations/**/*.py") + + md5 = docstrings_checksum(python_files) + print(md5) diff --git a/.github/workflows/CI_docstring_labeler.yml b/.github/workflows/CI_docstring_labeler.yml new file mode 100644 index 000000000..151bdb321 --- /dev/null +++ b/.github/workflows/CI_docstring_labeler.yml @@ -0,0 +1,56 @@ +name: Core / Add label on docstrings edit + +on: + pull_request_target: + paths: + - "integrations/**/*.py" + +jobs: + label: + runs-on: ubuntu-latest + + steps: + - name: Checkout base commit + uses: actions/checkout@v4 + with: + ref: ${{ github.base_ref }} + + - name: Copy file + # We copy our script after base ref checkout so we keep executing + # the same version even after checking out the HEAD ref. + # This is done to prevent executing malicious code in forks' PRs. + run: cp .github/utils/docstrings_checksum.py "${{ runner.temp }}/docstrings_checksum.py" + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Get docstrings + id: base-docstrings + run: | + CHECKSUM=$(python "${{ runner.temp }}/docstrings_checksum.py" --root "${{ github.workspace }}") + echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT" + + - name: Checkout HEAD commit + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + # This must be set to correctly checkout a fork + repository: ${{ github.event.pull_request.head.repo.full_name }} + + - name: Get docstrings + id: head-docstrings + run: | + CHECKSUM=$(python "${{ runner.temp }}/docstrings_checksum.py" --root "${{ github.workspace }}") + echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT" + + - name: Check if we should label + id: run-check + run: echo "should_run=${{ steps.base-docstrings.outputs.checksum != steps.head-docstrings.outputs.checksum }}" >> "$GITHUB_OUTPUT" + + - name: Add label + if: ${{ steps.run-check.outputs.should_run == 'true' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh pr edit ${{ github.event.pull_request.html_url }} --add-label "type:documentation" From 593af1ff599b53f3b2009d394052ea60a5611b06 Mon Sep 17 00:00:00 2001 From: Madeesh Kannan Date: Wed, 7 Feb 2024 10:28:28 +0100 Subject: [PATCH 2/7] refactor!: Use `Secret` for API key in `UpTrainEvaluator` (#347) * refactor!: Use `Secret` for API key in `UpTrainEvaluator` Some other misc build fixes * Disambiguate module names * `mypy` fixes --- integrations/uptrain/example/example.py | 7 +- integrations/uptrain/pyproject.toml | 8 +-- .../components/evaluators/__init__.py | 4 +- .../{evaluator.py => uptrain_evaluator.py} | 24 ++++--- .../{metrics.py => uptrain_metrics.py} | 13 +++- integrations/uptrain/tests/test_evaluator.py | 65 +++++++++---------- 6 files changed, 65 insertions(+), 56 deletions(-) rename integrations/uptrain/src/haystack_integrations/components/evaluators/{evaluator.py => uptrain_evaluator.py} (91%) rename integrations/uptrain/src/haystack_integrations/components/evaluators/{metrics.py => uptrain_metrics.py} (98%) diff --git a/integrations/uptrain/example/example.py b/integrations/uptrain/example/example.py index ea3c8cc7e..fe332548d 100644 --- a/integrations/uptrain/example/example.py +++ b/integrations/uptrain/example/example.py @@ -2,6 +2,7 @@ from haystack import Pipeline from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric +from haystack.utils import Secret QUESTIONS = [ "Which is the most popular global sport?", @@ -24,13 +25,13 @@ evaluator = UpTrainEvaluator( metric=UpTrainMetric.FACTUAL_ACCURACY, api="openai", - api_key_env_var="OPENAI_API_KEY", + api_key=Secret.from_env_var("OPENAI_API_KEY"), ) pipeline.add_component("evaluator", evaluator) # Each metric expects a specific set of parameters as input. Refer to the # UpTrainMetric class' documentation for more details. -output = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) +results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) -for output in output["evaluator"]["results"]: +for output in results["evaluator"]["results"]: print(output) diff --git a/integrations/uptrain/pyproject.toml b/integrations/uptrain/pyproject.toml index 498772313..d86d12bd1 100644 --- a/integrations/uptrain/pyproject.toml +++ b/integrations/uptrain/pyproject.toml @@ -7,14 +7,13 @@ name = "uptrain-haystack" dynamic = ["version"] description = 'An integration of UpTrain LLM evaluation framework with Haystack' readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8" license = "Apache-2.0" keywords = [] authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -22,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai", "uptrain>=0.5"] +dependencies = ["haystack-ai>=2.0.0b6", "uptrain>=0.5"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain" @@ -49,7 +48,7 @@ cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] [[tool.hatch.envs.all.matrix]] -python = ["3.7", "3.8", "3.9", "3.10", "3.11"] +python = ["3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.lint] detached = true @@ -113,6 +112,7 @@ ignore = [ # Misc "S101", "TID252", + "B008", ] unfixable = [ # Don't touch unused imports diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py index e8366dfc0..b9a3d231f 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/__init__.py @@ -1,5 +1,5 @@ -from .evaluator import UpTrainEvaluator -from .metrics import UpTrainMetric +from .uptrain_evaluator import UpTrainEvaluator +from .uptrain_metrics import UpTrainMetric __all__ = ( "UpTrainEvaluator", diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py similarity index 91% rename from integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py rename to integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py index d0d6a74bd..f9a9b863c 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/evaluator.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_evaluator.py @@ -1,9 +1,9 @@ import json -import os from typing import Any, Dict, List, Optional, Union from haystack import DeserializationError, component, default_from_dict, default_to_dict -from haystack_integrations.components.evaluators.metrics import ( +from haystack.utils import Secret, deserialize_secrets_inplace +from haystack_integrations.components.evaluators.uptrain_metrics import ( METRIC_DESCRIPTORS, InputConverters, OutputConverters, @@ -32,7 +32,7 @@ def __init__( metric_params: Optional[Dict[str, Any]] = None, *, api: str = "openai", - api_key_env_var: Optional[str] = "OPENAI_API_KEY", + api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), api_params: Optional[Dict[str, Any]] = None, ): """ @@ -46,8 +46,8 @@ def __init__( The API to use for evaluation. Supported APIs: "openai", "uptrain". - :param api_key_env_var: - The name of the environment variable containing the API key. + :param api_key: + The API key to use. :param api_params: Additional parameters to pass to the API client. """ @@ -55,7 +55,7 @@ def __init__( self.metric_params = metric_params self.descriptor = METRIC_DESCRIPTORS[self.metric] self.api = api - self.api_key_env_var = api_key_env_var + self.api_key = api_key self.api_params = api_params self._init_backend() @@ -73,7 +73,7 @@ def run(self, **inputs) -> Dict[str, Any]: evaluator = UpTrainEvaluator( metric=UpTrainMetric.FACTUAL_ACCURACY, api="openai", - api_key_env_var="OPENAI_API_KEY", + api_key=Secret.from_env_var("OPENAI_API_KEY"), ) pipeline.add_component("evaluator", evaluator) @@ -140,7 +140,7 @@ def check_serializable(obj: Any): metric=self.metric, metric_params=self.metric_params, api=self.api, - api_key_env_var=self.api_key_env_var, + api_key=self.api_key.to_dict(), api_params=self.api_params, ) @@ -152,6 +152,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator": :param data: The dictionary to deserialize from. """ + deserialize_secrets_inplace(data["init_parameters"], ["api_key"]) return default_from_dict(cls, data) def _init_backend(self): @@ -185,11 +186,8 @@ def _init_backend(self): msg = f"Unsupported API '{self.api}' for UpTrain evaluator. Supported APIs: {supported_apis}" raise ValueError(msg) - api_key = os.environ.get(self.api_key_env_var) - if api_key is None: - msg = f"Missing API key environment variable '{self.api_key_env_var}' for UpTrain evaluator" - raise ValueError(msg) - + api_key = self.api_key.resolve_value() + assert api_key is not None if self.api == "openai": backend_client = EvalLLM(openai_api_key=api_key) elif self.api == "uptrain": diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py similarity index 98% rename from integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py rename to integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py index daf889058..1020f391e 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/metrics.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain_metrics.py @@ -257,7 +257,18 @@ def validate_outputs(outputs: List[Dict[str, Any]]): msg = "UpTrain evaluator expects outputs to be a list of `dict`s" elif not all(isinstance(y, str) for x in outputs for y in x.keys()): msg = "UpTrain evaluator expects keys in the output dicts to be `str`" - elif not all(isinstance(y, (float, str)) for x in outputs for y in x.values()): + elif not all( + y is None + or isinstance( + y, + ( + float, + str, + ), + ) + for x in outputs + for y in x.values() + ): msg = "UpTrain evaluator expects values in the output dicts to be either `str` or `float`" if msg is not None: diff --git a/integrations/uptrain/tests/test_evaluator.py b/integrations/uptrain/tests/test_evaluator.py index 0ecb57bd8..829734708 100644 --- a/integrations/uptrain/tests/test_evaluator.py +++ b/integrations/uptrain/tests/test_evaluator.py @@ -8,6 +8,7 @@ from haystack import DeserializationError from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric +from haystack.utils import Secret DEFAULT_QUESTIONS = [ "Which is the most popular global sport?", @@ -102,43 +103,47 @@ def log_and_evaluate(self, data, checks, **kwargs): return data -@patch("os.environ.get") -def test_evaluator_api(os_environ_get): - api_key_var = "test-api-key" - os_environ_get.return_value = api_key_var +def test_evaluator_api(monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + monkeypatch.setenv("UPTRAIN_API_KEY", "test-api-key") eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS) assert eval.api == "openai" - assert eval.api_key_env_var == "OPENAI_API_KEY" + assert eval.api_key == Secret.from_env_var("OPENAI_API_KEY") - eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key_env_var="UPTRAIN_API_KEY") + eval = UpTrainEvaluator( + UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key=Secret.from_env_var("UPTRAIN_API_KEY") + ) assert eval.api == "uptrain" - assert eval.api_key_env_var == "UPTRAIN_API_KEY" + assert eval.api_key == Secret.from_env_var("UPTRAIN_API_KEY") with pytest.raises(ValueError, match="Unsupported API"): UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="cohere") - os_environ_get.return_value = None - with pytest.raises(ValueError, match="Missing API key"): - UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain") - + with pytest.raises(ValueError, match="None of the following authentication environment variables are set"): + UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain", api_key=Secret.from_env_var("asd39920qqq")) -@patch("os.environ.get") -def test_evaluator_metric_init_params(os_environ_get): - api_key = "test-api-key" - os_environ_get.return_value = api_key - eval = UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"llm_persona": "village idiot"}) +def test_evaluator_metric_init_params(): + eval = UpTrainEvaluator( + UpTrainMetric.CRITIQUE_TONE, + metric_params={"llm_persona": "village idiot"}, + api_key=Secret.from_token("Aaa"), + ) assert eval._backend_metric.llm_persona == "village idiot" with pytest.raises(ValueError, match="Invalid init parameters"): - UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}) + UpTrainEvaluator( + UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}, api_key=Secret.from_token("Aaa") + ) with pytest.raises(ValueError, match="unexpected init parameters"): - UpTrainEvaluator(UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}) + UpTrainEvaluator( + UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}, api_key=Secret.from_token("Aaa") + ) with pytest.raises(ValueError, match="expected init parameters"): - UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING) + UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING, api_key=Secret.from_token("Aaa")) @patch("os.environ.get") @@ -149,7 +154,7 @@ def test_evaluator_serde(os_environ_get): "metric": UpTrainMetric.RESPONSE_MATCHING, "metric_params": {"method": "rouge"}, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_env_var("ENV_VAR", strict=False), "api_params": {"eval_name": "test"}, } eval = UpTrainEvaluator(**init_params) @@ -158,7 +163,7 @@ def test_evaluator_serde(os_environ_get): assert eval.metric == new_eval.metric assert eval.api == new_eval.api - assert eval.api_key_env_var == new_eval.api_key_env_var + assert eval.api_key == new_eval.api_key assert eval.metric_params == new_eval.metric_params assert eval.api_params == new_eval.api_params assert type(new_eval._backend_client) == type(eval._backend_client) @@ -191,14 +196,12 @@ def test_evaluator_serde(os_environ_get): (UpTrainMetric.RESPONSE_MATCHING, {"ground_truths": [], "responses": []}, {"method": "llm"}), ], ) -@patch("os.environ.get") -def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params): - os_environ_get.return_value = "abacab" +def test_evaluator_valid_inputs(metric, inputs, params): init_params = { "metric": metric, "metric_params": params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params) @@ -220,15 +223,13 @@ def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params): (UpTrainMetric.RESPONSE_RELEVANCE, {"responses": []}, "expected input parameter ", None), ], ) -@patch("os.environ.get") -def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, params): - os_environ_get.return_value = "abacab" +def test_evaluator_invalid_inputs(metric, inputs, error_string, params): with pytest.raises(ValueError, match=error_string): init_params = { "metric": metric, "metric_params": params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params) @@ -299,14 +300,12 @@ def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, ), ], ) -@patch("os.environ.get") -def test_evaluator_outputs(os_environ_get, metric, inputs, expected_outputs, metric_params): - os_environ_get.return_value = "abacab" +def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params): init_params = { "metric": metric, "metric_params": metric_params, "api": "uptrain", - "api_key_env_var": "abacab", + "api_key": Secret.from_token("Aaa"), "api_params": None, } eval = UpTrainEvaluator(**init_params) From 30164b8f759f1610b4002b45af2acf2ebcadbc0f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 7 Feb 2024 13:11:01 +0100 Subject: [PATCH 3/7] feat: Generate weaviate API docs (#351) * Generate weaviate API docs * update order --------- Co-authored-by: Daria Fokina --- .github/workflows/weaviate.yml | 4 ++++ integrations/weaviate/pydoc/config.yml | 28 ++++++++++++++++++++++++++ integrations/weaviate/pyproject.toml | 4 ++++ 3 files changed, 36 insertions(+) create mode 100644 integrations/weaviate/pydoc/config.yml diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 03cbd45a5..051415336 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -49,5 +49,9 @@ jobs: - name: Run Weaviate container run: docker-compose up -d + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov diff --git a/integrations/weaviate/pydoc/config.yml b/integrations/weaviate/pydoc/config.yml new file mode 100644 index 000000000..fa59e6874 --- /dev/null +++ b/integrations/weaviate/pydoc/config.yml @@ -0,0 +1,28 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.document_stores.weaviate.document_store", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Weaviate integration for Haystack + category_slug: haystack-integrations + title: Weaviate + slug: integrations-weaviate + order: 180 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_weaviate.md diff --git a/integrations/weaviate/pyproject.toml b/integrations/weaviate/pyproject.toml index 50f1c157c..fb132516c 100644 --- a/integrations/weaviate/pyproject.toml +++ b/integrations/weaviate/pyproject.toml @@ -27,6 +27,7 @@ classifiers = [ dependencies = [ "haystack-ai", "weaviate-client==3.*", + "haystack-pydoc-tools", ] [project.urls] @@ -62,6 +63,9 @@ cov = [ "test-cov", "cov-report", ] +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] From 6365baeedce47031dba7ff727486518d8846efcd Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Wed, 7 Feb 2024 14:59:52 +0100 Subject: [PATCH 4/7] update to latest haystack-ai version (#348) --- .../cohere/tests/test_cohere_chat_generator.py | 16 ++++++++-------- .../cohere/tests/test_cohere_generators.py | 17 +++++------------ 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/integrations/cohere/tests/test_cohere_chat_generator.py b/integrations/cohere/tests/test_cohere_chat_generator.py index edefc1a43..556535e10 100644 --- a/integrations/cohere/tests/test_cohere_chat_generator.py +++ b/integrations/cohere/tests/test_cohere_chat_generator.py @@ -3,7 +3,7 @@ import cohere import pytest -from haystack.components.generators.utils import default_streaming_callback +from haystack.components.generators.utils import print_streaming_chunk from haystack.dataclasses import ChatMessage, ChatRole, StreamingChunk from haystack_integrations.components.generators.cohere import CohereChatGenerator @@ -72,13 +72,13 @@ def test_init_with_parameters(self): component = CohereChatGenerator( api_key="test-api-key", model="command-nightly", - streaming_callback=default_streaming_callback, + streaming_callback=print_streaming_chunk, api_base_url="test-base-url", generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, ) assert component.api_key == "test-api-key" assert component.model == "command-nightly" - assert component.streaming_callback is default_streaming_callback + assert component.streaming_callback is print_streaming_chunk assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} @@ -101,7 +101,7 @@ def test_to_dict_with_parameters(self): component = CohereChatGenerator( api_key="test-api-key", model="command-nightly", - streaming_callback=default_streaming_callback, + streaming_callback=print_streaming_chunk, api_base_url="test-base-url", generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, ) @@ -110,7 +110,7 @@ def test_to_dict_with_parameters(self): "type": "haystack_integrations.components.generators.cohere.chat.chat_generator.CohereChatGenerator", "init_parameters": { "model": "command-nightly", - "streaming_callback": "haystack.components.generators.utils.default_streaming_callback", + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "api_base_url": "test-base-url", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, }, @@ -144,13 +144,13 @@ def test_from_dict(self, monkeypatch): "init_parameters": { "model": "command", "api_base_url": "test-base-url", - "streaming_callback": "haystack.components.generators.utils.default_streaming_callback", + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, }, } component = CohereChatGenerator.from_dict(data) assert component.model == "command" - assert component.streaming_callback is default_streaming_callback + assert component.streaming_callback is print_streaming_chunk assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} @@ -162,7 +162,7 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch): "init_parameters": { "model": "command", "api_base_url": "test-base-url", - "streaming_callback": "haystack.components.generators.utils.default_streaming_callback", + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, }, } diff --git a/integrations/cohere/tests/test_cohere_generators.py b/integrations/cohere/tests/test_cohere_generators.py index 90d4d3e28..5b12374a7 100644 --- a/integrations/cohere/tests/test_cohere_generators.py +++ b/integrations/cohere/tests/test_cohere_generators.py @@ -5,19 +5,12 @@ import pytest from cohere import COHERE_API_URL +from haystack.components.generators.utils import print_streaming_chunk from haystack_integrations.components.generators.cohere import CohereGenerator pytestmark = pytest.mark.generators -def default_streaming_callback(chunk): - """ - Default callback function for streaming responses from Cohere API. - Prints the tokens of the first completion to stdout as soon as they are received and returns the chunk unchanged. - """ - print(chunk.text, flush=True, end="") # noqa: T201 - - class TestCohereGenerator: def test_init_default(self): component = CohereGenerator(api_key="test-api-key") @@ -61,7 +54,7 @@ def test_to_dict_with_parameters(self): model="command-light", max_tokens=10, some_test_param="test-params", - streaming_callback=default_streaming_callback, + streaming_callback=print_streaming_chunk, api_base_url="test-base-url", ) data = component.to_dict() @@ -72,7 +65,7 @@ def test_to_dict_with_parameters(self): "max_tokens": 10, "some_test_param": "test-params", "api_base_url": "test-base-url", - "streaming_callback": "tests.test_cohere_generators.default_streaming_callback", + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", }, } @@ -106,13 +99,13 @@ def test_from_dict(self, monkeypatch): "max_tokens": 10, "some_test_param": "test-params", "api_base_url": "test-base-url", - "streaming_callback": "tests.test_cohere_generators.default_streaming_callback", + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", }, } component: CohereGenerator = CohereGenerator.from_dict(data) assert component.api_key == "test-key" assert component.model == "command" - assert component.streaming_callback == default_streaming_callback + assert component.streaming_callback == print_streaming_chunk assert component.api_base_url == "test-base-url" assert component.model_parameters == {"max_tokens": 10, "some_test_param": "test-params"} From f86c758f8dedd09f2dfc3f0fd126691881b4aacb Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Wed, 7 Feb 2024 17:37:57 +0100 Subject: [PATCH 5/7] google_vertex: create api docs (#355) --- .github/workflows/google_vertex.yml | 4 +++ integrations/google_vertex/pydoc/config.yml | 34 +++++++++++++++++++++ integrations/google_vertex/pyproject.toml | 5 ++- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 integrations/google_vertex/pydoc/config.yml diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml index cf60d3229..6f6c6d0d9 100644 --- a/.github/workflows/google_vertex.yml +++ b/.github/workflows/google_vertex.yml @@ -52,5 +52,9 @@ jobs: if: matrix.python-version == '3.9' && runner.os == 'Linux' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov diff --git a/integrations/google_vertex/pydoc/config.yml b/integrations/google_vertex/pydoc/config.yml new file mode 100644 index 000000000..86d0f3b52 --- /dev/null +++ b/integrations/google_vertex/pydoc/config.yml @@ -0,0 +1,34 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.generators.google_vertex.gemini", + "haystack_integrations.components.generators.google_vertex.captioner", + "haystack_integrations.components.generators.google_vertex.code_generator", + "haystack_integrations.components.generators.google_vertex.image_generator", + "haystack_integrations.components.generators.google_vertex.question_answering", + "haystack_integrations.components.generators.google_vertex.text_generator", + "haystack_integrations.components.generators.google_vertex.chat.gemini", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Google Vertex integration for Haystack + category_slug: haystack-integrations + title: Google Vertex + slug: integrations-google-vertex + order: 70 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_google_vertex.md \ No newline at end of file diff --git a/integrations/google_vertex/pyproject.toml b/integrations/google_vertex/pyproject.toml index ecd509f15..f846d5bc4 100644 --- a/integrations/google_vertex/pyproject.toml +++ b/integrations/google_vertex/pyproject.toml @@ -48,6 +48,7 @@ git_describe_command = 'git describe --tags --match="integrations/google_vertex- dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -60,7 +61,9 @@ cov = [ "test-cov", "cov-report", ] - +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.7", "3.8", "3.9", "3.10", "3.11"] From 6b099b622c12e9c4cc399110421dbdd3a3afaccc Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Wed, 7 Feb 2024 17:38:11 +0100 Subject: [PATCH 6/7] google_ai: create api docs (#354) --- .github/workflows/google_ai.yml | 4 ++++ integrations/google_ai/pydoc/config.yml | 29 +++++++++++++++++++++++++ integrations/google_ai/pyproject.toml | 5 ++++- 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 integrations/google_ai/pydoc/config.yml diff --git a/.github/workflows/google_ai.yml b/.github/workflows/google_ai.yml index 46a871a76..6093df4a4 100644 --- a/.github/workflows/google_ai.yml +++ b/.github/workflows/google_ai.yml @@ -53,5 +53,9 @@ jobs: if: matrix.python-version == '3.9' && runner.os == 'Linux' run: hatch run lint:all + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + - name: Run tests run: hatch run cov diff --git a/integrations/google_ai/pydoc/config.yml b/integrations/google_ai/pydoc/config.yml new file mode 100644 index 000000000..dd0706c29 --- /dev/null +++ b/integrations/google_ai/pydoc/config.yml @@ -0,0 +1,29 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.generators.google_ai.gemini", + "haystack_integrations.components.generators.google_ai.chat.gemini", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Google AI integration for Haystack + category_slug: haystack-integrations + title: Google AI + slug: integrations-google-ai + order: 60 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_google_ai.md \ No newline at end of file diff --git a/integrations/google_ai/pyproject.toml b/integrations/google_ai/pyproject.toml index 1127dc6bf..ced2310dd 100644 --- a/integrations/google_ai/pyproject.toml +++ b/integrations/google_ai/pyproject.toml @@ -49,6 +49,7 @@ git_describe_command = 'git describe --tags --match="integrations/google_ai-v[0- dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" @@ -61,7 +62,9 @@ cov = [ "test-cov", "cov-report", ] - +docs = [ + "pydoc-markdown pydoc/config.yml" +] [[tool.hatch.envs.all.matrix]] python = ["3.7", "3.8", "3.9", "3.10", "3.11"] From c5480af3012aab44d8eab6ddb8cb54fce1cdc917 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Wed, 7 Feb 2024 20:44:52 +0100 Subject: [PATCH 7/7] try run docker container as a step in workflow (#349) --- .github/workflows/unstructured.yml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index 6338b06e8..77ebb10ca 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -27,18 +27,23 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.8", "3.9", "3.10", "3.11"] - services: - unstructured-api: - image: "quay.io/unstructured-io/unstructured-api:latest" - ports: - - 8000:8000 - options: >- - --health-cmd "curl --fail http://localhost:8000/healthcheck || exit 1" - --health-interval 10s - --health-timeout 1s - --health-retries 10 steps: + - name: Free up disk space + run: | + sudo docker image prune --all --force + + - name: Run Unstructured API (docker) + run: | + docker run -d \ + --name unstructured-api \ + -p 8000:8000 \ + --health-cmd "curl --fail http://localhost:8000/healthcheck || exit 1" \ + --health-interval 10s \ + --health-timeout 1s \ + --health-retries 10 \ + quay.io/unstructured-io/unstructured-api:latest + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }}