From ad5a29039be7a68bb18eb1597da8e708ab6c793f Mon Sep 17 00:00:00 2001 From: Ashwin Mathur <97467100+awinml@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:52:08 +0530 Subject: [PATCH 1/7] feat: Add Optimum Embedders (#379) * Add Optimum Embedders * Add CI workflow * Update dependencies * Add embedding backend * Add pooling sub module; Update pyproject.toml with dependency info * Remove backend factory; Address review comments * Add API docs generation workflow * Add additional tests * Update order to 185 for API docs * Update integrations/optimum/pydoc/config.yml Co-authored-by: Daria Fokina --------- Co-authored-by: Daria Fokina --- .github/labeler.yml | 5 + .github/workflows/optimum.yml | 60 ++++ integrations/optimum/LICENSE.txt | 73 +++++ integrations/optimum/README.md | 30 ++ integrations/optimum/pydoc/config.yml | 32 ++ integrations/optimum/pyproject.toml | 202 ++++++++++++ .../components/embedders/__init__.py | 8 + .../components/embedders/optimum_backend.py | 99 ++++++ .../embedders/optimum_document_embedder.py | 251 +++++++++++++++ .../embedders/optimum_text_embedder.py | 201 ++++++++++++ .../components/embedders/pooling.py | 137 +++++++++ integrations/optimum/tests/__init__.py | 3 + .../optimum/tests/test_optimum_backend.py | 32 ++ .../tests/test_optimum_document_embedder.py | 288 ++++++++++++++++++ .../tests/test_optimum_text_embedder.py | 193 ++++++++++++ 15 files changed, 1614 insertions(+) create mode 100644 .github/workflows/optimum.yml create mode 100644 integrations/optimum/LICENSE.txt create mode 100644 integrations/optimum/README.md create mode 100644 integrations/optimum/pydoc/config.yml create mode 100644 integrations/optimum/pyproject.toml create mode 100644 integrations/optimum/src/haystack_integrations/components/embedders/__init__.py create mode 100644 integrations/optimum/src/haystack_integrations/components/embedders/optimum_backend.py create mode 100644 integrations/optimum/src/haystack_integrations/components/embedders/optimum_document_embedder.py create mode 100644 integrations/optimum/src/haystack_integrations/components/embedders/optimum_text_embedder.py create mode 100644 integrations/optimum/src/haystack_integrations/components/embedders/pooling.py create mode 100644 integrations/optimum/tests/__init__.py create mode 100644 integrations/optimum/tests/test_optimum_backend.py create mode 100644 integrations/optimum/tests/test_optimum_document_embedder.py create mode 100644 integrations/optimum/tests/test_optimum_text_embedder.py diff --git a/.github/labeler.yml b/.github/labeler.yml index 5f1b76912..a60f02c67 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -84,6 +84,11 @@ integration:opensearch: - any-glob-to-any-file: "integrations/opensearch/**/*" - any-glob-to-any-file: ".github/workflows/opensearch.yml" +integration:optimum: + - changed-files: + - any-glob-to-any-file: "integrations/optimum/**/*" + - any-glob-to-any-file: ".github/workflows/optimum.yml" + integration:pgvector: - changed-files: - any-glob-to-any-file: "integrations/pgvector/**/*" diff --git a/.github/workflows/optimum.yml b/.github/workflows/optimum.yml new file mode 100644 index 000000000..3b0d137da --- /dev/null +++ b/.github/workflows/optimum.yml @@ -0,0 +1,60 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / optimum + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/optimum/**" + - 
".github/workflows/optimum.yml" + +defaults: + run: + working-directory: integrations/optimum + +concurrency: + group: optimum-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.9", "3.10"] + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . + run: git config --system core.longpaths true + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all + + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov diff --git a/integrations/optimum/LICENSE.txt b/integrations/optimum/LICENSE.txt new file mode 100644 index 000000000..137069b82 --- /dev/null +++ b/integrations/optimum/LICENSE.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
+ +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/integrations/optimum/README.md b/integrations/optimum/README.md
new file mode 100644
index 000000000..1438f6e92
--- /dev/null
+++ b/integrations/optimum/README.md
@@ -0,0 +1,30 @@
+# optimum
+
+[![PyPI - Version](https://img.shields.io/pypi/v/optimum-haystack.svg)](https://pypi.org/project/optimum-haystack)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/optimum-haystack.svg)](https://pypi.org/project/optimum-haystack)
+
+-----
+
+Component to embed strings and Documents using models loaded with the HuggingFace Optimum library. This component is designed to run model inference seamlessly on the high-speed ONNX Runtime.
+
+**Table of Contents**
+
+- [Installation](#installation)
+- [License](#license)
+
+## Installation
+
+To use the ONNX Runtime on CPU, install the CPU extra:
+```console
+pip install optimum-haystack[cpu]
+```
+
+For the GPU runtimes:
+```console
+pip install optimum-haystack[gpu]
+```
+
+
+## License
+
+`optimum-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
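For context, the end-to-end usage this patch enables looks roughly like the following — a minimal sketch assembled from the `OptimumTextEmbedder` and `OptimumDocumentEmbedder` docstring examples added further down, assuming the package is installed with one of the extras above and the model can be downloaded from the Hugging Face Hub:

```python
# Minimal usage sketch, mirroring the docstring examples in the components added below.
# Assumes `optimum-haystack` is installed (CPU or GPU extra) and HF Hub access is available.
from haystack.dataclasses import Document
from haystack_integrations.components.embedders import OptimumDocumentEmbedder, OptimumTextEmbedder

# Embed a single string
text_embedder = OptimumTextEmbedder(model="sentence-transformers/all-mpnet-base-v2")
text_embedder.warm_up()  # downloads the model and converts it to ONNX on the fly
print(text_embedder.run("I love pizza!")["embedding"][:2])

# Embed Documents; each embedding is stored in the Document's `embedding` field
doc_embedder = OptimumDocumentEmbedder(model="sentence-transformers/all-mpnet-base-v2")
doc_embedder.warm_up()
result = doc_embedder.run([Document(content="I love pizza!")])
print(result["documents"][0].embedding[:2])
```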
diff --git a/integrations/optimum/pydoc/config.yml b/integrations/optimum/pydoc/config.yml new file mode 100644 index 000000000..5fb353b5d --- /dev/null +++ b/integrations/optimum/pydoc/config.yml @@ -0,0 +1,32 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: + [ + "haystack_integrations.components.embedders.optimum_backend", + "haystack_integrations.components.embedders.optimum_document_embedder", + "haystack_integrations.components.embedders.optimum_text_embedder", + "haystack_integrations.components.embedders.pooling", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Optimum integration for Haystack + category_slug: integrations-api + title: Optimum + slug: integrations-optimum + order: 185 + markdown: + descriptive_class_title: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_optimum.md diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml new file mode 100644 index 000000000..17bca2597 --- /dev/null +++ b/integrations/optimum/pyproject.toml @@ -0,0 +1,202 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "optimum-haystack" +dynamic = ["version"] +description = "Component to embed strings and Documents using models loaded with the HuggingFace Optimum library. This component is designed to seamlessly inference models using the high speed ONNX runtime." +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "deepset GmbH", email = "info@deepset.ai" }, + { name = "Ashwin Mathur", email = "" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai", + "transformers[sentencepiece]", + # The main export function of Optimum into ONNX has hidden dependencies. + # It depends on either "sentence-transformers", "diffusers" or "timm", based + # on which model is loaded from HF Hub. + # Ref: https://github.com/huggingface/optimum/blob/8651c0ca1cccf095458bc80329dec9df4601edb4/optimum/exporters/onnx/__main__.py#L164 + # "sentence-transformers" has been added, since most embedding models use it + "sentence-transformers>=2.2.0", + "optimum[onnxruntime]" +] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/optimum#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/optimum" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/optimum-v(?P.*)' + +[tool.hatch.version.raw-options] +root = "../.." 
+git_describe_command = 'git describe --tags --match="integrations/optimum-v[0-9]*"' + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", + "haystack-pydoc-tools" +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", +] +docs = ["pydoc-markdown pydoc/config.yml"] + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "black>=23.1.0", + "mypy>=1.0.0", + "ruff>=0.0.243", +] + +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" +style = [ + "ruff {args:.}", + "black --check --diff {args:.}", +] +fmt = [ + "black {args:.}", + "ruff --fix {args:.}", + "style", +] +all = [ + "style", + "typing", +] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.ruff.lint.isort] +known-first-party = ["src"] + +[tool.black] +target-version = ["py37"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py37" +line-length = 120 +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", "S106", "S107", + # Ignore complexity + "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] +# Examples can print their output +"examples/**" = ["T201"] +"tests/**" = ["T201"] + +[tool.coverage.run] +source_pkgs = ["optimum", "tests"] +branch = true +parallel = true + +[tool.coverage.paths] +optimum = ["src/haystack_integrations", "*/optimum/src/haystack_integrations"] +tests = ["tests", "*/optimum/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[[tool.mypy.overrides]] +module = [ + "haystack.*", + "haystack_integrations.*", + "pytest.*", + "numpy.*", + "optimum.*", + "torch.*", + "transformers.*", + "huggingface_hub.*", + "sentence_transformers.*" +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = ["--strict-markers", "-vv"] +markers = [ + "integration: integration tests", + "unit: unit tests", + "embedders: embedders tests", +] +log_cli = true diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/__init__.py b/integrations/optimum/src/haystack_integrations/components/embedders/__init__.py new file mode 100644 index 000000000..4e5ac1535 --- /dev/null +++ b/integrations/optimum/src/haystack_integrations/components/embedders/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from .optimum_document_embedder import OptimumDocumentEmbedder +from .optimum_text_embedder import OptimumTextEmbedder + +__all__ = ["OptimumDocumentEmbedder", "OptimumTextEmbedder"] diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum_backend.py 
b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_backend.py new file mode 100644 index 000000000..55d0e6f20 --- /dev/null +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_backend.py @@ -0,0 +1,99 @@ +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import torch +from haystack.utils.auth import Secret +from haystack_integrations.components.embedders.pooling import Pooling, PoolingMode +from optimum.onnxruntime import ORTModelForFeatureExtraction +from tqdm import tqdm +from transformers import AutoTokenizer + + +class OptimumEmbeddingBackend: + """ + Class to manage Optimum embeddings. + """ + + def __init__(self, model: str, model_kwargs: Dict[str, Any], token: Optional[Secret] = None): + """ + Create an instance of OptimumEmbeddingBackend. + + :param model: A string representing the model id on HF Hub. + :param model_kwargs: Keyword arguments to pass to the model. + :param token: The HuggingFace token to use as HTTP bearer authorization. + """ + # export=True converts the model to ONNX on the fly + self.model = ORTModelForFeatureExtraction.from_pretrained(**model_kwargs, export=True) + self.tokenizer = AutoTokenizer.from_pretrained(model, token=token) + + def embed( + self, + texts_to_embed: Union[str, List[str]], + normalize_embeddings: bool, + pooling_mode: PoolingMode = PoolingMode.MEAN, + progress_bar: bool = False, + batch_size: int = 1, + ) -> Union[List[List[float]], List[float]]: + """ + Embed text or list of texts using the Optimum model. + + :param texts_to_embed: The text or list of texts to embed. + :param normalize_embeddings: Whether to normalize the embeddings to unit length. + :param pooling_mode: The pooling mode to use. + :param progress_bar: Whether to show a progress bar or not. + :param batch_size: Batch size to use. + :return: A single embedding if the input is a single string. A list of embeddings if the input is a list of + strings. 
+ """ + if isinstance(texts_to_embed, str): + texts = [texts_to_embed] + else: + texts = texts_to_embed + + # Determine device for tokenizer output + device = self.model.device + + # Sorting by length + length_sorted_idx = np.argsort([-len(sen) for sen in texts]) + sentences_sorted = [texts[idx] for idx in length_sorted_idx] + + all_embeddings = [] + for i in tqdm( + range(0, len(sentences_sorted), batch_size), disable=not progress_bar, desc="Calculating embeddings" + ): + batch = sentences_sorted[i : i + batch_size] + encoded_input = self.tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device) + + # Only pass required inputs otherwise onnxruntime can raise an error + inputs_to_remove = set(encoded_input.keys()).difference(self.model.inputs_names) + for key in inputs_to_remove: + encoded_input.pop(key) + model_output = self.model(**encoded_input) + + # Pool Embeddings + pooling = Pooling( + pooling_mode=pooling_mode, + attention_mask=encoded_input["attention_mask"].to(device), + model_output=model_output, + ) + sentence_embeddings = pooling.pool_embeddings() + all_embeddings.append(sentence_embeddings) + + embeddings = torch.cat(all_embeddings, dim=0) + + # Normalize all embeddings + if normalize_embeddings: + embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1) + + embeddings = embeddings.tolist() + + # Reorder embeddings according to original order + reordered_embeddings: List[List[float]] = [[]] * len(texts) + for embedding, idx in zip(embeddings, length_sorted_idx): + reordered_embeddings[idx] = embedding + + if isinstance(texts_to_embed, str): + # Return the embedding if only one text was passed + return reordered_embeddings[0] + + return reordered_embeddings diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum_document_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_document_embedder.py new file mode 100644 index 000000000..f0310f872 --- /dev/null +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_document_embedder.py @@ -0,0 +1,251 @@ +from typing import Any, Dict, List, Optional, Union + +from haystack import Document, component, default_from_dict, default_to_dict +from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.utils.hf import HFModelType, check_valid_model, deserialize_hf_model_kwargs, serialize_hf_model_kwargs +from haystack_integrations.components.embedders.optimum_backend import OptimumEmbeddingBackend +from haystack_integrations.components.embedders.pooling import HFPoolingMode, PoolingMode + + +@component +class OptimumDocumentEmbedder: + """ + A component for computing Document embeddings using models loaded with the HuggingFace Optimum library. + This component is designed to seamlessly inference models using the high speed ONNX runtime. + + The embedding of each Document is stored in the `embedding` field of the Document. + + Usage example: + ```python + from haystack.dataclasses import Document + from haystack_integrations.components.embedders import OptimumDocumentEmbedder + + doc = Document(content="I love pizza!") + + document_embedder = OptimumDocumentEmbedder(model="sentence-transformers/all-mpnet-base-v2") + document_embedder.warm_up() + + result = document_embedder.run([doc]) + print(result["documents"][0].embedding) + + # [0.017020374536514282, -0.023255806416273117, ...] 
+ ``` + + Key Features and Compatibility: + - **Primary Compatibility**: Designed to work seamlessly with any embedding model present on the Hugging Face + Hub. + - **Conversion to ONNX**: The models are converted to ONNX using the HuggingFace Optimum library. This is + performed in real-time, during the warm-up step. + - **Accelerated Inference on GPU**: Supports using different execution providers such as CUDA and TensorRT, to + accelerate ONNX Runtime inference on GPUs. + Simply pass the execution provider as the onnx_execution_provider parameter. Additonal parameters can be passed + to the model using the model_kwargs parameter. + For more details refer to the HuggingFace documentation: + https://huggingface.co/docs/optimum/onnxruntime/usage_guides/gpu. + """ + + def __init__( + self, + model: str = "sentence-transformers/all-mpnet-base-v2", + token: Optional[Secret] = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 + prefix: str = "", + suffix: str = "", + normalize_embeddings: bool = True, + onnx_execution_provider: str = "CPUExecutionProvider", + pooling_mode: Optional[Union[str, PoolingMode]] = None, + model_kwargs: Optional[Dict[str, Any]] = None, + batch_size: int = 32, + progress_bar: bool = True, + meta_fields_to_embed: Optional[List[str]] = None, + embedding_separator: str = "\n", + ): + """ + Create a OptimumDocumentEmbedder component. + + :param model: A string representing the model id on HF Hub. + :param token: The HuggingFace token to use as HTTP bearer authorization. + :param prefix: A string to add to the beginning of each text. + :param suffix: A string to add to the end of each text. + :param normalize_embeddings: Whether to normalize the embeddings to unit length. + :param onnx_execution_provider: The execution provider to use for ONNX models. See + https://onnxruntime.ai/docs/execution-providers/ for possible providers. + + Note: Using the TensorRT execution provider + TensorRT requires to build its inference engine ahead of inference, which takes some time due to the model + optimization and nodes fusion. To avoid rebuilding the engine every time the model is loaded, ONNX Runtime + provides a pair of options to save the engine: `trt_engine_cache_enable` and `trt_engine_cache_path`. We + recommend setting these two provider options using the model_kwargs parameter, when using the TensorRT + execution provider. The usage is as follows: + ```python + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + onnx_execution_provider="TensorrtExecutionProvider", + model_kwargs={ + "provider_options": { + "trt_engine_cache_enable": True, + "trt_engine_cache_path": "tmp/trt_cache", + } + }, + ) + ``` + :param pooling_mode: The pooling mode to use. When None, pooling mode will be inferred from the model config. + The supported pooling modes are: + - "cls": Perform CLS Pooling on the output of the embedding model. Uses the first token (CLS token) as text + representations. + - "max": Perform Max Pooling on the output of the embedding model. Uses max in each dimension over all + the tokens. + - "mean": Perform Mean Pooling on the output of the embedding model. + - "mean_sqrt_len": Perform mean-pooling on the output of the embedding model, but divide by sqrt + (input_length). + - "weighted_mean": Perform Weighted (position) Mean Pooling on the output of the embedding model. See + https://arxiv.org/abs/2202.08904. + - "last_token": Perform Last Token Pooling on the output of the embedding model. 
See + https://arxiv.org/abs/2202.08904 & https://arxiv.org/abs/2201.10005. + :param model_kwargs: Dictionary containing additional keyword arguments to pass to the model. + In case of duplication, these kwargs override `model`, `onnx_execution_provider`, and `token` initialization + parameters. + :param batch_size: Number of Documents to encode at once. + :param progress_bar: Whether to show a progress bar or not. Can be helpful to disable in production deployments + to keep the logs clean. + :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. + :param embedding_separator: Separator used to concatenate the meta fields to the Document text. + """ + check_valid_model(model, HFModelType.EMBEDDING, token) + self.model = model + + self.token = token + token = token.resolve_value() if token else None + + if isinstance(pooling_mode, str): + self.pooling_mode = PoolingMode.from_str(pooling_mode) + # Infer pooling mode from model config if not provided, + if pooling_mode is None: + self.pooling_mode = HFPoolingMode.get_pooling_mode(model, token) + # Raise error if pooling mode is not found in model config and not specified by user + if self.pooling_mode is None: + modes = {e.value: e for e in PoolingMode} + msg = ( + f"Pooling mode not found in model config and not specified by user." + f" Supported modes are: {list(modes.keys())}" + ) + raise ValueError(msg) + + self.prefix = prefix + self.suffix = suffix + self.normalize_embeddings = normalize_embeddings + self.onnx_execution_provider = onnx_execution_provider + self.batch_size = batch_size + self.progress_bar = progress_bar + self.meta_fields_to_embed = meta_fields_to_embed or [] + self.embedding_separator = embedding_separator + + model_kwargs = model_kwargs or {} + + # Check if the model_kwargs contain the parameters, otherwise, populate them with values from init parameters + model_kwargs.setdefault("model_id", model) + model_kwargs.setdefault("provider", onnx_execution_provider) + model_kwargs.setdefault("use_auth_token", token) + + self.model_kwargs = model_kwargs + self.embedding_backend = None + + def warm_up(self): + """ + Load the embedding backend. + """ + if self.embedding_backend is None: + self.embedding_backend = OptimumEmbeddingBackend( + model=self.model, token=self.token, model_kwargs=self.model_kwargs + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + """ + serialization_dict = default_to_dict( + self, + model=self.model, + prefix=self.prefix, + suffix=self.suffix, + normalize_embeddings=self.normalize_embeddings, + onnx_execution_provider=self.onnx_execution_provider, + pooling_mode=self.pooling_mode.value, + batch_size=self.batch_size, + progress_bar=self.progress_bar, + meta_fields_to_embed=self.meta_fields_to_embed, + embedding_separator=self.embedding_separator, + model_kwargs=self.model_kwargs, + token=self.token.to_dict() if self.token else None, + ) + + model_kwargs = serialization_dict["init_parameters"]["model_kwargs"] + model_kwargs.pop("token", None) + + serialize_hf_model_kwargs(model_kwargs) + return serialization_dict + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "OptimumDocumentEmbedder": + """ + Deserialize this component from a dictionary. 
+ """ + data["init_parameters"]["pooling_mode"] = PoolingMode.from_str(data["init_parameters"]["pooling_mode"]) + deserialize_secrets_inplace(data["init_parameters"], keys=["token"]) + deserialize_hf_model_kwargs(data["init_parameters"]["model_kwargs"]) + return default_from_dict(cls, data) + + def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: + """ + Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. + """ + texts_to_embed = [] + for doc in documents: + meta_values_to_embed = [ + str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta[key] is not None + ] + + text_to_embed = ( + self.prefix + self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) + self.suffix + ) + + texts_to_embed.append(text_to_embed) + return texts_to_embed + + @component.output_types(documents=List[Document]) + def run(self, documents: List[Document]): + """ + Embed a list of Documents. + The embedding of each Document is stored in the `embedding` field of the Document. + + :param documents: A list of Documents to embed. + :return: A dictionary containing the updated Documents with their embeddings. + """ + if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): + msg = ( + "OptimumDocumentEmbedder expects a list of Documents as input." + " In case you want to embed a string, please use the OptimumTextEmbedder." + ) + raise TypeError(msg) + + if self.embedding_backend is None: + msg = "The embedding model has not been loaded. Please call warm_up() before running." + raise RuntimeError(msg) + + # Return empty list if no documents + if not documents: + return {"documents": []} + + texts_to_embed = self._prepare_texts_to_embed(documents=documents) + + embeddings = self.embedding_backend.embed( + texts_to_embed=texts_to_embed, + normalize_embeddings=self.normalize_embeddings, + pooling_mode=self.pooling_mode, + progress_bar=self.progress_bar, + batch_size=self.batch_size, + ) + + for doc, emb in zip(documents, embeddings): + doc.embedding = emb + + return {"documents": documents} diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/optimum_text_embedder.py b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_text_embedder.py new file mode 100644 index 000000000..8a33a9403 --- /dev/null +++ b/integrations/optimum/src/haystack_integrations/components/embedders/optimum_text_embedder.py @@ -0,0 +1,201 @@ +from typing import Any, Dict, List, Optional, Union + +from haystack import component, default_from_dict, default_to_dict +from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.utils.hf import HFModelType, check_valid_model, deserialize_hf_model_kwargs, serialize_hf_model_kwargs +from haystack_integrations.components.embedders.optimum_backend import OptimumEmbeddingBackend +from haystack_integrations.components.embedders.pooling import HFPoolingMode, PoolingMode + + +@component +class OptimumTextEmbedder: + """ + A component to embed text using models loaded with the HuggingFace Optimum library. + This component is designed to seamlessly inference models using the high speed ONNX runtime. + + Usage example: + ```python + from haystack_integrations.components.embedders import OptimumTextEmbedder + + text_to_embed = "I love pizza!" 
+ + text_embedder = OptimumTextEmbedder(model="sentence-transformers/all-mpnet-base-v2") + text_embedder.warm_up() + + print(text_embedder.run(text_to_embed)) + + # {'embedding': [-0.07804739475250244, 0.1498992145061493,, ...]} + ``` + + Key Features and Compatibility: + - **Primary Compatibility**: Designed to work seamlessly with any embedding model present on the Hugging Face + Hub. + - **Conversion to ONNX**: The models are converted to ONNX using the HuggingFace Optimum library. This is + performed in real-time, during the warm-up step. + - **Accelerated Inference on GPU**: Supports using different execution providers such as CUDA and TensorRT, to + accelerate ONNX Runtime inference on GPUs. + Simply pass the execution provider as the onnx_execution_provider parameter. Additonal parameters can be passed + to the model using the model_kwargs parameter. + For more details refer to the HuggingFace documentation: + https://huggingface.co/docs/optimum/onnxruntime/usage_guides/gpu. + """ + + def __init__( + self, + model: str = "sentence-transformers/all-mpnet-base-v2", + token: Optional[Secret] = Secret.from_env_var("HF_API_TOKEN", strict=False), # noqa: B008 + prefix: str = "", + suffix: str = "", + normalize_embeddings: bool = True, + onnx_execution_provider: str = "CPUExecutionProvider", + pooling_mode: Optional[Union[str, PoolingMode]] = None, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + """ + Create a OptimumTextEmbedder component. + + :param model: A string representing the model id on HF Hub. + :param token: The HuggingFace token to use as HTTP bearer authorization. + :param prefix: A string to add to the beginning of each text. + :param suffix: A string to add to the end of each text. + :param normalize_embeddings: Whether to normalize the embeddings to unit length. + :param onnx_execution_provider: The execution provider to use for ONNX models. See + https://onnxruntime.ai/docs/execution-providers/ for possible providers. + + Note: Using the TensorRT execution provider + TensorRT requires to build its inference engine ahead of inference, which takes some time due to the model + optimization and nodes fusion. To avoid rebuilding the engine every time the model is loaded, ONNX Runtime + provides a pair of options to save the engine: `trt_engine_cache_enable` and `trt_engine_cache_path`. We + recommend setting these two provider options using the model_kwargs parameter, when using the TensorRT + execution provider. The usage is as follows: + ```python + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + onnx_execution_provider="TensorrtExecutionProvider", + model_kwargs={ + "provider_options": { + "trt_engine_cache_enable": True, + "trt_engine_cache_path": "tmp/trt_cache", + } + }, + ) + ``` + :param pooling_mode: The pooling mode to use. When None, pooling mode will be inferred from the model config. + The supported pooling modes are: + - "cls": Perform CLS Pooling on the output of the embedding model. Uses the first token (CLS token) as text + representations. + - "max": Perform Max Pooling on the output of the embedding model. Uses max in each dimension over all + the tokens. + - "mean": Perform Mean Pooling on the output of the embedding model. + - "mean_sqrt_len": Perform mean-pooling on the output of the embedding model, but divide by sqrt + (input_length). + - "weighted_mean": Perform Weighted (position) Mean Pooling on the output of the embedding model. See + https://arxiv.org/abs/2202.08904. 
+ - "last_token": Perform Last Token Pooling on the output of the embedding model. See + https://arxiv.org/abs/2202.08904 & https://arxiv.org/abs/2201.10005. + :param model_kwargs: Dictionary containing additional keyword arguments to pass to the model. + In case of duplication, these kwargs override `model`, `onnx_execution_provider`, and `token` initialization + parameters. + """ + check_valid_model(model, HFModelType.EMBEDDING, token) + self.model = model + + self.token = token + token = token.resolve_value() if token else None + + if isinstance(pooling_mode, str): + self.pooling_mode = PoolingMode.from_str(pooling_mode) + # Infer pooling mode from model config if not provided, + if pooling_mode is None: + self.pooling_mode = HFPoolingMode.get_pooling_mode(model, token) + # Raise error if pooling mode is not found in model config and not specified by user + if self.pooling_mode is None: + modes = {e.value: e for e in PoolingMode} + msg = ( + f"Pooling mode not found in model config and not specified by user." + f" Supported modes are: {list(modes.keys())}" + ) + raise ValueError(msg) + + self.prefix = prefix + self.suffix = suffix + self.normalize_embeddings = normalize_embeddings + self.onnx_execution_provider = onnx_execution_provider + + model_kwargs = model_kwargs or {} + + # Check if the model_kwargs contain the parameters, otherwise, populate them with values from init parameters + model_kwargs.setdefault("model_id", model) + model_kwargs.setdefault("provider", onnx_execution_provider) + model_kwargs.setdefault("use_auth_token", token) + + self.model_kwargs = model_kwargs + self.embedding_backend = None + + def warm_up(self): + """ + Load the embedding backend. + """ + if self.embedding_backend is None: + self.embedding_backend = OptimumEmbeddingBackend( + model=self.model, token=self.token, model_kwargs=self.model_kwargs + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + """ + serialization_dict = default_to_dict( + self, + model=self.model, + prefix=self.prefix, + suffix=self.suffix, + normalize_embeddings=self.normalize_embeddings, + onnx_execution_provider=self.onnx_execution_provider, + pooling_mode=self.pooling_mode.value, + model_kwargs=self.model_kwargs, + token=self.token.to_dict() if self.token else None, + ) + + model_kwargs = serialization_dict["init_parameters"]["model_kwargs"] + model_kwargs.pop("token", None) + + serialize_hf_model_kwargs(model_kwargs) + return serialization_dict + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "OptimumTextEmbedder": + """ + Deserialize this component from a dictionary. + """ + data["init_parameters"]["pooling_mode"] = PoolingMode.from_str(data["init_parameters"]["pooling_mode"]) + deserialize_secrets_inplace(data["init_parameters"], keys=["token"]) + deserialize_hf_model_kwargs(data["init_parameters"]["model_kwargs"]) + return default_from_dict(cls, data) + + @component.output_types(embedding=List[float]) + def run(self, text: str): + """ + Embed a string. + + :param text: The text to embed. + :return: The embeddings of the text. + """ + if not isinstance(text, str): + msg = ( + "OptimumTextEmbedder expects a string as an input. " + "In case you want to embed a list of Documents, please use the OptimumDocumentEmbedder." + ) + raise TypeError(msg) + + if self.embedding_backend is None: + msg = "The embedding model has not been loaded. Please call warm_up() before running." 
+ raise RuntimeError(msg) + + text_to_embed = self.prefix + text + self.suffix + + embedding = self.embedding_backend.embed( + texts_to_embed=text_to_embed, normalize_embeddings=self.normalize_embeddings, pooling_mode=self.pooling_mode + ) + + return {"embedding": embedding} diff --git a/integrations/optimum/src/haystack_integrations/components/embedders/pooling.py b/integrations/optimum/src/haystack_integrations/components/embedders/pooling.py new file mode 100644 index 000000000..8f0cddc22 --- /dev/null +++ b/integrations/optimum/src/haystack_integrations/components/embedders/pooling.py @@ -0,0 +1,137 @@ +import json +from enum import Enum +from typing import Optional + +import torch +from haystack.utils import Secret +from huggingface_hub import hf_hub_download +from sentence_transformers.models import Pooling as PoolingLayer + + +class PoolingMode(Enum): + """ + Pooling Modes support by the Optimum Embedders. + """ + + CLS = "cls" + MEAN = "mean" + MAX = "max" + MEAN_SQRT_LEN = "mean_sqrt_len" + WEIGHTED_MEAN = "weighted_mean" + LAST_TOKEN = "last_token" + + def __str__(self): + return self.value + + @classmethod + def from_str(cls, string: str) -> "PoolingMode": + """ + Create a pooling mode from a string. + + :param string: + The string to convert. + :returns: + The pooling mode. + """ + enum_map = {e.value: e for e in PoolingMode} + pooling_mode = enum_map.get(string) + if pooling_mode is None: + msg = f"Unknown Pooling mode '{string}'. Supported modes are: {list(enum_map.keys())}" + raise ValueError(msg) + return pooling_mode + + +POOLING_MODES_MAP = { + "pooling_mode_cls_token": PoolingMode.CLS, + "pooling_mode_mean_tokens": PoolingMode.MEAN, + "pooling_mode_max_tokens": PoolingMode.MAX, + "pooling_mode_mean_sqrt_len_tokens": PoolingMode.MEAN_SQRT_LEN, + "pooling_mode_weightedmean_tokens": PoolingMode.WEIGHTED_MEAN, + "pooling_mode_lasttoken": PoolingMode.LAST_TOKEN, +} + +INVERSE_POOLING_MODES_MAP = {mode: name for name, mode in POOLING_MODES_MAP.items()} + + +class HFPoolingMode: + """ + Gets the pooling mode of the model from the Hugging Face Hub. + """ + + @staticmethod + def get_pooling_mode(model: str, token: Optional[Secret] = None) -> Optional[PoolingMode]: + """ + Gets the pooling mode of the model from the Hugging Face Hub. + + :param model: + The model to get the pooling mode for. + :param token: + The HuggingFace token to use as HTTP bearer authorization. + :returns: + The pooling mode. + """ + try: + pooling_config_path = hf_hub_download(repo_id=model, token=token, filename="1_Pooling/config.json") + + with open(pooling_config_path) as f: + pooling_config = json.load(f) + + # Filter only those keys that start with "pooling_mode" and are True + true_pooling_modes = [ + key for key, value in pooling_config.items() if key.startswith("pooling_mode") and value + ] + + # If exactly one True pooling mode is found, return it + if len(true_pooling_modes) == 1: + pooling_mode_from_config = true_pooling_modes[0] + pooling_mode = POOLING_MODES_MAP.get(pooling_mode_from_config) + # If no True pooling modes or more than one True pooling mode is found, return None + else: + pooling_mode = None + return pooling_mode + except Exception as e: + msg = f"An error occurred while inferring the pooling mode from the model config: {e}" + raise ValueError(msg) from e + + +class Pooling: + """ + Class to manage pooling of the embeddings. + + :param pooling_mode: The pooling mode to use. + :param attention_mask: The attention mask of the tokenized text. 
+ :param model_output: The output of the embedding model. + """ + + def __init__(self, pooling_mode: PoolingMode, attention_mask: torch.tensor, model_output: torch.tensor): + self.pooling_mode = pooling_mode + self.attention_mask = attention_mask + self.model_output = model_output + + def pool_embeddings(self) -> torch.tensor: + """ + Perform pooling on the output of the embedding model. + + :param pooling_mode: The pooling mode to use. + :param attention_mask: The attention mask of the tokenized text. + :param model_output: The output of the embedding model. + :return: The embeddings of the text after pooling. + """ + pooling_func_map = { + INVERSE_POOLING_MODES_MAP[self.pooling_mode]: True, + } + # By default, sentence-transformers uses mean pooling + # If multiple pooling methods are specified, the output dimension of the embeddings is scaled by the number of + # pooling methods selected + if self.pooling_mode != PoolingMode.MEAN: + pooling_func_map[INVERSE_POOLING_MODES_MAP[PoolingMode.MEAN]] = False + + # First element of model_output contains all token embeddings + token_embeddings = self.model_output[0] + word_embedding_dimension = token_embeddings.size(dim=2) + pooling = PoolingLayer(word_embedding_dimension=word_embedding_dimension, **pooling_func_map) + features = {"token_embeddings": token_embeddings, "attention_mask": self.attention_mask} + pooled_outputs = pooling.forward(features) + embeddings = pooled_outputs["sentence_embedding"] + + return embeddings diff --git a/integrations/optimum/tests/__init__.py b/integrations/optimum/tests/__init__.py new file mode 100644 index 000000000..6b5e14dc1 --- /dev/null +++ b/integrations/optimum/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/optimum/tests/test_optimum_backend.py b/integrations/optimum/tests/test_optimum_backend.py new file mode 100644 index 000000000..8ef61fd37 --- /dev/null +++ b/integrations/optimum/tests/test_optimum_backend.py @@ -0,0 +1,32 @@ +import pytest +from haystack_integrations.components.embedders.optimum_backend import OptimumEmbeddingBackend +from haystack_integrations.components.embedders.pooling import PoolingMode + + +@pytest.fixture +def backend(): + model = "sentence-transformers/all-mpnet-base-v2" + model_kwargs = {"model_id": model} + backend = OptimumEmbeddingBackend(model=model, model_kwargs=model_kwargs, token=None) + return backend + + +class TestOptimumBackend: + def test_embed_output_order(self, backend): + texts_to_embed = ["short text", "text that is longer than the other", "medium length text"] + embeddings = backend.embed(texts_to_embed, normalize_embeddings=False, pooling_mode=PoolingMode.MEAN) + + # Compute individual embeddings in order + expected_embeddings = [] + for text in texts_to_embed: + expected_embeddings.append(backend.embed(text, normalize_embeddings=False, pooling_mode=PoolingMode.MEAN)) + + # Assert that the embeddings are in the same order + assert embeddings == expected_embeddings + + def test_run_pooling_modes(self, backend): + for pooling_mode in PoolingMode: + embedding = backend.embed("test text", normalize_embeddings=False, pooling_mode=pooling_mode) + + assert len(embedding) == 768 + assert all(isinstance(x, float) for x in embedding) diff --git a/integrations/optimum/tests/test_optimum_document_embedder.py b/integrations/optimum/tests/test_optimum_document_embedder.py new file mode 100644 index 000000000..f61fea1d3 --- /dev/null +++ 
b/integrations/optimum/tests/test_optimum_document_embedder.py @@ -0,0 +1,288 @@ +from unittest.mock import MagicMock, patch + +import pytest +from haystack.dataclasses import Document +from haystack.utils.auth import Secret +from haystack_integrations.components.embedders import OptimumDocumentEmbedder +from haystack_integrations.components.embedders.pooling import PoolingMode +from huggingface_hub.utils import RepositoryNotFoundError + + +@pytest.fixture +def mock_check_valid_model(): + with patch( + "haystack_integrations.components.embedders.optimum_document_embedder.check_valid_model", + MagicMock(return_value=None), + ) as mock: + yield mock + + +@pytest.fixture +def mock_get_pooling_mode(): + with patch( + "haystack_integrations.components.embedders.optimum_text_embedder.HFPoolingMode.get_pooling_mode", + MagicMock(return_value=PoolingMode.MEAN), + ) as mock: + yield mock + + +class TestOptimumDocumentEmbedder: + def test_init_default(self, monkeypatch, mock_check_valid_model, mock_get_pooling_mode): # noqa: ARG002 + monkeypatch.setenv("HF_API_TOKEN", "fake-api-token") + embedder = OptimumDocumentEmbedder() + + assert embedder.model == "sentence-transformers/all-mpnet-base-v2" + assert embedder.token == Secret.from_env_var("HF_API_TOKEN", strict=False) + assert embedder.prefix == "" + assert embedder.suffix == "" + assert embedder.normalize_embeddings is True + assert embedder.onnx_execution_provider == "CPUExecutionProvider" + assert embedder.pooling_mode == PoolingMode.MEAN + assert embedder.batch_size == 32 + assert embedder.progress_bar is True + assert embedder.meta_fields_to_embed == [] + assert embedder.embedding_separator == "\n" + assert embedder.model_kwargs == { + "model_id": "sentence-transformers/all-mpnet-base-v2", + "provider": "CPUExecutionProvider", + "use_auth_token": "fake-api-token", + } + + def test_init_with_parameters(self, mock_check_valid_model): # noqa: ARG002 + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + token=Secret.from_token("fake-api-token"), + prefix="prefix", + suffix="suffix", + batch_size=64, + progress_bar=False, + meta_fields_to_embed=["test_field"], + embedding_separator=" | ", + normalize_embeddings=False, + pooling_mode="max", + onnx_execution_provider="CUDAExecutionProvider", + model_kwargs={"trust_remote_code": True}, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.token == Secret.from_token("fake-api-token") + assert embedder.prefix == "prefix" + assert embedder.suffix == "suffix" + assert embedder.batch_size == 64 + assert embedder.progress_bar is False + assert embedder.meta_fields_to_embed == ["test_field"] + assert embedder.embedding_separator == " | " + assert embedder.normalize_embeddings is False + assert embedder.onnx_execution_provider == "CUDAExecutionProvider" + assert embedder.pooling_mode == PoolingMode.MAX + assert embedder.model_kwargs == { + "trust_remote_code": True, + "model_id": "sentence-transformers/all-minilm-l6-v2", + "provider": "CUDAExecutionProvider", + "use_auth_token": "fake-api-token", + } + + def test_to_dict(self, mock_check_valid_model, mock_get_pooling_mode): # noqa: ARG002 + component = OptimumDocumentEmbedder() + data = component.to_dict() + + assert data == { + "type": "haystack_integrations.components.embedders.optimum_document_embedder.OptimumDocumentEmbedder", + "init_parameters": { + "model": "sentence-transformers/all-mpnet-base-v2", + "token": {"env_vars": ["HF_API_TOKEN"], "strict": False, "type": "env_var"}, + 
"prefix": "", + "suffix": "", + "batch_size": 32, + "progress_bar": True, + "meta_fields_to_embed": [], + "embedding_separator": "\n", + "normalize_embeddings": True, + "onnx_execution_provider": "CPUExecutionProvider", + "pooling_mode": "mean", + "model_kwargs": { + "model_id": "sentence-transformers/all-mpnet-base-v2", + "provider": "CPUExecutionProvider", + "use_auth_token": None, + }, + }, + } + + def test_to_dict_with_custom_init_parameters(self, mock_check_valid_model, mock_get_pooling_mode): # noqa: ARG002 + component = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + token=Secret.from_env_var("ENV_VAR", strict=False), + prefix="prefix", + suffix="suffix", + batch_size=64, + progress_bar=False, + meta_fields_to_embed=["test_field"], + embedding_separator=" | ", + normalize_embeddings=False, + onnx_execution_provider="CUDAExecutionProvider", + pooling_mode="max", + model_kwargs={"trust_remote_code": True}, + ) + data = component.to_dict() + + assert data == { + "type": "haystack_integrations.components.embedders.optimum_document_embedder.OptimumDocumentEmbedder", + "init_parameters": { + "model": "sentence-transformers/all-minilm-l6-v2", + "token": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"}, + "prefix": "prefix", + "suffix": "suffix", + "batch_size": 64, + "progress_bar": False, + "meta_fields_to_embed": ["test_field"], + "embedding_separator": " | ", + "normalize_embeddings": False, + "onnx_execution_provider": "CUDAExecutionProvider", + "pooling_mode": "max", + "model_kwargs": { + "trust_remote_code": True, + "model_id": "sentence-transformers/all-minilm-l6-v2", + "provider": "CUDAExecutionProvider", + "use_auth_token": None, + }, + }, + } + + def test_initialize_with_invalid_model(self, mock_check_valid_model): + mock_check_valid_model.side_effect = RepositoryNotFoundError("Invalid model id") + with pytest.raises(RepositoryNotFoundError): + OptimumDocumentEmbedder(model="invalid_model_id") + + def test_initialize_with_invalid_pooling_mode(self, mock_check_valid_model): # noqa: ARG002 + mock_get_pooling_mode.side_effect = ValueError("Invalid pooling mode") + with pytest.raises(ValueError): + OptimumDocumentEmbedder( + model="sentence-transformers/all-mpnet-base-v2", pooling_mode="Invalid_pooling_mode" + ) + + def test_infer_pooling_mode_from_str(self): + """ + Test that the pooling mode is correctly inferred from a string. + The pooling mode is "mean" as per the model config. 
+ """ + for pooling_mode in PoolingMode: + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + pooling_mode=pooling_mode.value, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.pooling_mode == pooling_mode + + @pytest.mark.integration + def test_default_pooling_mode_when_config_not_found(self, mock_check_valid_model): # noqa: ARG002 + with pytest.raises(ValueError): + OptimumDocumentEmbedder( + model="embedding_model_finetuned", + pooling_mode=None, + ) + + @pytest.mark.integration + def test_infer_pooling_mode_from_hf(self): + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + pooling_mode=None, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.pooling_mode == PoolingMode.MEAN + + def test_prepare_texts_to_embed_w_metadata(self, mock_check_valid_model): # noqa: ARG002 + documents = [ + Document(content=f"document number {i}: content", meta={"meta_field": f"meta_value {i}"}) for i in range(5) + ] + + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + meta_fields_to_embed=["meta_field"], + embedding_separator=" | ", + pooling_mode="mean", + ) + + prepared_texts = embedder._prepare_texts_to_embed(documents) + + assert prepared_texts == [ + "meta_value 0 | document number 0: content", + "meta_value 1 | document number 1: content", + "meta_value 2 | document number 2: content", + "meta_value 3 | document number 3: content", + "meta_value 4 | document number 4: content", + ] + + def test_prepare_texts_to_embed_w_suffix(self, mock_check_valid_model): # noqa: ARG002 + documents = [Document(content=f"document number {i}") for i in range(5)] + + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + prefix="my_prefix ", + suffix=" my_suffix", + pooling_mode="mean", + ) + + prepared_texts = embedder._prepare_texts_to_embed(documents) + + assert prepared_texts == [ + "my_prefix document number 0 my_suffix", + "my_prefix document number 1 my_suffix", + "my_prefix document number 2 my_suffix", + "my_prefix document number 3 my_suffix", + "my_prefix document number 4 my_suffix", + ] + + def test_run_wrong_input_format(self, mock_check_valid_model): # noqa: ARG002 + embedder = OptimumDocumentEmbedder(model="sentence-transformers/all-mpnet-base-v2", pooling_mode="mean") + embedder.warm_up() + # wrong formats + string_input = "text" + list_integers_input = [1, 2, 3] + + with pytest.raises(TypeError, match="OptimumDocumentEmbedder expects a list of Documents as input"): + embedder.run(documents=string_input) + + with pytest.raises(TypeError, match="OptimumDocumentEmbedder expects a list of Documents as input"): + embedder.run(documents=list_integers_input) + + def test_run_on_empty_list(self, mock_check_valid_model): # noqa: ARG002 + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + ) + embedder.warm_up() + empty_list_input = [] + result = embedder.run(documents=empty_list_input) + + assert result["documents"] is not None + assert not result["documents"] # empty list + + @pytest.mark.integration + def test_run(self): + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + + embedder = OptimumDocumentEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + prefix="prefix ", + suffix=" suffix", + meta_fields_to_embed=["topic"], 
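+ # the "topic" metadata value is prepended to each document's content, joined by the separator below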
+ embedding_separator=" | ", + batch_size=1, + ) + embedder.warm_up() + + result = embedder.run(documents=docs) + + documents_with_embeddings = result["documents"] + + assert isinstance(documents_with_embeddings, list) + assert len(documents_with_embeddings) == len(docs) + for doc in documents_with_embeddings: + assert isinstance(doc, Document) + assert isinstance(doc.embedding, list) + assert len(doc.embedding) == 768 + assert all(isinstance(x, float) for x in doc.embedding) diff --git a/integrations/optimum/tests/test_optimum_text_embedder.py b/integrations/optimum/tests/test_optimum_text_embedder.py new file mode 100644 index 000000000..9932d1dbf --- /dev/null +++ b/integrations/optimum/tests/test_optimum_text_embedder.py @@ -0,0 +1,193 @@ +from unittest.mock import MagicMock, patch + +import pytest +from haystack.utils.auth import Secret +from haystack_integrations.components.embedders import OptimumTextEmbedder +from haystack_integrations.components.embedders.pooling import PoolingMode +from huggingface_hub.utils import RepositoryNotFoundError + + +@pytest.fixture +def mock_check_valid_model(): + with patch( + "haystack_integrations.components.embedders.optimum_text_embedder.check_valid_model", + MagicMock(return_value=None), + ) as mock: + yield mock + + +@pytest.fixture +def mock_get_pooling_mode(): + with patch( + "haystack_integrations.components.embedders.optimum_text_embedder.HFPoolingMode.get_pooling_mode", + MagicMock(return_value=PoolingMode.MEAN), + ) as mock: + yield mock + + +class TestOptimumTextEmbedder: + def test_init_default(self, monkeypatch, mock_check_valid_model, mock_get_pooling_mode): # noqa: ARG002 + monkeypatch.setenv("HF_API_TOKEN", "fake-api-token") + embedder = OptimumTextEmbedder() + + assert embedder.model == "sentence-transformers/all-mpnet-base-v2" + assert embedder.token == Secret.from_env_var("HF_API_TOKEN", strict=False) + assert embedder.prefix == "" + assert embedder.suffix == "" + assert embedder.normalize_embeddings is True + assert embedder.onnx_execution_provider == "CPUExecutionProvider" + assert embedder.pooling_mode == PoolingMode.MEAN + assert embedder.model_kwargs == { + "model_id": "sentence-transformers/all-mpnet-base-v2", + "provider": "CPUExecutionProvider", + "use_auth_token": "fake-api-token", + } + + def test_init_with_parameters(self, mock_check_valid_model): # noqa: ARG002 + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + token=Secret.from_token("fake-api-token"), + prefix="prefix", + suffix="suffix", + normalize_embeddings=False, + pooling_mode="max", + onnx_execution_provider="CUDAExecutionProvider", + model_kwargs={"trust_remote_code": True}, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.token == Secret.from_token("fake-api-token") + assert embedder.prefix == "prefix" + assert embedder.suffix == "suffix" + assert embedder.normalize_embeddings is False + assert embedder.onnx_execution_provider == "CUDAExecutionProvider" + assert embedder.pooling_mode == PoolingMode.MAX + assert embedder.model_kwargs == { + "trust_remote_code": True, + "model_id": "sentence-transformers/all-minilm-l6-v2", + "provider": "CUDAExecutionProvider", + "use_auth_token": "fake-api-token", + } + + def test_to_dict(self, mock_check_valid_model, mock_get_pooling_mode): # noqa: ARG002 + component = OptimumTextEmbedder() + data = component.to_dict() + + assert data == { + "type": "haystack_integrations.components.embedders.optimum_text_embedder.OptimumTextEmbedder", + 
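# "type" records the fully qualified class path used to re-create the component when deserializing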
"init_parameters": { + "model": "sentence-transformers/all-mpnet-base-v2", + "token": {"env_vars": ["HF_API_TOKEN"], "strict": False, "type": "env_var"}, + "prefix": "", + "suffix": "", + "normalize_embeddings": True, + "onnx_execution_provider": "CPUExecutionProvider", + "pooling_mode": "mean", + "model_kwargs": { + "model_id": "sentence-transformers/all-mpnet-base-v2", + "provider": "CPUExecutionProvider", + "use_auth_token": None, + }, + }, + } + + def test_to_dict_with_custom_init_parameters(self, mock_check_valid_model): # noqa: ARG002 + component = OptimumTextEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + token=Secret.from_env_var("ENV_VAR", strict=False), + prefix="prefix", + suffix="suffix", + normalize_embeddings=False, + onnx_execution_provider="CUDAExecutionProvider", + pooling_mode="max", + model_kwargs={"trust_remote_code": True}, + ) + data = component.to_dict() + + assert data == { + "type": "haystack_integrations.components.embedders.optimum_text_embedder.OptimumTextEmbedder", + "init_parameters": { + "model": "sentence-transformers/all-minilm-l6-v2", + "token": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"}, + "prefix": "prefix", + "suffix": "suffix", + "normalize_embeddings": False, + "onnx_execution_provider": "CUDAExecutionProvider", + "pooling_mode": "max", + "model_kwargs": { + "trust_remote_code": True, + "model_id": "sentence-transformers/all-minilm-l6-v2", + "provider": "CUDAExecutionProvider", + "use_auth_token": None, + }, + }, + } + + def test_initialize_with_invalid_model(self, mock_check_valid_model): + mock_check_valid_model.side_effect = RepositoryNotFoundError("Invalid model id") + with pytest.raises(RepositoryNotFoundError): + OptimumTextEmbedder(model="invalid_model_id", pooling_mode="max") + + def test_initialize_with_invalid_pooling_mode(self, mock_check_valid_model): # noqa: ARG002 + mock_get_pooling_mode.side_effect = ValueError("Invalid pooling mode") + with pytest.raises(ValueError): + OptimumTextEmbedder(model="sentence-transformers/all-mpnet-base-v2", pooling_mode="Invalid_pooling_mode") + + def test_infer_pooling_mode_from_str(self): + """ + Test that the pooling mode is correctly inferred from a string. + The pooling mode is "mean" as per the model config. 
+ """ + for pooling_mode in PoolingMode: + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + pooling_mode=pooling_mode.value, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.pooling_mode == pooling_mode + + @pytest.mark.integration + def test_default_pooling_mode_when_config_not_found(self, mock_check_valid_model): # noqa: ARG002 + with pytest.raises(ValueError): + OptimumTextEmbedder( + model="embedding_model_finetuned", + pooling_mode=None, + ) + + @pytest.mark.integration + def test_infer_pooling_mode_from_hf(self): + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-minilm-l6-v2", + pooling_mode=None, + ) + + assert embedder.model == "sentence-transformers/all-minilm-l6-v2" + assert embedder.pooling_mode == PoolingMode.MEAN + + def test_run_wrong_input_format(self, mock_check_valid_model): # noqa: ARG002 + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + token=Secret.from_token("fake-api-token"), + pooling_mode="mean", + ) + embedder.warm_up() + + list_integers_input = [1, 2, 3] + + with pytest.raises(TypeError, match="OptimumTextEmbedder expects a string as an input"): + embedder.run(text=list_integers_input) + + @pytest.mark.integration + def test_run(self): + embedder = OptimumTextEmbedder( + model="sentence-transformers/all-mpnet-base-v2", + prefix="prefix ", + suffix=" suffix", + ) + embedder.warm_up() + + result = embedder.run(text="The food was delicious") + + assert len(result["embedding"]) == 768 + assert all(isinstance(x, float) for x in result["embedding"]) From af346c537156e94e47802389c247ac1a7b565c82 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 21 Feb 2024 14:58:08 +0100 Subject: [PATCH 2/7] chore: Update Cohere integration to use new generic callable (de)serializers for their callback handlers (#453) * (de)serialize callback properly * lint * check for None --- .../components/generators/cohere/generator.py | 31 ++++--------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py b/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py index 4927839d2..4811d3581 100644 --- a/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py +++ b/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: Apache-2.0 import logging -import sys from typing import Any, Callable, Dict, List, Optional, cast -from haystack import DeserializationError, component, default_from_dict, default_to_dict +from haystack import component, default_from_dict, default_to_dict +from haystack.components.generators.utils import deserialize_callback_handler, serialize_callback_handler from haystack.dataclasses import StreamingChunk from haystack.utils import Secret, deserialize_secrets_inplace @@ -89,19 +89,10 @@ def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. 
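+ The streaming callback, if one is set, is serialized to its fully qualified
+ import path (for example, "haystack.components.generators.utils.print_streaming_chunk").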
""" - if self.streaming_callback: - module = self.streaming_callback.__module__ - if module == "builtins": - callback_name = self.streaming_callback.__name__ - else: - callback_name = f"{module}.{self.streaming_callback.__name__}" - else: - callback_name = None - return default_to_dict( self, model=self.model, - streaming_callback=callback_name, + streaming_callback=serialize_callback_handler(self.streaming_callback) if self.streaming_callback else None, api_base_url=self.api_base_url, api_key=self.api_key.to_dict(), **self.model_parameters, @@ -114,20 +105,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "CohereGenerator": """ init_params = data.get("init_parameters", {}) deserialize_secrets_inplace(init_params, ["api_key"]) - streaming_callback = None if "streaming_callback" in init_params and init_params["streaming_callback"] is not None: - parts = init_params["streaming_callback"].split(".") - module_name = ".".join(parts[:-1]) - function_name = parts[-1] - module = sys.modules.get(module_name, None) - if not module: - msg = f"Could not locate the module of the streaming callback: {module_name}" - raise DeserializationError(msg) - streaming_callback = getattr(module, function_name, None) - if not streaming_callback: - msg = f"Could not locate the streaming callback: {function_name}" - raise DeserializationError(msg) - data["init_parameters"]["streaming_callback"] = streaming_callback + data["init_parameters"]["streaming_callback"] = deserialize_callback_handler( + init_params["streaming_callback"] + ) return default_from_dict(cls, data) @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) From 0ccf07af3335df40009ba62fb77bc6059057a9ab Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 21 Feb 2024 15:00:18 +0100 Subject: [PATCH 3/7] chore: Update Amazon Bedrock integration to use new generic callable (de)serializers for their callback handlers (#452) * serialize the callback * fix test * lint --- .../generators/amazon_bedrock/chat/chat_generator.py | 4 ++-- integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 6ce671e68..2af99e685 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -6,7 +6,7 @@ import boto3 from botocore.exceptions import BotoCoreError, ClientError from haystack import component, default_from_dict, default_to_dict -from haystack.components.generators.utils import deserialize_callback_handler +from haystack.components.generators.utils import deserialize_callback_handler, serialize_callback_handler from haystack.dataclasses import ChatMessage, StreamingChunk from haystack.utils.auth import Secret, deserialize_secrets_inplace @@ -248,7 +248,7 @@ def to_dict(self) -> Dict[str, Any]: model=self.model, stop_words=self.stop_words, generation_kwargs=self.model_adapter.generation_kwargs, - streaming_callback=self.streaming_callback, + streaming_callback=serialize_callback_handler(self.streaming_callback), ) @classmethod diff --git a/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py b/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py index 
574aab5cc..86a610811 100644 --- a/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py +++ b/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py @@ -58,7 +58,7 @@ def test_to_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): "model": "anthropic.claude-v2", "generation_kwargs": {"temperature": 0.7}, "stop_words": [], - "streaming_callback": print_streaming_chunk, + "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", }, } From ffe86a922549f30af6929187de666e21b1c8daec Mon Sep 17 00:00:00 2001 From: Ulises M <30765968+lbux@users.noreply.github.com> Date: Thu, 22 Feb 2024 00:00:26 -0800 Subject: [PATCH 4/7] Allow passing in required project_name (#445) * Allow passing in required project_name UpTrain requires a project_name when using its API whereas openai does not. The integration will crash if no project_name is included. This will allow for an eval declaration like so: evaluator = UpTrainEvaluator( metric=UpTrainMetric.METRIC, api="uptrain", project_name="uptrain-project", ) * comments + assert test * Update integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py * fix tests --------- Co-authored-by: Massimiliano Pippi --- integrations/uptrain/pyproject.toml | 7 ++++++- .../components/evaluators/uptrain/evaluator.py | 10 +++++++++- integrations/uptrain/tests/test_evaluator.py | 11 ++++++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/integrations/uptrain/pyproject.toml b/integrations/uptrain/pyproject.toml index a7b434acb..c2e7260eb 100644 --- a/integrations/uptrain/pyproject.toml +++ b/integrations/uptrain/pyproject.toml @@ -21,7 +21,12 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.0.0b6", "uptrain>=0.5"] +dependencies = [ + "haystack-ai>=2.0.0b6", + "uptrain>=0.5", + "nest_asyncio", + "litellm", +] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain" diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py index 4e7307602..af543d726 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py @@ -36,6 +36,7 @@ def __init__( api: str = "openai", api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), api_params: Optional[Dict[str, Any]] = None, + project_name: Optional[str] = None, ): """ Construct a new UpTrain evaluator. @@ -52,6 +53,8 @@ def __init__( The API key to use. :param api_params: Additional parameters to pass to the API client. + :param project_name: + Name of the project required when using UpTrain API. 
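+ For example, evaluating through the UpTrain API might be set up like this
+ (metric and project name are illustrative):
+ evaluator = UpTrainEvaluator(
+ metric=UpTrainMetric.RESPONSE_COMPLETENESS,
+ api="uptrain",
+ api_key=Secret.from_env_var("UPTRAIN_API_KEY"),
+ project_name="uptrain-project",
+ )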
""" self.metric = metric if isinstance(metric, UpTrainMetric) else UpTrainMetric.from_str(metric) self.metric_params = metric_params @@ -59,6 +62,7 @@ def __init__( self.api = api self.api_key = api_key self.api_params = api_params + self.project_name = project_name self._init_backend() expected_inputs = self.descriptor.input_parameters @@ -112,7 +116,7 @@ def run(self, **inputs) -> Dict[str, Any]: if isinstance(self._backend_client, EvalLLM): results = self._backend_client.evaluate(**eval_args) else: - results = self._backend_client.log_and_evaluate(**eval_args) + results = self._backend_client.log_and_evaluate(**eval_args, project_name=self.project_name) OutputConverters.validate_outputs(results) converted_results = [ @@ -144,6 +148,7 @@ def check_serializable(obj: Any): api=self.api, api_key=self.api_key.to_dict(), api_params=self.api_params, + project_name=self.project_name, ) @classmethod @@ -193,6 +198,9 @@ def _init_backend(self): if self.api == "openai": backend_client = EvalLLM(openai_api_key=api_key) elif self.api == "uptrain": + if not self.project_name: + msg = "project_name not provided. UpTrain API requires a project name." + raise ValueError(msg) backend_client = APIClient(uptrain_api_key=api_key) self._backend_metric = backend_metric diff --git a/integrations/uptrain/tests/test_evaluator.py b/integrations/uptrain/tests/test_evaluator.py index 0f2eaba4d..1f9b245e3 100644 --- a/integrations/uptrain/tests/test_evaluator.py +++ b/integrations/uptrain/tests/test_evaluator.py @@ -112,7 +112,10 @@ def test_evaluator_api(monkeypatch): assert eval.api_key == Secret.from_env_var("OPENAI_API_KEY") eval = UpTrainEvaluator( - UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key=Secret.from_env_var("UPTRAIN_API_KEY") + UpTrainMetric.RESPONSE_COMPLETENESS, + api="uptrain", + api_key=Secret.from_env_var("UPTRAIN_API_KEY"), + project_name="test", ) assert eval.api == "uptrain" assert eval.api_key == Secret.from_env_var("UPTRAIN_API_KEY") @@ -156,6 +159,7 @@ def test_evaluator_serde(os_environ_get): "api": "uptrain", "api_key": Secret.from_env_var("ENV_VAR", strict=False), "api_params": {"eval_name": "test"}, + "project_name": "test", } eval = UpTrainEvaluator(**init_params) serde_data = eval.to_dict() @@ -166,6 +170,7 @@ def test_evaluator_serde(os_environ_get): assert eval.api_key == new_eval.api_key assert eval.metric_params == new_eval.metric_params assert eval.api_params == new_eval.api_params + assert eval.project_name == new_eval.project_name assert type(new_eval._backend_client) == type(eval._backend_client) assert type(new_eval._backend_metric) == type(eval._backend_metric) @@ -203,6 +208,7 @@ def test_evaluator_valid_inputs(metric, inputs, params): "api": "uptrain", "api_key": Secret.from_token("Aaa"), "api_params": None, + "project_name": "test", } eval = UpTrainEvaluator(**init_params) eval._backend_client = MockBackend([metric]) @@ -231,6 +237,7 @@ def test_evaluator_invalid_inputs(metric, inputs, error_string, params): "api": "uptrain", "api_key": Secret.from_token("Aaa"), "api_params": None, + "project_name": "test", } eval = UpTrainEvaluator(**init_params) eval._backend_client = MockBackend([metric]) @@ -307,6 +314,7 @@ def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params): "api": "uptrain", "api_key": Secret.from_token("Aaa"), "api_params": None, + "project_name": "test", } eval = UpTrainEvaluator(**init_params) eval._backend_client = MockBackend([metric]) @@ -326,6 +334,7 @@ def test_evaluator_outputs(metric, inputs, expected_outputs, 
metric_params): # This integration test validates the evaluator by running it against the # OpenAI API. It is parameterized by the metric, the inputs to the evalutor # and the metric parameters. +@pytest.mark.integration @pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") @pytest.mark.parametrize( "metric, inputs, metric_params", From 48958557108dafa4ca2521f3ae9fd12b992b18e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tuana=20=C3=87elik?= Date: Thu, 22 Feb 2024 10:07:10 +0100 Subject: [PATCH 5/7] pinning updtrain version (#465) --- integrations/uptrain/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/uptrain/pyproject.toml b/integrations/uptrain/pyproject.toml index c2e7260eb..0e9166adc 100644 --- a/integrations/uptrain/pyproject.toml +++ b/integrations/uptrain/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ ] dependencies = [ "haystack-ai>=2.0.0b6", - "uptrain>=0.5", + "uptrain==0.5.0", "nest_asyncio", "litellm", ] From e390da66c58d67282b99cd4debe3f2ef1b6b85d1 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Thu, 22 Feb 2024 10:23:37 +0100 Subject: [PATCH 6/7] more cleaning --- integrations/amazon_bedrock/tests/conftest.py | 10 +---- ...n_chat_bedrock.py => test_chat_bedrock.py} | 40 ++++--------------- ...st_amazon_bedrock.py => test_generator.py} | 22 +++------- 3 files changed, 14 insertions(+), 58 deletions(-) rename integrations/amazon_bedrock/tests/{test_amazon_chat_bedrock.py => test_chat_bedrock.py} (87%) rename integrations/amazon_bedrock/tests/{test_amazon_bedrock.py => test_generator.py} (98%) diff --git a/integrations/amazon_bedrock/tests/conftest.py b/integrations/amazon_bedrock/tests/conftest.py index 4c8ce688c..9406559bf 100644 --- a/integrations/amazon_bedrock/tests/conftest.py +++ b/integrations/amazon_bedrock/tests/conftest.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest @@ -12,14 +12,6 @@ def set_env_variables(monkeypatch): monkeypatch.setenv("AWS_PROFILE", "some_fake_profile") -@pytest.fixture -def mock_auto_tokenizer(): - with patch("transformers.AutoTokenizer.from_pretrained", autospec=True) as mock_from_pretrained: - mock_tokenizer = MagicMock() - mock_from_pretrained.return_value = mock_tokenizer - yield mock_tokenizer - - # create a fixture with mocked boto3 client and session @pytest.fixture def mock_boto3_session(): diff --git a/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py b/integrations/amazon_bedrock/tests/test_chat_bedrock.py similarity index 87% rename from integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py rename to integrations/amazon_bedrock/tests/test_chat_bedrock.py index 574aab5cc..a73916e0d 100644 --- a/integrations/amazon_bedrock/tests/test_amazon_chat_bedrock.py +++ b/integrations/amazon_bedrock/tests/test_chat_bedrock.py @@ -14,31 +14,7 @@ clazz = "haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator.AmazonBedrockChatGenerator" - -@pytest.fixture -def mock_auto_tokenizer(): - with patch("transformers.AutoTokenizer.from_pretrained", autospec=True) as mock_from_pretrained: - mock_tokenizer = MagicMock() - mock_from_pretrained.return_value = mock_tokenizer - yield mock_tokenizer - - -# create a fixture with mocked boto3 client and session -@pytest.fixture -def mock_boto3_session(): - with patch("boto3.Session") as mock_client: - yield mock_client - - -@pytest.fixture -def mock_prompt_handler(): - with patch( - 
"haystack_integrations.components.generators.amazon_bedrock.handlers.DefaultPromptHandler" - ) as mock_prompt_handler: - yield mock_prompt_handler - - -def test_to_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): +def test_to_dict(mock_boto3_session): """ Test that the to_dict method returns the correct dictionary without aws credentials """ @@ -65,7 +41,7 @@ def test_to_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): assert generator.to_dict() == expected_dict -def test_from_dict(mock_auto_tokenizer, mock_boto3_session): +def test_from_dict(mock_boto3_session): """ Test that the from_dict method returns the correct object """ @@ -89,7 +65,7 @@ def test_from_dict(mock_auto_tokenizer, mock_boto3_session): assert generator.streaming_callback == print_streaming_chunk -def test_default_constructor(mock_auto_tokenizer, mock_boto3_session, set_env_variables): +def test_default_constructor(mock_boto3_session, set_env_variables): """ Test that the default constructor sets the correct values """ @@ -116,7 +92,7 @@ def test_default_constructor(mock_auto_tokenizer, mock_boto3_session, set_env_va ) -def test_constructor_with_generation_kwargs(mock_auto_tokenizer, mock_boto3_session): +def test_constructor_with_generation_kwargs(mock_boto3_session): """ Test that model_kwargs are correctly set in the constructor """ @@ -135,8 +111,7 @@ def test_constructor_with_empty_model(): AmazonBedrockChatGenerator(model="") -@pytest.mark.unit -def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session): +def test_invoke_with_no_kwargs(mock_boto3_session): """ Test invoke raises an error if no messages are provided """ @@ -145,7 +120,6 @@ def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session): layer.invoke() -@pytest.mark.unit @pytest.mark.parametrize( "model, expected_model_adapter", [ @@ -168,7 +142,7 @@ def test_get_model_adapter(model: str, expected_model_adapter: Optional[Type[Bed class TestAnthropicClaudeAdapter: - def test_prepare_body_with_default_params(self, mock_auto_tokenizer) -> None: + def test_prepare_body_with_default_params(self) -> None: layer = AnthropicClaudeChatAdapter(generation_kwargs={}) prompt = "Hello, how are you?" expected_body = { @@ -181,7 +155,7 @@ def test_prepare_body_with_default_params(self, mock_auto_tokenizer) -> None: assert body == expected_body - def test_prepare_body_with_custom_inference_params(self, mock_auto_tokenizer) -> None: + def test_prepare_body_with_custom_inference_params(self) -> None: layer = AnthropicClaudeChatAdapter(generation_kwargs={"temperature": 0.7, "top_p": 0.8, "top_k": 4}) prompt = "Hello, how are you?" 
expected_body = { diff --git a/integrations/amazon_bedrock/tests/test_amazon_bedrock.py b/integrations/amazon_bedrock/tests/test_generator.py similarity index 98% rename from integrations/amazon_bedrock/tests/test_amazon_bedrock.py rename to integrations/amazon_bedrock/tests/test_generator.py index e43cc94cf..1ab9ba3c8 100644 --- a/integrations/amazon_bedrock/tests/test_amazon_bedrock.py +++ b/integrations/amazon_bedrock/tests/test_generator.py @@ -14,8 +14,7 @@ ) -@pytest.mark.unit -def test_to_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): +def test_to_dict(mock_boto3_session): """ Test that the to_dict method returns the correct dictionary without aws credentials """ @@ -40,8 +39,7 @@ def test_to_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): assert generator.to_dict() == expected_dict -@pytest.mark.unit -def test_from_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): +def test_from_dict(mock_boto3_session): """ Test that the from_dict method returns the correct object """ @@ -64,8 +62,7 @@ def test_from_dict(mock_auto_tokenizer, mock_boto3_session, set_env_variables): assert generator.model == "anthropic.claude-v2" -@pytest.mark.unit -def test_default_constructor(mock_auto_tokenizer, mock_boto3_session, set_env_variables): +def test_default_constructor(mock_boto3_session, set_env_variables): """ Test that the default constructor sets the correct values """ @@ -94,8 +91,7 @@ def test_default_constructor(mock_auto_tokenizer, mock_boto3_session, set_env_va ) -@pytest.mark.unit -def test_constructor_prompt_handler_initialized(mock_auto_tokenizer, mock_boto3_session, mock_prompt_handler): +def test_constructor_prompt_handler_initialized(mock_boto3_session, mock_prompt_handler): """ Test that the constructor sets the prompt_handler correctly, with the correct model_max_length for llama-2 """ @@ -104,8 +100,7 @@ def test_constructor_prompt_handler_initialized(mock_auto_tokenizer, mock_boto3_ assert layer.prompt_handler.model_max_length == 4096 -@pytest.mark.unit -def test_constructor_with_model_kwargs(mock_auto_tokenizer, mock_boto3_session): +def test_constructor_with_model_kwargs(mock_boto3_session): """ Test that model_kwargs are correctly set in the constructor """ @@ -116,7 +111,6 @@ def test_constructor_with_model_kwargs(mock_auto_tokenizer, mock_boto3_session): assert layer.model_adapter.model_kwargs["temperature"] == 0.7 -@pytest.mark.unit def test_constructor_with_empty_model(): """ Test that the constructor raises an error when the model is empty @@ -125,8 +119,7 @@ def test_constructor_with_empty_model(): AmazonBedrockGenerator(model="") -@pytest.mark.unit -def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session): +def test_invoke_with_no_kwargs(mock_boto3_session): """ Test invoke raises an error if no prompt is provided """ @@ -135,7 +128,6 @@ def test_invoke_with_no_kwargs(mock_auto_tokenizer, mock_boto3_session): layer.invoke() -@pytest.mark.unit def test_short_prompt_is_not_truncated(mock_boto3_session): """ Test that a short prompt is not truncated @@ -166,7 +158,6 @@ def test_short_prompt_is_not_truncated(mock_boto3_session): assert prompt_after_resize == mock_prompt_text -@pytest.mark.unit def test_long_prompt_is_truncated(mock_boto3_session): """ Test that a long prompt is truncated @@ -201,7 +192,6 @@ def test_long_prompt_is_truncated(mock_boto3_session): assert prompt_after_resize == truncated_prompt_text -@pytest.mark.unit @pytest.mark.parametrize( "model, expected_model_adapter", [ From 
274767df09823513814d0260c936a2a260ba9b20 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Thu, 22 Feb 2024 10:27:20 +0100 Subject: [PATCH 7/7] lint --- integrations/amazon_bedrock/tests/test_chat_bedrock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/amazon_bedrock/tests/test_chat_bedrock.py b/integrations/amazon_bedrock/tests/test_chat_bedrock.py index 52b683c7f..0f7bced89 100644 --- a/integrations/amazon_bedrock/tests/test_chat_bedrock.py +++ b/integrations/amazon_bedrock/tests/test_chat_bedrock.py @@ -1,5 +1,4 @@ from typing import Optional, Type -from unittest.mock import MagicMock, patch import pytest from haystack.components.generators.utils import print_streaming_chunk @@ -14,6 +13,7 @@ clazz = "haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator.AmazonBedrockChatGenerator" + def test_to_dict(mock_boto3_session): """ Test that the to_dict method returns the correct dictionary without aws credentials