From b80138e92d02b5f29d690f55e32abc79d1f8570d Mon Sep 17 00:00:00 2001 From: Alistair Rogers <36593376+AlistairLR112@users.noreply.github.com> Date: Wed, 3 Jan 2024 17:42:53 +0000 Subject: [PATCH] Ollama Integration (#132) * add ollama generator * add test skeleton * add __inits__ * add __inits__ * add __inits__ * add pyproject * add version * Add instructions for building local environment * Add integration tests using docker ollama service * Add integration test market in pyproject.toml * lint with black * Delete integrations/__init__.py * drop unused dunders * update pyproject based on elasticsearch integration * rename metadata to meta * Use full /generate URL from ollama in init * add docstrings and additional post arguments * add github action * change single to double quotes in tests * fix init paths * make timeout argument explicit in POST * lint with black * fix typo from jina to ollama in hatch lint * ignore pytest and haystack typing issues * correct type hint in output type * add assertion for replies and meta in tests * update labeler with ollama * try to install and run ollama wo docker * try to see if the issue with ports is related to concurrency * another try * better try to run ollama * Update ollama.yml * Update ollama.yml * simplify docker-compose * try to pull the model when the container is already running * add -d * Update ollama.yml * Delete integrations/ollama/docker-compose.yml * Update integrations/ollama/pyproject.toml Co-authored-by: Stefano Fiorucci * Add timeout to POST Request * Remove unpacking of dictionary arguments to pass mypy * Add docstring to run function * add test for init defaults, delete telemetry tests * drop use of dataclass for housing response data * modify type hints in protected methods * update readme with new docker testing regime * refactor post_args to json payload only * try using a github service for Ollama * modify post_args test to new json_payload * add ollama integration to general README * refinements * 
fix tests * rm unused fixture --------- Co-authored-by: Stefano Fiorucci --- .github/labeler.yml | 5 + .github/workflows/ollama.yml | 59 ++++++ README.md | 5 +- integrations/ollama/README.md | 39 ++++ integrations/ollama/pyproject.toml | 180 ++++++++++++++++++ .../ollama/src/ollama_haystack/__init__.py | 7 + .../ollama/src/ollama_haystack/generator.py | 102 ++++++++++ integrations/ollama/tests/__init__.py | 3 + integrations/ollama/tests/test_generator.py | 99 ++++++++++ 9 files changed, 497 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ollama.yml create mode 100644 integrations/ollama/README.md create mode 100644 integrations/ollama/pyproject.toml create mode 100644 integrations/ollama/src/ollama_haystack/__init__.py create mode 100644 integrations/ollama/src/ollama_haystack/generator.py create mode 100644 integrations/ollama/tests/__init__.py create mode 100644 integrations/ollama/tests/test_generator.py diff --git a/.github/labeler.yml b/.github/labeler.yml index 7c8296625..b7c3ce98e 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -64,6 +64,11 @@ integration:unstructured-fileconverter: - any-glob-to-any-file: "integrations/unstructured/fileconverter/**/*" - any-glob-to-any-file: ".github/workflows/unstructured_fileconverter.yml" +integration:ollama: + - changed-files: + - any-glob-to-any-file: "integrations/ollama/**/*" + - any-glob-to-any-file: ".github/workflows/ollama.yml" + # Topics topic:CI: - changed-files: diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml new file mode 100644 index 000000000..793943798 --- /dev/null +++ b/.github/workflows/ollama.yml @@ -0,0 +1,59 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / ollama + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/ollama/**" + - ".github/workflows/ollama.yml" + 
+concurrency: + group: ollama-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + LLM_FOR_TESTS: "orca-mini" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.9","3.10","3.11"] + services: + ollama: + image: ollama/ollama:latest + ports: + - 11434:11434 + options: --name ollama + + steps: + - name: Pull the LLM in the Ollama service + run: docker exec ollama ollama pull ${{ env.LLM_FOR_TESTS }} + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + working-directory: integrations/ollama + if: matrix.python-version == '3.9' + run: hatch run lint:all + + - name: Run tests + working-directory: integrations/ollama + run: hatch run cov diff --git a/README.md b/README.md index d9dfd3cc0..9e646dca8 100644 --- a/README.md +++ b/README.md @@ -73,5 +73,6 @@ deepset-haystack | [opensearch-haystack](integrations/opensearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/opensearch-haystack.svg)](https://pypi.org/project/opensearch-haystack) | [![Test / opensearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml) | | [qdrant-haystack](integrations/qdrant/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/qdrant-haystack.svg?color=orange)](https://pypi.org/project/qdrant-haystack) | [![Test / 
qdrant](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml) | | [unstructured-fileconverter-haystack](integrations/unstructured/fileconverter/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured / fileconverter](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured_fileconverter.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured_fileconverter.yml) | -| [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) | -| [pinecone-haystack](integrations/pinecone/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/pinecone-haystack.svg?color=orange)](https://pypi.org/project/pinecone-haystack) | [![Test / pinecone](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml) | +| [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) | +| [pinecone-haystack](integrations/pinecone/) | Document Store | [![PyPI - 
Version](https://img.shields.io/pypi/v/pinecone-haystack.svg?color=orange)](https://pypi.org/project/pinecone-haystack) | [![Test / pinecone](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml) | +| [ollama-haystack](integrations/ollama/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/ollama-haystack.svg?color=orange)](https://pypi.org/project/ollama-haystack) | [![Test / ollama](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml) | \ No newline at end of file diff --git a/integrations/ollama/README.md b/integrations/ollama/README.md new file mode 100644 index 000000000..c842cddf1 --- /dev/null +++ b/integrations/ollama/README.md @@ -0,0 +1,39 @@ +# ollama-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/ollama-haystack.svg)](https://pypi.org/project/ollama-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ollama-haystack.svg)](https://pypi.org/project/ollama-haystack) + +----- + +**Table of Contents** + +- [Installation](#installation) +- [License](#license) + +## Installation + +```console +pip install ollama-haystack +``` + +## License + +`ollama-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
+ +## Testing + +To run tests, first start a Docker container running Ollama and pull a model for integration testing. +It's recommended to use the smallest model possible for testing purposes - see https://ollama.ai/library for a list that Ollama supports. + +```console +docker run -d -p 11434:11434 --name ollama ollama/ollama:latest +docker exec ollama ollama pull <your model here> +``` + +Then run tests: + +```console +hatch run test +``` + +The default model used here is ``orca-mini`` \ No newline at end of file diff --git a/integrations/ollama/pyproject.toml b/integrations/ollama/pyproject.toml new file mode 100644 index 000000000..4505138d9 --- /dev/null +++ b/integrations/ollama/pyproject.toml @@ -0,0 +1,180 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "ollama-haystack" +dynamic = ["version"] +description = 'An integration between the Ollama LLM framework and Haystack' +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "Alistair Rogers", email = "alistairlr112@gmail.com" }, + { name = "Sachin Sachdeva", email = "emailforsachinsachdeva@gmail.com" }, + { name = "deepset GmbH", email = "info@deepset.ai" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = ["haystack-ai", "requests"] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = 
"https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama" + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/ollama-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." +git_describe_command = 'git describe --tags --match="integrations/ollama-v[0-9]*"' + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", +] + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "black>=23.1.0", + "mypy>=1.0.0", + "ruff>=0.0.243", +] + +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive {args:src/ollama_haystack tests}" +style = [ + "ruff {args:.}", + "black --check --diff {args:.}", +] +fmt = [ + "black {args:.}", + "ruff --fix {args:.}", + "style", +] +all = [ + "style", + "typing", +] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.ruff.isort] +known-first-party = ["ollama_haystack"] + +[tool.black] +target-version = ["py37"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py37" +line-length = 120 +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", "S106", "S107", + # Ignore complexity + "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.per-file-ignores] +# Tests can use 
magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] + + +[tool.coverage.run] +source_pkgs = ["ollama_haystack", "tests"] +branch = true +parallel = true + + +[tool.coverage.paths] +ollama_haystack = ["src/ollama_haystack", "*/ollama-haystack/src/ollama_haystack"] +tests = ["tests", "*/ollama-haystack/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[tool.pytest.ini_options] +markers = [ + "integration: marks tests as slow (deselect with '-m \"not integration\"')", +] +addopts = [ + "--import-mode=importlib", +] + +[[tool.mypy.overrides]] +module = [ + "haystack.*", + "pytest.*" +] +ignore_missing_imports = true diff --git a/integrations/ollama/src/ollama_haystack/__init__.py b/integrations/ollama/src/ollama_haystack/__init__.py new file mode 100644 index 000000000..8bbb69641 --- /dev/null +++ b/integrations/ollama/src/ollama_haystack/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from ollama_haystack.generator import OllamaGenerator + +__all__ = ["OllamaGenerator"] diff --git a/integrations/ollama/src/ollama_haystack/generator.py b/integrations/ollama/src/ollama_haystack/generator.py new file mode 100644 index 000000000..0c6d22391 --- /dev/null +++ b/integrations/ollama/src/ollama_haystack/generator.py @@ -0,0 +1,102 @@ +from typing import Any, Dict, List, Optional + +import requests +from haystack import component +from requests import Response + + +@component +class OllamaGenerator: + """ + Generator based on Ollama. Ollama is a library for easily running LLMs locally. + This component provides an interface to generate text using a LLM running in Ollama. 
+ """ + + def __init__( + self, + model: str = "orca-mini", + url: str = "http://localhost:11434/api/generate", + generation_kwargs: Optional[Dict[str, Any]] = None, + system_prompt: Optional[str] = None, + template: Optional[str] = None, + raw: bool = False, + timeout: int = 30, + ): + """ + :param model: The name of the model to use. The model should be available in the running Ollama instance. + Default is "orca-mini". + :param url: The URL of the generation endpoint of a running Ollama instance. + Default is "http://localhost:11434/api/generate". + :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + top_p, etc. See the + [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). + :param system_prompt: Optional system message (overrides what is defined in the Ollama Modelfile). + :param template: The full prompt template (overrides what is defined in the Ollama Modelfile). + :param raw: If True, no formatting will be applied to the prompt. You may choose to use the raw parameter + if you are specifying a full templated prompt in your request to the API. + :param timeout: The number of seconds before throwing a timeout error from the Ollama API. + Default is 30 seconds. 
+ """ + self.timeout = timeout + self.raw = raw + self.template = template + self.system_prompt = system_prompt + self.model = model + self.url = url + self.generation_kwargs = generation_kwargs or {} + + def _create_json_payload(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Returns a dictionary of JSON arguments for a POST request to an Ollama service + :param prompt: the prompt to generate a response for + :param generation_kwargs: Optional generation arguments, sent as the "options" field of the payload + :return: A dictionary of arguments for a POST request to an Ollama service + """ + generation_kwargs = generation_kwargs or {} + return { + "prompt": prompt, + "model": self.model, + "stream": False, + "raw": self.raw, + "template": self.template, + "system": self.system_prompt, + "options": generation_kwargs, + } + + def _convert_to_haystack_response(self, ollama_response: Response) -> Dict[str, List[Any]]: + """ + Convert a response from the Ollama API to the required Haystack format + :param ollama_response: A response (requests library) from the Ollama API + :return: A dictionary of the returned responses and metadata + """ + resp_dict = ollama_response.json() + + replies = [resp_dict["response"]] + meta = {key: value for key, value in resp_dict.items() if key != "response"} + + return {"replies": replies, "meta": [meta]} + + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) + def run( + self, + prompt: str, + generation_kwargs: Optional[Dict[str, Any]] = None, + ): + """ + Run an Ollama Model on a given prompt. + :param prompt: The prompt to generate a response for. + :param generation_kwargs: Optional arguments to pass to the Ollama generation endpoint, such as temperature, + top_p, etc. See the + [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). 
+ :return: A dictionary of the response and returned metadata + """ + generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})} + + json_payload = self._create_json_payload(prompt, generation_kwargs) + + response = requests.post(url=self.url, json=json_payload, timeout=self.timeout) + + # throw error on unsuccessful response + response.raise_for_status() + + return self._convert_to_haystack_response(response) diff --git a/integrations/ollama/tests/__init__.py b/integrations/ollama/tests/__init__.py new file mode 100644 index 000000000..e873bc332 --- /dev/null +++ b/integrations/ollama/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/ollama/tests/test_generator.py b/integrations/ollama/tests/test_generator.py new file mode 100644 index 000000000..c2450a3ec --- /dev/null +++ b/integrations/ollama/tests/test_generator.py @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import pytest +from requests import HTTPError + +from ollama_haystack import OllamaGenerator + + +class TestOllamaGenerator: + @pytest.mark.integration + def test_run_capital_cities(self): + prompts_and_answers = [ + ("What's the capital of France?", "Paris"), + ("What is the capital of Canada?", "Ottawa"), + ("What is the capital of Ghana?", "Accra"), + ] + + component = OllamaGenerator() + + for prompt, answer in prompts_and_answers: + results = component.run(prompt=prompt) + response = results["replies"][0] + + assert "replies" in results + assert "meta" in results + assert answer in response + + @pytest.mark.integration + def test_run_model_unavailable(self): + component = OllamaGenerator(model="Alistair_is_great") + + with pytest.raises(HTTPError): + component.run(prompt="Why is Alistair so great?") + + def test_init_default(self): + component = OllamaGenerator() + assert component.model == "orca-mini" + assert 
component.url == "http://localhost:11434/api/generate" + assert component.generation_kwargs == {} + assert component.system_prompt is None + assert component.template is None + assert component.raw is False + assert component.timeout == 30 + + def test_init(self): + component = OllamaGenerator( + model="llama2", + url="http://my-custom-endpoint:11434/api/generate", + generation_kwargs={"temperature": 0.5}, + system_prompt="You are Luigi from Super Mario Bros.", + timeout=5, + ) + + assert component.model == "llama2" + assert component.url == "http://my-custom-endpoint:11434/api/generate" + assert component.generation_kwargs == {"temperature": 0.5} + assert component.system_prompt == "You are Luigi from Super Mario Bros." + assert component.template is None + assert component.raw is False + assert component.timeout == 5 + + @pytest.mark.parametrize( + "configuration", + [ + { + "model": "some_model", + "url": "https://localhost:11434/api/generate", + "raw": True, + "system_prompt": "You are mario from Super Mario Bros.", + "template": None, + }, + { + "model": "some_model2", + "url": "https://localhost:11434/api/generate", + "raw": False, + "system_prompt": None, + "template": "some template", + }, + ], + ) + def test_create_json_payload(self, configuration): + prompt = "hello" + component = OllamaGenerator(**configuration) + + observed = component._create_json_payload(prompt=prompt) + + expected = { + "prompt": prompt, + "model": configuration["model"], + "stream": False, + "system": configuration["system_prompt"], + "raw": configuration["raw"], + "template": configuration["template"], + "options": {}, + } + + assert observed == expected