Commit
Merge branch 'tuana/mistral' of https://github.com/deepset-ai/haystack-core-integrations into tuana/mistral
TuanaCelik committed Feb 15, 2024
2 parents 08845fa + c21180f commit d59c233
Showing 78 changed files with 2,975 additions and 400 deletions.
5 changes: 5 additions & 0 deletions .github/labeler.yml
@@ -69,6 +69,11 @@ integration:mistral:
- any-glob-to-any-file: "integrations/mistral/**/*"
- any-glob-to-any-file: ".github/workflows/mistral.yml"

+integration:mongodb-atlas:
+- changed-files:
+- any-glob-to-any-file: "integrations/mongodb_atlas/**/*"
+- any-glob-to-any-file: ".github/workflows/mongodb_atlas.yml"

integration:ollama:
- changed-files:
- any-glob-to-any-file: "integrations/ollama/**/*"
58 changes: 58 additions & 0 deletions .github/workflows/mongodb_atlas.yml
@@ -0,0 +1,58 @@
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / mongodb_atlas

on:
schedule:
- cron: "0 0 * * *"
pull_request:
paths:
- "integrations/mongodb_atlas/**"
- ".github/workflows/mongodb_atlas.yml"

defaults:
run:
working-directory: integrations/mongodb_atlas

concurrency:
group: mongodb-atlas-${{ github.head_ref }}
cancel-in-progress: true

env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"
MONGO_CONNECTION_STRING: ${{ secrets.MONGO_CONNECTION_STRING }}

jobs:
run:
name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install Hatch
run: pip install --upgrade hatch

- name: Lint
working-directory: integrations/mongodb_atlas
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run lint:all

- name: Generate docs
if: matrix.python-version == '3.9' && runner.os == 'Linux'
run: hatch run docs

- name: Run tests
working-directory: integrations/mongodb_atlas
run: hatch run cov
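
The workflow's one secret is `MONGO_CONNECTION_STRING`, exported as an environment variable so the tests can reach a live Atlas cluster. A hedged sketch of the pattern the integration tests presumably rely on (the `pymongo` client below is an assumption for illustration, not code from this commit):

```python
import os

from pymongo import MongoClient  # assumption: the document store builds on pymongo

# Mirrors the workflow's env block: local runs need
#   export MONGO_CONNECTION_STRING="mongodb+srv://..."
# before running `hatch run cov` inside integrations/mongodb_atlas.
client = MongoClient(os.environ["MONGO_CONNECTION_STRING"])
client.admin.command("ping")  # fails fast if the cluster is unreachable
```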
1 change: 1 addition & 0 deletions README.md
@@ -75,6 +75,7 @@ deepset-haystack
| [instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) |
| [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) |
| [llama-cpp-haystack](integrations/llama_cpp/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/ollama-haystack.svg?color=orange)](https://pypi.org/project/llama-cpp-haystack) | [![Test / llama-cpp](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml) |
+| [mongodb-atlas-haystack](integrations/mongodb_atlas/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/mongodb-atlas-haystack.svg?color=orange)](https://pypi.org/project/mongodb-atlas-haystack) | [![Test / mongodb-atlas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml) |
| [ollama-haystack](integrations/ollama/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/ollama-haystack.svg?color=orange)](https://pypi.org/project/ollama-haystack) | [![Test / ollama](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml) |
| [opensearch-haystack](integrations/opensearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/opensearch-haystack.svg)](https://pypi.org/project/opensearch-haystack) | [![Test / opensearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml) |
| [pinecone-haystack](integrations/pinecone/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/pinecone-haystack.svg?color=orange)](https://pypi.org/project/pinecone-haystack) | [![Test / pinecone](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml) |
4 changes: 2 additions & 2 deletions integrations/astra/examples/example.py
@@ -47,7 +47,7 @@
p.add_component(instance=DocumentCleaner(), name="cleaner")
p.add_component(instance=DocumentSplitter(split_by="word", split_length=150, split_overlap=30), name="splitter")
p.add_component(
-    instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
+    instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
p.add_component(instance=DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP), name="writer")
@@ -63,7 +63,7 @@
# Create a querying pipeline on the indexed data
q = Pipeline()
q.add_component(
-    instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
+    instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
q.add_component("retriever", AstraEmbeddingRetriever(document_store))
4 changes: 2 additions & 2 deletions integrations/astra/examples/pipeline_example.py
@@ -62,7 +62,7 @@
]
p = Pipeline()
p.add_component(
-    instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
+    instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
p.add_component(instance=DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP), name="writer")
@@ -74,7 +74,7 @@
# Construct rag pipeline
rag_pipeline = Pipeline()
rag_pipeline.add_component(
-    instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
+    instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
rag_pipeline.add_component(instance=AstraEmbeddingRetriever(document_store=document_store), name="retriever")
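
Both astra example edits track the same upstream rename: Haystack's sentence-transformers embedders take `model` where they previously took `model_name_or_path`. A minimal standalone sketch of the new keyword (assuming `haystack-ai` and `sentence-transformers` are installed; nothing else changes at the call site):

```python
from haystack.components.embedders import SentenceTransformersTextEmbedder

# `model` replaces the old `model_name_or_path` keyword; behavior is otherwise identical.
embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
embedder.warm_up()
embedding = embedder.run(text="What's the capital of France?")["embedding"]
print(len(embedding))  # all-MiniLM-L6-v2 yields 384-dimensional vectors
```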
5 changes: 1 addition & 4 deletions integrations/cohere/tests/test_cohere_chat_generator.py
@@ -274,10 +274,7 @@ def test_live_run(self):
@pytest.mark.integration
def test_live_run_wrong_model(self, chat_messages):
component = CohereChatGenerator(model="something-obviously-wrong")
-        with pytest.raises(
-            cohere.CohereAPIError,
-            match="model not found, make sure the correct model ID was used and that you have access to the model.",
-        ):
+        with pytest.raises(cohere.CohereAPIError):
component.run(chat_messages)

@pytest.mark.skipif(
5 changes: 1 addition & 4 deletions integrations/cohere/tests/test_cohere_generators.py
@@ -149,10 +149,7 @@ def test_cohere_generator_run_wrong_model(self):
import cohere

component = CohereGenerator(model="something-obviously-wrong")
-        with pytest.raises(
-            cohere.CohereAPIError,
-            match="model not found, make sure the correct model ID was used and that you have access to the model.",
-        ):
+        with pytest.raises(cohere.CohereAPIError):
component.run(prompt="What's the capital of France?")

@pytest.mark.skipif(
4 changes: 2 additions & 2 deletions integrations/cohere/tests/test_document_embedder.py
@@ -136,9 +136,9 @@ def test_run(self):
def test_run_wrong_input_format(self):
embedder = CohereDocumentEmbedder(api_key=Secret.from_token("test-api-key"))

with pytest.raises(TypeError, match="CohereDocumentEmbedder expects a list of Documents as input"):
with pytest.raises(TypeError):
embedder.run(documents="text")
with pytest.raises(TypeError, match="CohereDocumentEmbedder expects a list of Documents as input"):
with pytest.raises(TypeError):
embedder.run(documents=[1, 2, 3])

assert embedder.run(documents=[]) == {"documents": [], "meta": {}}
2 changes: 1 addition & 1 deletion integrations/cohere/tests/test_text_embedder.py
@@ -106,7 +106,7 @@ def test_run_wrong_input_format(self):
embedder = CohereTextEmbedder(api_key=Secret.from_token("test-api-key"))
list_integers_input = ["text_snippet_1", "text_snippet_2"]

with pytest.raises(TypeError, match="CohereTextEmbedder expects a string as input"):
with pytest.raises(TypeError):
embedder.run(text=list_integers_input)

@pytest.mark.skipif(
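
All four cohere test edits above follow one pattern: keep asserting the exception type, drop the `match=` regex, so the tests stop breaking whenever the library rewords an error message. A toy illustration of the trade-off (not code from this commit):

```python
import pytest

def call_api():
    # Stand-in for a client call whose error wording may change between releases.
    raise TypeError("CohereDocumentEmbedder expects a list of Documents as input")

# Brittle: couples the test to the exact message text.
with pytest.raises(TypeError, match="expects a list of Documents"):
    call_api()

# Robust: the form this commit switches to; only the exception type is checked.
with pytest.raises(TypeError):
    call_api()
```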
33 changes: 33 additions & 0 deletions integrations/fastembed/example/example.py
@@ -0,0 +1,33 @@
from haystack import Document, Pipeline
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack_integrations.components.embedders.fastembed import FastembedDocumentEmbedder, FastembedTextEmbedder

document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

documents = [
Document(content="My name is Wolfgang and I live in Berlin"),
Document(content="I saw a black horse running"),
Document(content="Germany has many big cities"),
Document(content="fastembed is supported by and maintained by Qdrant."),
]

document_embedder = FastembedDocumentEmbedder()
document_embedder.warm_up()
documents_with_embeddings = document_embedder.run(documents)["documents"]
document_store.write_documents(documents_with_embeddings)

query_pipeline = Pipeline()
query_pipeline.add_component("text_embedder", FastembedTextEmbedder())
query_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

query = "Who supports fastembed?"

result = query_pipeline.run({"text_embedder": {"text": query}})

print(result["retriever"]["documents"][0]) # noqa: T201

# Document(id=...,
# content: 'fastembed is supported by and maintained by Qdrant.',
# score: 0.758..)
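
One asymmetry in the example is deliberate: the document embedder is warmed up explicitly because it runs outside a pipeline, while `Pipeline.run()` warms up the text embedder itself. A hedged sketch of using the text embedder standalone (assuming it follows the same warm-up contract as the document embedder):

```python
from haystack_integrations.components.embedders.fastembed import FastembedTextEmbedder

text_embedder = FastembedTextEmbedder()
text_embedder.warm_up()  # required outside a Pipeline
embedding = text_embedder.run(text="Who supports fastembed?")["embedding"]
```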
3 changes: 3 additions & 0 deletions integrations/google_ai/pyproject.toml
@@ -152,6 +152,7 @@ ban-relative-imports = "parents"
"tests/**/*" = ["PLR2004", "S101", "TID252"]

[tool.coverage.run]
+source = ["haystack_integrations"]
branch = true
parallel = true

@@ -160,6 +161,8 @@ google_ai_haystack = ["src"]
tests = ["tests"]

[tool.coverage.report]
+omit = ["*/tests/*", "*/__init__.py"]
+show_missing=true
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
@@ -9,6 +9,7 @@
from haystack.core.serialization import default_from_dict, default_to_dict
from haystack.dataclasses.byte_stream import ByteStream
from haystack.dataclasses.chat_message import ChatMessage, ChatRole
+from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)

@@ -20,11 +21,12 @@ class GoogleAIGeminiChatGenerator:
Sample usage:
```python
+from haystack.utils import Secret
from haystack.dataclasses.chat_message import ChatMessage
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiChatGenerator
-gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key="<MY_API_KEY>")
+gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key=Secret.from_token("<MY_API_KEY>"))
messages = [ChatMessage.from_user("What is the most interesting thing you know?")]
res = gemini_chat.run(messages=messages)
@@ -40,6 +42,7 @@ class GoogleAIGeminiChatGenerator:
This is a more advanced usage that also uses function calls:
```python
+from haystack.utils import Secret
from haystack.dataclasses.chat_message import ChatMessage
from google.ai.generativelanguage import FunctionDeclaration, Tool
@@ -73,7 +76,8 @@ def get_current_weather(location: str, unit: str = "celsius") -> str:
messages = [ChatMessage.from_user("What is the most interesting thing you know?")]
-gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key="<MY_API_KEY>", tools=[tool])
+gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key=Secret.from_token("<MY_API_KEY>"),
+                                          tools=[tool])
messages = [ChatMessage.from_user(content = "What is the temperature in celsius in Berlin?")]
res = gemini_chat.run(messages=messages)
@@ -95,15 +99,14 @@ def get_current_weather(location: str, unit: str = "celsius") -> str:
def __init__(
self,
*,
-        api_key: Optional[str] = None,
+        api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"),  # noqa: B008
model: str = "gemini-pro-vision",
generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None,
safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None,
tools: Optional[List[Tool]] = None,
):
"""
Initialize a GoogleAIGeminiChatGenerator instance.
-        If `api_key` is `None` it will use the `GOOGLE_API_KEY` env variable for authentication.
To get an API key, visit: https://makersuite.google.com
@@ -112,7 +115,7 @@ def __init__(
* `gemini-pro-vision`
* `gemini-ultra`
-        :param api_key: Google Makersuite API key, defaults to None
+        :param api_key: Google Makersuite API key.
:param model: Name of the model to use, defaults to "gemini-pro-vision"
:param generation_config: The generation config to use, defaults to None.
Can either be a GenerationConfig object or a dictionary of parameters.
@@ -130,8 +133,9 @@ def __init__(
"""

-        # Authenticate, if api_key is None it will use the GOOGLE_API_KEY env variable
-        genai.configure(api_key=api_key)
+        genai.configure(api_key=api_key.resolve_value())

+        self._api_key = api_key
self._model_name = model
self._generation_config = generation_config
self._safety_settings = safety_settings
@@ -153,6 +157,7 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, Any]]
def to_dict(self) -> Dict[str, Any]:
data = default_to_dict(
self,
+            api_key=self._api_key.to_dict(),
model=self._model_name,
generation_config=self._generation_config,
safety_settings=self._safety_settings,
@@ -168,6 +173,8 @@ def to_dict(self) -> Dict[str, Any]:

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiChatGenerator":
+        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])

if (tools := data["init_parameters"].get("tools")) is not None:
data["init_parameters"]["tools"] = [Tool.deserialize(t) for t in tools]
if (generation_config := data["init_parameters"].get("generation_config")) is not None:
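
The substance of the google_ai changes is the move from raw string keys to Haystack's `Secret` type: the key is resolved once at construction and serialized by reference rather than by value. A sketch of the round trip this enables, assuming `GOOGLE_API_KEY` is set (env-var secrets serialize; `Secret.from_token` ones deliberately do not):

```python
import os

from haystack_integrations.components.generators.google_ai import GoogleAIGeminiChatGenerator

os.environ["GOOGLE_API_KEY"] = "dummy-key-for-illustration"

# api_key defaults to Secret.from_env_var("GOOGLE_API_KEY")
gemini = GoogleAIGeminiChatGenerator(model="gemini-pro")

data = gemini.to_dict()
# The dict records where the key lives, never the key itself, e.g.
# data["init_parameters"]["api_key"] == {"type": "env_var", "env_vars": ["GOOGLE_API_KEY"], "strict": True}

restored = GoogleAIGeminiChatGenerator.from_dict(data)  # deserialize_secrets_inplace rebuilds the Secret
```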
@@ -9,6 +9,7 @@
from haystack.core.component.types import Variadic
from haystack.core.serialization import default_from_dict, default_to_dict
from haystack.dataclasses.byte_stream import ByteStream
+from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)

@@ -20,9 +21,10 @@ class GoogleAIGeminiGenerator:
Sample usage:
```python
+from haystack.utils import Secret
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
gemini = GoogleAIGeminiGenerator(model="gemini-pro", api_key="<MY_API_KEY>")
gemini = GoogleAIGeminiGenerator(model="gemini-pro", api_key=Secret.from_token("<MY_API_KEY>"))
res = gemini.run(parts = ["What is the most interesting thing you know?"])
for answer in res["answers"]:
print(answer)
@@ -31,6 +33,7 @@ class GoogleAIGeminiGenerator:
This is a more advanced usage that also uses text and images as input:
```python
import requests
+from haystack.utils import Secret
from haystack.dataclasses.byte_stream import ByteStream
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
@@ -50,7 +53,7 @@ class GoogleAIGeminiGenerator:
for url in URLS
]
gemini = GoogleAIGeminiGenerator(model="gemini-pro-vision", api_key="<MY_API_KEY>")
gemini = GoogleAIGeminiGenerator(model="gemini-pro-vision", api_key=Secret.from_token("<MY_API_KEY>"))
result = gemini.run(parts = ["What can you tell me about these robots?", *images])
for answer in result["answers"]:
print(answer)
Expand All @@ -66,15 +69,14 @@ class GoogleAIGeminiGenerator:
def __init__(
self,
*,
-        api_key: Optional[str] = None,
+        api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"),  # noqa: B008
model: str = "gemini-pro-vision",
generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None,
safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None,
tools: Optional[List[Tool]] = None,
):
"""
Initialize a GoogleAIGeminiGenerator instance.
-        If `api_key` is `None` it will use the `GOOGLE_API_KEY` env variable for authentication.
To get an API key, visit: https://makersuite.google.com
@@ -83,7 +85,7 @@ def __init__(
* `gemini-pro-vision`
* `gemini-ultra`
-        :param api_key: Google Makersuite API key, defaults to None
+        :param api_key: Google Makersuite API key.
:param model: Name of the model to use, defaults to "gemini-pro-vision"
:param generation_config: The generation config to use, defaults to None.
Can either be a GenerationConfig object or a dictionary of parameters.
@@ -99,9 +101,9 @@ def __init__(
:param tools: The tools to use, defaults to None.
A list of Tool objects that can be used to modify the generation process.
"""
-        # Authenticate, if api_key is None it will use the GOOGLE_API_KEY env variable
-        genai.configure(api_key=api_key)
+        genai.configure(api_key=api_key.resolve_value())

+        self._api_key = api_key
self._model_name = model
self._generation_config = generation_config
self._safety_settings = safety_settings
@@ -123,6 +125,7 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, Any]]
def to_dict(self) -> Dict[str, Any]:
data = default_to_dict(
self,
+            api_key=self._api_key.to_dict(),
model=self._model_name,
generation_config=self._generation_config,
safety_settings=self._safety_settings,
@@ -138,6 +141,8 @@ def to_dict(self) -> Dict[str, Any]:

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiGenerator":
+        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])

if (tools := data["init_parameters"].get("tools")) is not None:
data["init_parameters"]["tools"] = [Tool.deserialize(t) for t in tools]
if (generation_config := data["init_parameters"].get("generation_config")) is not None:
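
For completeness, the `resolve_value()` call both generators now make is what preserves the old env-var fallback. A minimal sketch of the `Secret` semantics assumed throughout this diff (based on `haystack.utils.Secret`, not code in this repository):

```python
from haystack.utils import Secret

api_key = Secret.from_env_var("GOOGLE_API_KEY")
# Reads GOOGLE_API_KEY at call time; in the default strict mode this raises if
# the variable is unset, instead of passing None through to genai.configure().
key_string = api_key.resolve_value()

token = Secret.from_token("<MY_API_KEY>")
token.resolve_value()   # returns the token directly
# token.to_dict() would raise: token-backed secrets are not serializable by design.
```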