diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml index 5ada5669e..2d73cdf58 100644 --- a/integrations/pinecone/pyproject.toml +++ b/integrations/pinecone/pyproject.toml @@ -34,6 +34,9 @@ Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/m Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/pinecone" +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + [tool.hatch.version] source = "vcs" tag-pattern = 'integrations\/pinecone-v(?P<version>.*)' @@ -74,7 +77,7 @@ dependencies = [ "numpy", ] [tool.hatch.envs.lint.scripts] -typing = "mypy --install-types --non-interactive {args:src/pinecone_haystack tests}" +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" style = [ "ruff {args:.}", "black --check --diff {args:.}", @@ -143,26 +146,26 @@ unfixable = [ ] [tool.ruff.isort] -known-first-party = ["pinecone_haystack"] +known-first-party = ["haystack_integrations"] [tool.ruff.flake8-tidy-imports] -ban-relative-imports = "all" +ban-relative-imports = "parents" [tool.ruff.per-file-ignores] # Tests can use magic values, assertions, and relative imports "tests/**/*" = ["PLR2004", "S101", "TID252"] [tool.coverage.run] -source_pkgs = ["pinecone_haystack", "tests"] +source_pkgs = ["src", "tests"] branch = true parallel = true omit = [ - "example" + "examples" ] [tool.coverage.paths] -pinecone_haystack = ["src/pinecone_haystack", "*/pinecone_haystack/src/pinecone_haystack"] -tests = ["tests", "*/pinecone_haystack/tests"] +pinecone_haystack = ["src/*"] +tests = ["tests"] [tool.coverage.report] exclude_lines = [ @@ -182,6 +185,7 @@ markers = [ module = [ "pinecone.*", "haystack.*", + "haystack_integrations.*", "pytest.*" ] ignore_missing_imports = true diff --git
a/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/__init__.py b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/__init__.py new file mode 100644 index 000000000..d73d799d4 --- /dev/null +++ b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/__init__.py @@ -0,0 +1,3 @@ +from .dense_retriever import PineconeDenseRetriever + +__all__ = ["PineconeDenseRetriever"] diff --git a/integrations/pinecone/src/pinecone_haystack/dense_retriever.py b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/dense_retriever.py similarity index 96% rename from integrations/pinecone/src/pinecone_haystack/dense_retriever.py rename to integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/dense_retriever.py index 3f60f252b..279ef4977 100644 --- a/integrations/pinecone/src/pinecone_haystack/dense_retriever.py +++ b/integrations/pinecone/src/haystack_integrations/components/retrievers/pinecone/dense_retriever.py @@ -6,7 +6,7 @@ from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document -from pinecone_haystack.document_store import PineconeDocumentStore +from haystack_integrations.document_stores.pinecone import PineconeDocumentStore @component diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/__init__.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/__init__.py new file mode 100644 index 000000000..159a85fae --- /dev/null +++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .document_store import PineconeDocumentStore + +__all__ = ["PineconeDocumentStore"] diff --git a/integrations/pinecone/src/pinecone_haystack/document_store.py 
b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py similarity index 99% rename from integrations/pinecone/src/pinecone_haystack/document_store.py rename to integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py index 8fe579611..a755b7e47 100644 --- a/integrations/pinecone/src/pinecone_haystack/document_store.py +++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py @@ -8,13 +8,14 @@ from typing import Any, Dict, List, Optional import pandas as pd -import pinecone from haystack import default_to_dict from haystack.dataclasses import Document from haystack.document_stores.types import DuplicatePolicy from haystack.utils.filters import convert -from pinecone_haystack.filters import _normalize_filters +import pinecone + +from .filters import _normalize_filters logger = logging.getLogger(__name__) diff --git a/integrations/pinecone/src/pinecone_haystack/errors.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/errors.py similarity index 100% rename from integrations/pinecone/src/pinecone_haystack/errors.py rename to integrations/pinecone/src/haystack_integrations/document_stores/pinecone/errors.py diff --git a/integrations/pinecone/src/pinecone_haystack/filters.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py similarity index 100% rename from integrations/pinecone/src/pinecone_haystack/filters.py rename to integrations/pinecone/src/haystack_integrations/document_stores/pinecone/filters.py diff --git a/integrations/pinecone/src/pinecone_haystack/__init__.py b/integrations/pinecone/src/pinecone_haystack/__init__.py deleted file mode 100644 index e3ec258d2..000000000 --- a/integrations/pinecone/src/pinecone_haystack/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-FileCopyrightText: 2023-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from pinecone_haystack.dense_retriever 
import PineconeDenseRetriever -from pinecone_haystack.document_store import PineconeDocumentStore - -__all__ = ["PineconeDocumentStore", "PineconeDenseRetriever"] diff --git a/integrations/pinecone/tests/conftest.py b/integrations/pinecone/tests/conftest.py index 3ae642ae7..c7a1342d5 100644 --- a/integrations/pinecone/tests/conftest.py +++ b/integrations/pinecone/tests/conftest.py @@ -3,7 +3,7 @@ import pytest from haystack.document_stores.types import DuplicatePolicy -from pinecone_haystack.document_store import PineconeDocumentStore +from haystack_integrations.document_stores.pinecone import PineconeDocumentStore # This is the approximate time it takes for the documents to be available SLEEP_TIME = 20 diff --git a/integrations/pinecone/tests/test_dense_retriever.py b/integrations/pinecone/tests/test_dense_retriever.py index ceb73b687..e0f6dc375 100644 --- a/integrations/pinecone/tests/test_dense_retriever.py +++ b/integrations/pinecone/tests/test_dense_retriever.py @@ -5,8 +5,8 @@ from haystack.dataclasses import Document -from pinecone_haystack.dense_retriever import PineconeDenseRetriever -from pinecone_haystack.document_store import PineconeDocumentStore +from haystack_integrations.components.retrievers.pinecone import PineconeDenseRetriever +from haystack_integrations.document_stores.pinecone import PineconeDocumentStore def test_init_default(): @@ -17,7 +17,7 @@ def test_init_default(): assert retriever.top_k == 10 -@patch("pinecone_haystack.document_store.pinecone") +@patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") def test_to_dict(mock_pinecone): mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 512} document_store = PineconeDocumentStore( @@ -31,7 +31,7 @@ def test_to_dict(mock_pinecone): retriever = PineconeDenseRetriever(document_store=document_store) res = retriever.to_dict() assert res == { - "type": "pinecone_haystack.dense_retriever.PineconeDenseRetriever", + "type": 
"haystack_integrations.components.retrievers.pinecone.dense_retriever.PineconeDenseRetriever", "init_parameters": { "document_store": { "init_parameters": { @@ -41,7 +41,7 @@ def test_to_dict(mock_pinecone): "batch_size": 50, "dimension": 512, }, - "type": "pinecone_haystack.document_store.PineconeDocumentStore", + "type": "haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore", }, "filters": {}, "top_k": 10, @@ -49,10 +49,10 @@ def test_to_dict(mock_pinecone): } -@patch("pinecone_haystack.document_store.pinecone") +@patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") def test_from_dict(mock_pinecone, monkeypatch): data = { - "type": "pinecone_haystack.dense_retriever.PineconeDenseRetriever", + "type": "haystack_integrations.components.retrievers.pinecone.dense_retriever.PineconeDenseRetriever", "init_parameters": { "document_store": { "init_parameters": { @@ -62,7 +62,7 @@ def test_from_dict(mock_pinecone, monkeypatch): "batch_size": 50, "dimension": 512, }, - "type": "pinecone_haystack.document_store.PineconeDocumentStore", + "type": "haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore", }, "filters": {}, "top_k": 10, diff --git a/integrations/pinecone/tests/test_document_store.py b/integrations/pinecone/tests/test_document_store.py index 5c9b32698..a856cde86 100644 --- a/integrations/pinecone/tests/test_document_store.py +++ b/integrations/pinecone/tests/test_document_store.py @@ -5,9 +5,75 @@ from haystack import Document from haystack.testing.document_store import CountDocumentsTest, DeleteDocumentsTest, WriteDocumentsTest -from pinecone_haystack.document_store import PineconeDocumentStore - - +from haystack_integrations.document_stores.pinecone import PineconeDocumentStore + + +@patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") +def test_init(mock_pinecone): + mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 
30} + + document_store = PineconeDocumentStore( + api_key="fake-api-key", + environment="gcp-starter", + index="my_index", + namespace="test", + batch_size=50, + dimension=30, + metric="euclidean", + ) + + mock_pinecone.init.assert_called_with(api_key="fake-api-key", environment="gcp-starter") + + assert document_store.environment == "gcp-starter" + assert document_store.index == "my_index" + assert document_store.namespace == "test" + assert document_store.batch_size == 50 + assert document_store.dimension == 30 + assert document_store.index_creation_kwargs == {"metric": "euclidean"} + + +@patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") +def test_init_api_key_in_environment_variable(mock_pinecone, monkeypatch): + monkeypatch.setenv("PINECONE_API_KEY", "fake-api-key") + + PineconeDocumentStore( + environment="gcp-starter", + index="my_index", + namespace="test", + batch_size=50, + dimension=30, + metric="euclidean", + ) + + mock_pinecone.init.assert_called_with(api_key="fake-api-key", environment="gcp-starter") + + +@patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") +def test_to_dict(mock_pinecone): + mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 30} + document_store = PineconeDocumentStore( + api_key="fake-api-key", + environment="gcp-starter", + index="my_index", + namespace="test", + batch_size=50, + dimension=30, + metric="euclidean", + ) + assert document_store.to_dict() == { + "type": "haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore", + "init_parameters": { + "environment": "gcp-starter", + "index": "my_index", + "dimension": 30, + "namespace": "test", + "batch_size": 50, + "metric": "euclidean", + }, + } + + +@pytest.mark.integration class TestDocumentStore(CountDocumentsTest, DeleteDocumentsTest, WriteDocumentsTest): def test_write_documents(self, document_store: PineconeDocumentStore): docs = [Document(id="1")] @@ -21,44 
+87,6 @@ def test_write_documents_duplicate_fail(self, document_store: PineconeDocumentSt def test_write_documents_duplicate_skip(self, document_store: PineconeDocumentStore): ... - @patch("pinecone_haystack.document_store.pinecone") - def test_init(self, mock_pinecone): - mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 30} - - document_store = PineconeDocumentStore( - api_key="fake-api-key", - environment="gcp-starter", - index="my_index", - namespace="test", - batch_size=50, - dimension=30, - metric="euclidean", - ) - - mock_pinecone.init.assert_called_with(api_key="fake-api-key", environment="gcp-starter") - - assert document_store.environment == "gcp-starter" - assert document_store.index == "my_index" - assert document_store.namespace == "test" - assert document_store.batch_size == 50 - assert document_store.dimension == 30 - assert document_store.index_creation_kwargs == {"metric": "euclidean"} - - @patch("pinecone_haystack.document_store.pinecone") - def test_init_api_key_in_environment_variable(self, mock_pinecone, monkeypatch): - monkeypatch.setenv("PINECONE_API_KEY", "fake-api-key") - - PineconeDocumentStore( - environment="gcp-starter", - index="my_index", - namespace="test", - batch_size=50, - dimension=30, - metric="euclidean", - ) - - mock_pinecone.init.assert_called_with(api_key="fake-api-key", environment="gcp-starter") - def test_init_fails_wo_api_key(self, monkeypatch): api_key = None monkeypatch.delenv("PINECONE_API_KEY", raising=False) @@ -69,30 +97,6 @@ def test_init_fails_wo_api_key(self, monkeypatch): index="my_index", ) - @patch("pinecone_haystack.document_store.pinecone") - def test_to_dict(self, mock_pinecone): - mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 30} - document_store = PineconeDocumentStore( - api_key="fake-api-key", - environment="gcp-starter", - index="my_index", - namespace="test", - batch_size=50, - dimension=30, - metric="euclidean", - ) - assert 
document_store.to_dict() == { - "type": "pinecone_haystack.document_store.PineconeDocumentStore", - "init_parameters": { - "environment": "gcp-starter", - "index": "my_index", - "dimension": 30, - "namespace": "test", - "batch_size": 50, - "metric": "euclidean", - }, - } - def test_embedding_retrieval(self, document_store: PineconeDocumentStore): query_embedding = [0.1] * 768 most_similar_embedding = [0.8] * 768 diff --git a/integrations/pinecone/tests/test_filters.py b/integrations/pinecone/tests/test_filters.py index 1e6aeb0cd..a38482a26 100644 --- a/integrations/pinecone/tests/test_filters.py +++ b/integrations/pinecone/tests/test_filters.py @@ -7,6 +7,7 @@ ) +@pytest.mark.integration class TestFilters(FilterDocumentsTest): def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): for doc in received: