diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml index cb63ee2f5..3edd544a2 100644 --- a/integrations/opensearch/pyproject.toml +++ b/integrations/opensearch/pyproject.toml @@ -33,6 +33,9 @@ Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/m Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/opensearch" +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + [tool.hatch.version] source = "vcs" tag-pattern = 'integrations\/opensearch-v(?P<version>.*)' @@ -70,7 +73,7 @@ dependencies = [ "ruff>=0.0.243", ] [tool.hatch.envs.lint.scripts] -typing = "mypy --install-types --non-interactive {args:src/opensearch_haystack tests}" +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" style = [ "ruff {args:.}", "black --check --diff {args:.}", @@ -139,25 +142,22 @@ unfixable = [ ] [tool.ruff.isort] -known-first-party = ["opensearch_haystack"] +known-first-party = ["src"] [tool.ruff.flake8-tidy-imports] -ban-relative-imports = "all" +ban-relative-imports = "parents" [tool.ruff.per-file-ignores] # Tests can use magic values, assertions, and relative imports "tests/**/*" = ["PLR2004", "S101", "TID252"] [tool.coverage.run] -source_pkgs = ["opensearch_haystack", "tests"] +source_pkgs = ["src", "tests"] branch = true parallel = true -omit = [ - "src/opensearch_haystack/__about__.py", -] [tool.coverage.paths] -opensearch_haystack = ["src/opensearch_haystack", "*/opensearch-haystack/src/opensearch_haystack"] +opensearch_haystack = ["src/haystack_integrations", "*/opensearch-haystack/src"] tests = ["tests", "*/opensearch-haystack/tests"] [tool.coverage.report] @@ -177,6 +177,7 @@ markers = [ [[tool.mypy.overrides]] module = [ "haystack.*", + "haystack_integrations.*", "pytest.*", "opensearchpy.*", ] diff --git 
a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py new file mode 100644 index 000000000..63e0003df --- /dev/null +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .bm25_retriever import OpenSearchBM25Retriever +from .embedding_retriever import OpenSearchEmbeddingRetriever + +__all__ = ["OpenSearchBM25Retriever", "OpenSearchEmbeddingRetriever"] diff --git a/integrations/opensearch/src/opensearch_haystack/bm25_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py similarity index 98% rename from integrations/opensearch/src/opensearch_haystack/bm25_retriever.py rename to integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py index 34184dc19..d1b1e6ada 100644 --- a/integrations/opensearch/src/opensearch_haystack/bm25_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/bm25_retriever.py @@ -5,8 +5,7 @@ from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document - -from opensearch_haystack.document_store import OpenSearchDocumentStore +from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore @component diff --git a/integrations/opensearch/src/opensearch_haystack/embedding_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py similarity index 97% rename from integrations/opensearch/src/opensearch_haystack/embedding_retriever.py rename to integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py index b6293fc52..15c8313ab 
100644 --- a/integrations/opensearch/src/opensearch_haystack/embedding_retriever.py +++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/embedding_retriever.py @@ -5,8 +5,7 @@ from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document - -from opensearch_haystack.document_store import OpenSearchDocumentStore +from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore @component diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/__init__.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/__init__.py new file mode 100644 index 000000000..0384d7e5e --- /dev/null +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .document_store import OpenSearchDocumentStore + +__all__ = ["OpenSearchDocumentStore"] diff --git a/integrations/opensearch/src/opensearch_haystack/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py similarity index 96% rename from integrations/opensearch/src/opensearch_haystack/document_store.py rename to integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py index f6f60988b..dc6941854 100644 --- a/integrations/opensearch/src/opensearch_haystack/document_store.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py @@ -7,13 +7,13 @@ import numpy as np from haystack import default_from_dict, default_to_dict from haystack.dataclasses import Document -from haystack.document_stores import DocumentStoreError, DuplicateDocumentError, DuplicatePolicy +from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError +from haystack.document_stores.types import 
DuplicatePolicy from haystack.utils.filters import convert +from haystack_integrations.document_stores.opensearch.filters import normalize_filters from opensearchpy import OpenSearch from opensearchpy.helpers import bulk -from opensearch_haystack.filters import _normalize_filters - logger = logging.getLogger(__name__) Hosts = Union[str, List[Union[str, Mapping[str, Union[str, int]]]]] @@ -127,7 +127,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc filters = convert(filters) if filters: - query = {"bool": {"filter": _normalize_filters(filters)}} + query = {"bool": {"filter": normalize_filters(filters)}} documents = self._search_documents(query=query, size=10_000) else: documents = self._search_documents(size=10_000) @@ -272,7 +272,7 @@ def _bm25_retrieval( } if filters: - body["query"]["bool"]["filter"] = _normalize_filters(filters) + body["query"]["bool"]["filter"] = normalize_filters(filters) documents = self._search_documents(**body) @@ -332,7 +332,7 @@ def _embedding_retrieval( } if filters: - body["query"]["bool"]["filter"] = _normalize_filters(filters) + body["query"]["bool"]["filter"] = normalize_filters(filters) docs = self._search_documents(**body) return docs diff --git a/integrations/opensearch/src/opensearch_haystack/filters.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py similarity index 99% rename from integrations/opensearch/src/opensearch_haystack/filters.py rename to integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py index 415304ec1..3aae3aacd 100644 --- a/integrations/opensearch/src/opensearch_haystack/filters.py +++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/filters.py @@ -8,7 +8,7 @@ from pandas import DataFrame -def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: +def normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]: """ Converts Haystack filters in OpenSearch 
compatible filters. """ diff --git a/integrations/opensearch/src/opensearch_haystack/__init__.py b/integrations/opensearch/src/opensearch_haystack/__init__.py deleted file mode 100644 index a15411693..000000000 --- a/integrations/opensearch/src/opensearch_haystack/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-FileCopyrightText: 2023-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from opensearch_haystack.bm25_retriever import OpenSearchBM25Retriever -from opensearch_haystack.document_store import OpenSearchDocumentStore -from opensearch_haystack.embedding_retriever import OpenSearchEmbeddingRetriever - -__all__ = ["OpenSearchDocumentStore", "OpenSearchBM25Retriever", "OpenSearchEmbeddingRetriever"] diff --git a/integrations/opensearch/tests/test_bm25_retriever.py b/integrations/opensearch/tests/test_bm25_retriever.py index b043e38d4..3f84f41a9 100644 --- a/integrations/opensearch/tests/test_bm25_retriever.py +++ b/integrations/opensearch/tests/test_bm25_retriever.py @@ -4,9 +4,8 @@ from unittest.mock import Mock, patch from haystack.dataclasses import Document - -from opensearch_haystack.bm25_retriever import OpenSearchBM25Retriever -from opensearch_haystack.document_store import OpenSearchDocumentStore +from haystack_integrations.components.retrievers.opensearch import OpenSearchBM25Retriever +from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore def test_init_default(): @@ -18,20 +17,20 @@ def test_init_default(): assert not retriever._scale_score -@patch("opensearch_haystack.document_store.OpenSearch") +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_to_dict(_mock_opensearch_client): document_store = OpenSearchDocumentStore(hosts="some fake host") retriever = OpenSearchBM25Retriever(document_store=document_store) res = retriever.to_dict() assert res == { - "type": "opensearch_haystack.bm25_retriever.OpenSearchBM25Retriever", + "type": 
"haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever", "init_parameters": { "document_store": { "init_parameters": { "hosts": "some fake host", "index": "default", }, - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", }, "filters": {}, "fuzziness": "AUTO", @@ -41,14 +40,14 @@ def test_to_dict(_mock_opensearch_client): } -@patch("opensearch_haystack.document_store.OpenSearch") +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(_mock_opensearch_client): data = { - "type": "opensearch_haystack.bm25_retriever.OpenSearchBM25Retriever", + "type": "haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever", "init_parameters": { "document_store": { "init_parameters": {"hosts": "some fake host", "index": "default"}, - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", }, "filters": {}, "fuzziness": "AUTO", diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index b1e367745..e3a314141 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -8,12 +8,11 @@ import pytest from haystack.dataclasses.document import Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError -from haystack.document_stores.protocol import DuplicatePolicy +from haystack.document_stores.types import DuplicatePolicy from haystack.testing.document_store import DocumentStoreBaseTests +from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore from opensearchpy.exceptions import RequestError -from opensearch_haystack.document_store import 
OpenSearchDocumentStore - class TestDocumentStore(DocumentStoreBaseTests): """ @@ -88,22 +87,22 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("opensearch_haystack.document_store.OpenSearch") + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_to_dict(self, _mock_opensearch_client): document_store = OpenSearchDocumentStore(hosts="some hosts") res = document_store.to_dict() assert res == { - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", "init_parameters": { "hosts": "some hosts", "index": "default", }, } - @patch("opensearch_haystack.document_store.OpenSearch") + @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(self, _mock_opensearch_client): data = { - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", "init_parameters": { "hosts": "some hosts", "index": "default", diff --git a/integrations/opensearch/tests/test_embedding_retriever.py b/integrations/opensearch/tests/test_embedding_retriever.py index db360d757..0190ca208 100644 --- a/integrations/opensearch/tests/test_embedding_retriever.py +++ b/integrations/opensearch/tests/test_embedding_retriever.py @@ -4,9 +4,8 @@ from unittest.mock import Mock, patch from haystack.dataclasses import Document - -from opensearch_haystack.document_store import OpenSearchDocumentStore -from opensearch_haystack.embedding_retriever import OpenSearchEmbeddingRetriever +from haystack_integrations.components.retrievers.opensearch import OpenSearchEmbeddingRetriever +from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore def test_init_default(): @@ -17,20 +16,21 @@ def 
test_init_default(): assert retriever._top_k == 10 -@patch("opensearch_haystack.document_store.OpenSearch") +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_to_dict(_mock_opensearch_client): document_store = OpenSearchDocumentStore(hosts="some fake host") retriever = OpenSearchEmbeddingRetriever(document_store=document_store) res = retriever.to_dict() + type_s = "haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever" assert res == { - "type": "opensearch_haystack.embedding_retriever.OpenSearchEmbeddingRetriever", + "type": type_s, "init_parameters": { "document_store": { "init_parameters": { "hosts": "some fake host", "index": "default", }, - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", }, "filters": {}, "top_k": 10, @@ -38,14 +38,15 @@ def test_to_dict(_mock_opensearch_client): } -@patch("opensearch_haystack.document_store.OpenSearch") +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") def test_from_dict(_mock_opensearch_client): + type_s = "haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever" data = { - "type": "opensearch_haystack.embedding_retriever.OpenSearchEmbeddingRetriever", + "type": type_s, "init_parameters": { "document_store": { "init_parameters": {"hosts": "some fake host", "index": "default"}, - "type": "opensearch_haystack.document_store.OpenSearchDocumentStore", + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", }, "filters": {}, "top_k": 10, diff --git a/integrations/opensearch/tests/test_filters.py b/integrations/opensearch/tests/test_filters.py index 34a7682d5..d333dc584 100644 --- a/integrations/opensearch/tests/test_filters.py +++ b/integrations/opensearch/tests/test_filters.py @@ -3,8 +3,7 
@@ # SPDX-License-Identifier: Apache-2.0 import pytest from haystack.errors import FilterError - -from opensearch_haystack.filters import _normalize_filters, _normalize_ranges +from haystack_integrations.document_stores.opensearch.filters import _normalize_ranges, normalize_filters filters_data = [ ( @@ -179,35 +178,35 @@ @pytest.mark.parametrize("filters, expected", filters_data) def test_normalize_filters(filters, expected): - result = _normalize_filters(filters) + result = normalize_filters(filters) assert result == expected def test_normalize_filters_invalid_operator(): with pytest.raises(FilterError): - _normalize_filters({"operator": "INVALID", "conditions": []}) + normalize_filters({"operator": "INVALID", "conditions": []}) def test_normalize_filters_malformed(): # Missing operator with pytest.raises(FilterError): - _normalize_filters({"conditions": []}) + normalize_filters({"conditions": []}) # Missing conditions with pytest.raises(FilterError): - _normalize_filters({"operator": "AND"}) + normalize_filters({"operator": "AND"}) # Missing comparison field with pytest.raises(FilterError): - _normalize_filters({"operator": "AND", "conditions": [{"operator": "==", "value": "article"}]}) + normalize_filters({"operator": "AND", "conditions": [{"operator": "==", "value": "article"}]}) # Missing comparison operator with pytest.raises(FilterError): - _normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "operator": "=="}]}) + normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "operator": "=="}]}) # Missing comparison value with pytest.raises(FilterError): - _normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "value": "article"}]}) + normalize_filters({"operator": "AND", "conditions": [{"field": "meta.type", "value": "article"}]}) def test_normalize_ranges():