Merge branch 'main' into feat-streaming-vertexai-chat-gen

deepset-ai · Aug 27, 2024 · eaa76f4 · eaa76f4
2 parents ad55d37 + ee08a47
commit eaa76f4
Show file tree

Hide file tree

Showing 13 changed files with 221 additions and 74 deletions.
diff --git a/.github/utils/pyproject_to_requirements.py b/.github/utils/pyproject_to_requirements.py
@@ -0,0 +1,26 @@
+import argparse
+import sys
+from pathlib import Path
+import toml
+
+def main(pyproject_path: Path, exclude_optional_dependencies: bool = False):
+    content = toml.load(pyproject_path)
+    deps = set(content["project"]["dependencies"])
+
+    if not exclude_optional_dependencies:
+        optional_deps = content["project"].get("optional-dependencies", {})
+        for dep_list in optional_deps.values():
+            deps.update(dep_list)
+
+    print("\n".join(sorted(deps)))
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="pyproject_to_requirements.py",
+        description="Convert pyproject.toml to requirements.txt"
+    )
+    parser.add_argument("pyproject_path", type=Path, help="Path to pyproject.toml file")
+    parser.add_argument("--exclude-optional-dependencies", action="store_true", help="Exclude optional dependencies")
+
+    args = parser.parse_args()
+    main(args.pyproject_path, args.exclude_optional_dependencies)
diff --git a/.github/workflows/CI_license_compliance.yml b/.github/workflows/CI_license_compliance.yml
@@ -0,0 +1,95 @@
+name: Core / License Compliance
+
+on:
+  pull_request:
+    paths:
+    - "integrations/**/pyproject.toml"
+  # Since we test PRs, there is no need to run the workflow at each
+  # merge on `main`. Let's use a cron job instead.
+  schedule:
+    - cron: "0 0 * * *" # every day at midnight (UTC)
+
+env:
+  CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
+  PYTHON_VERSION: "3.10"
+  EXCLUDE_PACKAGES: "(?i)^(deepeval|cohere|fastembed|ragas|tqdm|psycopg).*"
+
+  # Exclusions must be explicitly motivated
+  #
+  # - deepeval is Apache 2.0 but the license is not available on PyPI
+  # - cohere is MIT but the license is not available on PyPI
+  # - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)")
+  # - ragas is Apache 2.0 but the license is not available on PyPI
+
+  # - tqdm is MPL but there are no better alternatives
+  # - psycopg is LGPL-3.0 but FOSSA is fine with it
+
+jobs:
+  license_check_direct:
+    name: Direct dependencies only
+    env:
+      REQUIREMENTS_FILE: requirements_direct.txt
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout the code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Get changed pyproject files (for pull requests only)
+        if: ${{ github.event_name == 'pull_request' }}
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: |
+            integrations/**/pyproject.toml       
+
+      - name: Get direct dependencies from pyproject.toml files
+        run: |
+          pip install toml
+
+          # Determine the list of pyproject.toml files to process
+          if [ "${{ github.event_name }}" = "schedule" ]; then
+            echo "Scheduled run: processing all pyproject.toml files..."
+            FILES=$(find integrations -type f -name 'pyproject.toml')
+          else
+            echo "Pull request: processing changed pyproject.toml files..."
+            FILES="${{ steps.changed-files.outputs.all_changed_files }}"
+          fi
+
+          for file in $FILES; do
+            python .github/utils/pyproject_to_requirements.py $file >> ${{ env.REQUIREMENTS_FILE }}
+            echo "" >> ${{ env.REQUIREMENTS_FILE }}
+          done
+
+      - name: Check Licenses
+        id: license_check_report
+        uses: pilosus/action-pip-license-checker@v2
+        with:
+          github-token: ${{ secrets.GH_ACCESS_TOKEN }}
+          requirements: ${{ env.REQUIREMENTS_FILE }}
+          fail: "Copyleft,Other,Error"
+          exclude: "${{ env.EXCLUDE_PACKAGES }}"
+
+      # We keep the license inventory on FOSSA
+      - name: Send license report to Fossa
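+        uses: fossas/fossa-action@v1.3.3 # NOTE(review): ref was garbled by e-mail obfuscation in this capture; confirm the pinned version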
+        continue-on-error: true # not critical
+        with:
+          api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }}
+
+      - name: Print report
+        if: ${{ always() }}
+        run: echo "${{ steps.license_check_report.outputs.report }}"
+
+      - name: Send event to Datadog for nightly failures
+        if: failure() && github.event_name == 'schedule'
+        uses: ./.github/actions/send_failure
+        with:
+          title: |
+            Core integrations license compliance nightly failure: ${{ github.workflow }}
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+
diff --git a/README.md b/README.md
@@ -22,6 +22,8 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
 
 ## Inventory
 
+[![License Compliance](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml)
+
 | Package                                                                                                        | Type                | PyPi Package                                                                                                                                             | Status                                                                                                                                                                                                                                               |
 |----------------------------------------------------------------------------------------------------------------|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | [amazon-bedrock-haystack](integrations/amazon_bedrock/)                                                        | Generator           | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)                         | [![Test / amazon_bedrock](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml)                   |

diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
@@ -93,7 +93,7 @@ def __init__(
         self._initialized = False
 
         if is_hosted(api_url) and not self.model:  # manually set default model
-            self.model = "NV-Embed-QA"
+            self.model = "nvidia/nv-embedqa-e5-v5"
 
     def default_model(self):
         """Set default model in local NIM mode."""

diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
@@ -77,7 +77,7 @@ def __init__(
         self._initialized = False
 
         if is_hosted(api_url) and not self.model:  # manually set default model
-            self.model = "NV-Embed-QA"
+            self.model = "nvidia/nv-embedqa-e5-v5"
 
     def default_model(self):
         """Set default model in local NIM mode."""

diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py
@@ -2,9 +2,10 @@
 
 import pytest
 from haystack.utils import Secret
-from haystack_integrations.utils.nvidia import Model, NimBackend
 from requests_mock import Mocker
 
+from haystack_integrations.utils.nvidia import Model, NimBackend
+
 
 class MockBackend(NimBackend):
     def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None):

diff --git a/integrations/nvidia/tests/test_base_url.py b/integrations/nvidia/tests/test_base_url.py
@@ -1,4 +1,5 @@
 import pytest
+
 from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder, NvidiaTextEmbedder
 from haystack_integrations.components.generators.nvidia import NvidiaGenerator
 

diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py
@@ -3,6 +3,7 @@
 import pytest
 from haystack import Document
 from haystack.utils import Secret
+
 from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder
 
 from . import MockBackend
@@ -14,7 +15,7 @@ def test_init_default(self, monkeypatch):
         embedder = NvidiaDocumentEmbedder()
 
         assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
-        assert embedder.model == "NV-Embed-QA"
+        assert embedder.model == "nvidia/nv-embedqa-e5-v5"
         assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
         assert embedder.prefix == ""
         assert embedder.suffix == ""
@@ -372,15 +373,34 @@ def test_run_integration_with_nim_backend(self):
             assert isinstance(doc.embedding, list)
             assert isinstance(doc.embedding[0], float)
 
+    @pytest.mark.parametrize(
+        "model, api_url",
+        [
+            ("NV-Embed-QA", None),
+            ("snowflake/arctic-embed-l", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embed-v1", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embedqa-mistral-7b-v2", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embedqa-e5-v5", "https://integrate.api.nvidia.com/v1"),
+            ("baai/bge-m3", "https://integrate.api.nvidia.com/v1"),
+        ],
+        ids=[
+            "NV-Embed-QA",
+            "snowflake/arctic-embed-l",
+            "nvidia/nv-embed-v1",
+            "nvidia/nv-embedqa-mistral-7b-v2",
+            "nvidia/nv-embedqa-e5-v5",
+            "baai/bge-m3",
+        ],
+    )
     @pytest.mark.skipif(
         not os.environ.get("NVIDIA_API_KEY", None),
         reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
     )
     @pytest.mark.integration
-    def test_run_integration_with_api_catalog(self):
+    def test_run_integration_with_api_catalog(self, model, api_url):
         embedder = NvidiaDocumentEmbedder(
-            model="NV-Embed-QA",
-            api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
+            model=model,
+            **({"api_url": api_url} if api_url else {}),
             api_key=Secret.from_env_var("NVIDIA_API_KEY"),
         )
         embedder.warm_up()

diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py
@@ -5,9 +5,10 @@
 
 import pytest
 from haystack.utils import Secret
-from haystack_integrations.components.generators.nvidia import NvidiaGenerator
 from requests_mock import Mocker
 
+from haystack_integrations.components.generators.nvidia import NvidiaGenerator
+
 
 @pytest.fixture
 def mock_local_chat_completion(requests_mock: Mocker) -> None:

diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py
@@ -2,6 +2,7 @@
 
 import pytest
 from haystack.utils import Secret
+
 from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder
 
 from . import MockBackend
@@ -169,15 +170,34 @@ def test_run_integration_with_nim_backend(self):
         assert all(isinstance(x, float) for x in embedding)
         assert "usage" in meta
 
+    @pytest.mark.parametrize(
+        "model, api_url",
+        [
+            ("NV-Embed-QA", None),
+            ("snowflake/arctic-embed-l", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embed-v1", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embedqa-mistral-7b-v2", "https://integrate.api.nvidia.com/v1"),
+            ("nvidia/nv-embedqa-e5-v5", "https://integrate.api.nvidia.com/v1"),
+            ("baai/bge-m3", "https://integrate.api.nvidia.com/v1"),
+        ],
+        ids=[
+            "NV-Embed-QA",
+            "snowflake/arctic-embed-l",
+            "nvidia/nv-embed-v1",
+            "nvidia/nv-embedqa-mistral-7b-v2",
+            "nvidia/nv-embedqa-e5-v5",
+            "baai/bge-m3",
+        ],
+    )
     @pytest.mark.skipif(
         not os.environ.get("NVIDIA_API_KEY", None),
         reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
     )
     @pytest.mark.integration
-    def test_run_integration_with_api_catalog(self):
+    def test_run_integration_with_api_catalog(self, model, api_url):
         embedder = NvidiaTextEmbedder(
-            model="NV-Embed-QA",
-            api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
+            model=model,
+            **({"api_url": api_url} if api_url else {}),
             api_key=Secret.from_env_var("NVIDIA_API_KEY"),
         )
         embedder.warm_up()

diff --git a/integrations/qdrant/CHANGELOG.md b/integrations/qdrant/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## [integrations/qdrant-v4.2.0] - 2024-08-27
+
+### 🚜 Refactor
+
+- Qdrant Query API (#1025)
+
+### 🧪 Testing
+
+- Do not retry tests in `hatch run test` command (#954)
+
 ## [integrations/qdrant-v4.1.2] - 2024-07-15
 
 ### 🐛 Bug Fixes