Skip to content

Commit

Permalink
Merge branch 'main' into feat-streaming-vertexai-chat-gen
Browse files Browse the repository at this point in the history
  • Loading branch information
Amnah199 authored Aug 27, 2024
2 parents ad55d37 + ee08a47 commit eaa76f4
Show file tree
Hide file tree
Showing 13 changed files with 221 additions and 74 deletions.
26 changes: 26 additions & 0 deletions .github/utils/pyproject_to_requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import argparse
import sys
from pathlib import Path
import toml

def main(pyproject_path: Path, exclude_optional_dependencies: bool = False) -> None:
    """Print the dependencies declared in a pyproject.toml, one per line.

    The requirement strings are de-duplicated and written to stdout in sorted
    order, so the output can be redirected into a requirements file.

    Args:
        pyproject_path: Path to the pyproject.toml file to read.
        exclude_optional_dependencies: When True, only the entries of
            ``project.dependencies`` are printed. Otherwise every list found
            under ``project.optional-dependencies`` is included as well.

    Raises:
        KeyError: If the file has no ``[project]`` table.
    """
    # A missing [project] table still fails loudly: there is nothing
    # meaningful to export from such a file.
    project = toml.load(pyproject_path)["project"]

    # `dependencies` is an optional key; a project that declares none simply
    # contributes no requirements instead of aborting the whole run.
    deps = set(project.get("dependencies", []))

    if not exclude_optional_dependencies:
        for dep_list in project.get("optional-dependencies", {}).values():
            deps.update(dep_list)

    print("\n".join(sorted(deps)))

if __name__ == "__main__":
    # Command-line entry point: parse the pyproject.toml location and the
    # optional-dependencies switch, then hand both over to main().
    cli = argparse.ArgumentParser(
        prog="pyproject_to_requirements.py",
        description="Convert pyproject.toml to requirements.txt",
    )
    cli.add_argument(
        "pyproject_path",
        type=Path,
        help="Path to pyproject.toml file",
    )
    cli.add_argument(
        "--exclude-optional-dependencies",
        action="store_true",
        help="Exclude optional dependencies",
    )

    options = cli.parse_args()
    main(
        pyproject_path=options.pyproject_path,
        exclude_optional_dependencies=options.exclude_optional_dependencies,
    )
95 changes: 95 additions & 0 deletions .github/workflows/CI_license_compliance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
name: Core / License Compliance

on:
pull_request:
paths:
- "integrations/**/pyproject.toml"
# Since we test PRs, there is no need to run the workflow at each
# merge on `main`. Let's use a cron job instead.
schedule:
- cron: "0 0 * * *" # every day at midnight

env:
CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
PYTHON_VERSION: "3.10"
EXCLUDE_PACKAGES: "(?i)^(deepeval|cohere|fastembed|ragas|tqdm|psycopg).*"

# Exclusions must be explicitly motivated
#
# - deepeval is Apache 2.0 but the license is not available on PyPI
# - cohere is MIT but the license is not available on PyPI
# - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)")
# - ragas is Apache 2.0 but the license is not available on PyPI

# - tqdm is MPL but there are no better alternatives
# - psycopg is LGPL-3.0 but FOSSA is fine with it

jobs:
license_check_direct:
name: Direct dependencies only
env:
REQUIREMENTS_FILE: requirements_direct.txt
runs-on: ubuntu-latest
steps:
- name: Checkout the code
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "${{ env.PYTHON_VERSION }}"

- name: Get changed pyproject files (for pull requests only)
if: ${{ github.event_name == 'pull_request' }}
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: |
integrations/**/pyproject.toml
- name: Get direct dependencies from pyproject.toml files
  env:
    # Hand the changed-file list to the script through the environment
    # instead of interpolating it into the shell source, so file names
    # coming from a pull request are never parsed as shell code.
    CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
  run: |
    pip install toml
    # Determine the list of pyproject.toml files to process
    if [ "${{ github.event_name }}" = "schedule" ]; then
      echo "Scheduled run: processing all pyproject.toml files..."
      FILES=$(find integrations -type f -name 'pyproject.toml')
    else
      echo "Pull request: processing changed pyproject.toml files..."
      FILES="$CHANGED_FILES"
    fi
    # FILES is a whitespace-separated list, so it is intentionally left
    # unquoted here; each individual path is quoted when it is used.
    for file in $FILES; do
      python .github/utils/pyproject_to_requirements.py "$file" >> "$REQUIREMENTS_FILE"
      echo "" >> "$REQUIREMENTS_FILE"
    done
- name: Check Licenses
id: license_check_report
uses: pilosus/action-pip-license-checker@v2
with:
github-token: ${{ secrets.GH_ACCESS_TOKEN }}
requirements: ${{ env.REQUIREMENTS_FILE }}
fail: "Copyleft,Other,Error"
exclude: "${{ env.EXCLUDE_PACKAGES }}"

# We keep the license inventory on FOSSA
- name: Send license report to Fossa
uses: fossas/[email protected]
continue-on-error: true # not critical
with:
api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }}

- name: Print report
if: ${{ always() }}
run: echo "${{ steps.license_check_report.outputs.report }}"

- name: Send event to Datadog for nightly failures
if: failure() && github.event_name == 'schedule'
uses: ./.github/actions/send_failure
with:
title: |
Core integrations license compliance nightly failure: ${{ github.workflow }}
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta

## Inventory

[![License Compliance](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml)

| Package | Type | PyPi Package | Status |
|----------------------------------------------------------------------------------------------------------------|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [amazon-bedrock-haystack](integrations/amazon_bedrock/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack) | [![Test / amazon_bedrock](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml) |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(
self._initialized = False

if is_hosted(api_url) and not self.model: # manually set default model
self.model = "NV-Embed-QA"
self.model = "nvidia/nv-embedqa-e5-v5"

def default_model(self):
"""Set default model in local NIM mode."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __init__(
self._initialized = False

if is_hosted(api_url) and not self.model: # manually set default model
self.model = "NV-Embed-QA"
self.model = "nvidia/nv-embedqa-e5-v5"

def default_model(self):
"""Set default model in local NIM mode."""
Expand Down
3 changes: 2 additions & 1 deletion integrations/nvidia/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import pytest
from haystack.utils import Secret
from haystack_integrations.utils.nvidia import Model, NimBackend
from requests_mock import Mocker

from haystack_integrations.utils.nvidia import Model, NimBackend


class MockBackend(NimBackend):
def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None):
Expand Down
1 change: 1 addition & 0 deletions integrations/nvidia/tests/test_base_url.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder, NvidiaTextEmbedder
from haystack_integrations.components.generators.nvidia import NvidiaGenerator

Expand Down
28 changes: 24 additions & 4 deletions integrations/nvidia/tests/test_document_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
from haystack import Document
from haystack.utils import Secret

from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder

from . import MockBackend
Expand All @@ -14,7 +15,7 @@ def test_init_default(self, monkeypatch):
embedder = NvidiaDocumentEmbedder()

assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
assert embedder.model == "NV-Embed-QA"
assert embedder.model == "nvidia/nv-embedqa-e5-v5"
assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
assert embedder.prefix == ""
assert embedder.suffix == ""
Expand Down Expand Up @@ -372,15 +373,34 @@ def test_run_integration_with_nim_backend(self):
assert isinstance(doc.embedding, list)
assert isinstance(doc.embedding[0], float)

@pytest.mark.parametrize(
"model, api_url",
[
("NV-Embed-QA", None),
("snowflake/arctic-embed-l", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embed-v1", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embedqa-mistral-7b-v2", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embedqa-e5-v5", "https://integrate.api.nvidia.com/v1"),
("baai/bge-m3", "https://integrate.api.nvidia.com/v1"),
],
ids=[
"NV-Embed-QA",
"snowflake/arctic-embed-l",
"nvidia/nv-embed-v1",
"nvidia/nv-embedqa-mistral-7b-v2",
"nvidia/nv-embedqa-e5-v5",
"baai/bge-m3",
],
)
@pytest.mark.skipif(
not os.environ.get("NVIDIA_API_KEY", None),
reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
)
@pytest.mark.integration
def test_run_integration_with_api_catalog(self):
def test_run_integration_with_api_catalog(self, model, api_url):
embedder = NvidiaDocumentEmbedder(
model="NV-Embed-QA",
api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
model=model,
**({"api_url": api_url} if api_url else {}),
api_key=Secret.from_env_var("NVIDIA_API_KEY"),
)
embedder.warm_up()
Expand Down
3 changes: 2 additions & 1 deletion integrations/nvidia/tests/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import pytest
from haystack.utils import Secret
from haystack_integrations.components.generators.nvidia import NvidiaGenerator
from requests_mock import Mocker

from haystack_integrations.components.generators.nvidia import NvidiaGenerator


@pytest.fixture
def mock_local_chat_completion(requests_mock: Mocker) -> None:
Expand Down
26 changes: 23 additions & 3 deletions integrations/nvidia/tests/test_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from haystack.utils import Secret

from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder

from . import MockBackend
Expand Down Expand Up @@ -169,15 +170,34 @@ def test_run_integration_with_nim_backend(self):
assert all(isinstance(x, float) for x in embedding)
assert "usage" in meta

@pytest.mark.parametrize(
"model, api_url",
[
("NV-Embed-QA", None),
("snowflake/arctic-embed-l", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embed-v1", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embedqa-mistral-7b-v2", "https://integrate.api.nvidia.com/v1"),
("nvidia/nv-embedqa-e5-v5", "https://integrate.api.nvidia.com/v1"),
("baai/bge-m3", "https://integrate.api.nvidia.com/v1"),
],
ids=[
"NV-Embed-QA",
"snowflake/arctic-embed-l",
"nvidia/nv-embed-v1",
"nvidia/nv-embedqa-mistral-7b-v2",
"nvidia/nv-embedqa-e5-v5",
"baai/bge-m3",
],
)
@pytest.mark.skipif(
not os.environ.get("NVIDIA_API_KEY", None),
reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
)
@pytest.mark.integration
def test_run_integration_with_api_catalog(self):
def test_run_integration_with_api_catalog(self, model, api_url):
embedder = NvidiaTextEmbedder(
model="NV-Embed-QA",
api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
model=model,
**({"api_url": api_url} if api_url else {}),
api_key=Secret.from_env_var("NVIDIA_API_KEY"),
)
embedder.warm_up()
Expand Down
10 changes: 10 additions & 0 deletions integrations/qdrant/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## [integrations/qdrant-v4.2.0] - 2024-08-27

### 🚜 Refactor

- Qdrant Query API (#1025)

### 🧪 Testing

- Do not retry tests in `hatch run test` command (#954)

## [integrations/qdrant-v4.1.2] - 2024-07-15

### 🐛 Bug Fixes
Expand Down
Loading

0 comments on commit eaa76f4

Please sign in to comment.