Merge branch 'main' into rm-pytorch-from-instructor-deps
anakin87 authored Aug 30, 2024
2 parents ad19916 + a902ae5 commit 3d9c11b
Showing 33 changed files with 1,593 additions and 255 deletions.
26 changes: 26 additions & 0 deletions .github/utils/pyproject_to_requirements.py
@@ -0,0 +1,26 @@
import argparse
import sys
from pathlib import Path
import toml

def main(pyproject_path: Path, exclude_optional_dependencies: bool = False):
    content = toml.load(pyproject_path)
    deps = set(content["project"]["dependencies"])

    if not exclude_optional_dependencies:
        optional_deps = content["project"].get("optional-dependencies", {})
        for dep_list in optional_deps.values():
            deps.update(dep_list)

    print("\n".join(sorted(deps)))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="pyproject_to_requirements.py",
        description="Convert pyproject.toml to requirements.txt"
    )
    parser.add_argument("pyproject_path", type=Path, help="Path to pyproject.toml file")
    parser.add_argument("--exclude-optional-dependencies", action="store_true", help="Exclude optional dependencies")

    args = parser.parse_args()
    main(args.pyproject_path, args.exclude_optional_dependencies)
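For illustration, a small usage sketch for the script above (not part of the commit). It assumes the `toml` package is installed and that the commands run from the repository root; the pyproject contents and file name are invented:

import subprocess
from pathlib import Path

# Hypothetical pyproject.toml with one core and one optional dependency.
Path("demo_pyproject.toml").write_text(
    "[project]\n"
    'name = "demo"\n'
    'version = "0.0.1"\n'
    'dependencies = ["haystack-ai>=2.0.0"]\n'
    "\n"
    "[project.optional-dependencies]\n"
    'dev = ["pytest"]\n'
)

# Default behavior: core plus optional dependencies, deduplicated and sorted.
result = subprocess.run(
    ["python", ".github/utils/pyproject_to_requirements.py", "demo_pyproject.toml"],
    capture_output=True, text=True, check=True,
)
print(result.stdout)  # haystack-ai>=2.0.0 and pytest, one per line

# With the flag, optional dependencies are dropped.
result = subprocess.run(
    ["python", ".github/utils/pyproject_to_requirements.py", "demo_pyproject.toml",
     "--exclude-optional-dependencies"],
    capture_output=True, text=True, check=True,
)
print(result.stdout)  # haystack-ai>=2.0.0 only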
95 changes: 95 additions & 0 deletions .github/workflows/CI_license_compliance.yml
@@ -0,0 +1,95 @@
name: Core / License Compliance

on:
  pull_request:
    paths:
      - "integrations/**/pyproject.toml"
  # Since we test PRs, there is no need to run the workflow at each
  # merge on `main`. Let's use a cron job instead.
  schedule:
    - cron: "0 0 * * *" # every day at midnight

env:
  CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
  PYTHON_VERSION: "3.10"
  EXCLUDE_PACKAGES: "(?i)^(deepeval|cohere|fastembed|ragas|tqdm|psycopg).*"

# Exclusions must be explicitly motivated
#
# - deepeval is Apache 2.0 but the license is not available on PyPI
# - cohere is MIT but the license is not available on PyPI
# - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)")
# - ragas is Apache 2.0 but the license is not available on PyPI
#
# - tqdm is MPL but there are no better alternatives
# - psycopg is LGPL-3.0 but FOSSA is fine with it

jobs:
  license_check_direct:
    name: Direct dependencies only
    env:
      REQUIREMENTS_FILE: requirements_direct.txt
    runs-on: ubuntu-latest
    steps:
      - name: Checkout the code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Get changed pyproject files (for pull requests only)
        if: ${{ github.event_name == 'pull_request' }}
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: |
            integrations/**/pyproject.toml
      - name: Get direct dependencies from pyproject.toml files
        run: |
          pip install toml
          # Determine the list of pyproject.toml files to process
          if [ "${{ github.event_name }}" = "schedule" ]; then
            echo "Scheduled run: processing all pyproject.toml files..."
            FILES=$(find integrations -type f -name 'pyproject.toml')
          else
            echo "Pull request: processing changed pyproject.toml files..."
            FILES="${{ steps.changed-files.outputs.all_changed_files }}"
          fi
          for file in $FILES; do
            python .github/utils/pyproject_to_requirements.py $file >> ${{ env.REQUIREMENTS_FILE }}
            echo "" >> ${{ env.REQUIREMENTS_FILE }}
          done
      - name: Check Licenses
        id: license_check_report
        uses: pilosus/action-pip-license-checker@v2
        with:
          github-token: ${{ secrets.GH_ACCESS_TOKEN }}
          requirements: ${{ env.REQUIREMENTS_FILE }}
          fail: "Copyleft,Other,Error"
          exclude: "${{ env.EXCLUDE_PACKAGES }}"

      # We keep the license inventory on FOSSA
      - name: Send license report to Fossa
        uses: fossas/[email protected]
        continue-on-error: true # not critical
        with:
          api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }}

      - name: Print report
        if: ${{ always() }}
        run: echo "${{ steps.license_check_report.outputs.report }}"

      - name: Send event to Datadog for nightly failures
        if: failure() && github.event_name == 'schedule'
        uses: ./.github/actions/send_failure
        with:
          title: |
            Core integrations license compliance nightly failure: ${{ github.workflow }}
          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}

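A note on the EXCLUDE_PACKAGES pattern above: the (?i) flag makes it case-insensitive, and the trailing .* lets it cover any distribution whose name starts with one of the listed prefixes. A quick sketch of the pattern's behavior in plain Python (not part of the commit; the sample package names are illustrative):

import re

# The same pattern the workflow passes to the license checker's `exclude` input.
exclude = re.compile(r"(?i)^(deepeval|cohere|fastembed|ragas|tqdm|psycopg).*")

for name in ["deepeval", "Cohere", "psycopg2-binary", "haystack-ai"]:
    verdict = "excluded" if exclude.match(name) else "checked"
    print(f"{name}: {verdict}")
# deepeval: excluded
# Cohere: excluded (case-insensitive)
# psycopg2-binary: excluded (prefix match via the trailing .*)
# haystack-ai: checked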
2 changes: 2 additions & 0 deletions README.md
@@ -22,6 +22,8 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta

## Inventory

[![License Compliance](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/CI_license_compliance.yml)

| Package | Type | PyPI Package | Status |
|----------------------------------------------------------------------------------------------------------------|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [amazon-bedrock-haystack](integrations/amazon_bedrock/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack) | [![Test / amazon_bedrock](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml) |
13 changes: 13 additions & 0 deletions integrations/google_ai/CHANGELOG.md
@@ -2,11 +2,24 @@

## [unreleased]

### 🐛 Bug Fixes

- Remove the use of deprecated gemini models (#1032)

### 🧪 Testing

- Do not retry tests in `hatch run test` command (#954)

### ⚙️ Miscellaneous Tasks

- Retry tests to reduce flakiness (#836)
- Update ruff invocation to include check parameter (#853)

### Docs

- Update GeminiGenerator docstrings (#964)
- Update GoogleChatGenerator docstrings (#962)

## [integrations/google_ai-v1.1.0] - 2024-06-05

### 🐛 Bug Fixes
integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py

@@ -1,16 +1,16 @@
 import logging
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import google.generativeai as genai
 from google.ai.generativelanguage import Content, Part
 from google.ai.generativelanguage import Tool as ToolProto
 from google.generativeai import GenerationConfig, GenerativeModel
-from google.generativeai.types import HarmBlockThreshold, HarmCategory, Tool
+from google.generativeai.types import GenerateContentResponse, HarmBlockThreshold, HarmCategory, Tool
 from haystack.core.component import component
 from haystack.core.serialization import default_from_dict, default_to_dict
-from haystack.dataclasses.byte_stream import ByteStream
+from haystack.dataclasses import ByteStream, StreamingChunk
 from haystack.dataclasses.chat_message import ChatMessage, ChatRole
-from haystack.utils import Secret, deserialize_secrets_inplace
+from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
 
 logger = logging.getLogger(__name__)
 
@@ -21,10 +21,7 @@ class GoogleAIGeminiChatGenerator:
     Completes chats using multimodal Gemini models through Google AI Studio.
 
     It uses the [`ChatMessage`](https://docs.haystack.deepset.ai/docs/data-classes#chatmessage)
-    dataclass to interact with the model. You can use the following models:
-    - gemini-pro
-    - gemini-ultra
-    - gemini-pro-vision
+    dataclass to interact with the model.
 
     ### Usage example
 
@@ -103,27 +100,20 @@ def __init__(
         self,
         *,
         api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"),  # noqa: B008
-        model: str = "gemini-pro-vision",
+        model: str = "gemini-1.5-flash",
         generation_config: Optional[Union[GenerationConfig, Dict[str, Any]]] = None,
         safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None,
         tools: Optional[List[Tool]] = None,
+        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
     ):
         """
         Initializes a `GoogleAIGeminiChatGenerator` instance.
 
         To get an API key, visit: https://makersuite.google.com
 
-        It supports the following models:
-        * `gemini-pro`
-        * `gemini-pro-vision`
-        * `gemini-ultra`
-
        :param api_key: Google AI Studio API key. To get a key,
            see [Google AI Studio](https://makersuite.google.com).
-        :param model: Name of the model to use. Supported models are:
-            - gemini-pro
-            - gemini-ultra
-            - gemini-pro-vision
+        :param model: Name of the model to use. For available models, see https://ai.google.dev/gemini-api/docs/models/gemini.
        :param generation_config: The generation configuration to use.
            This can either be a `GenerationConfig` object or a dictionary of parameters.
            For available parameters, see
@@ -132,6 +122,8 @@ def __init__(
             A dictionary with `HarmCategory` as keys and `HarmBlockThreshold` as values.
             For more information, see [the API reference](https://ai.google.dev/api)
         :param tools: A list of Tool objects that can be used for [Function calling](https://ai.google.dev/docs/function_calling).
+        :param streaming_callback: A callback function that is called when a new token is received from the stream.
+            The callback function accepts StreamingChunk as an argument.
         """
 
         genai.configure(api_key=api_key.resolve_value())
@@ -142,6 +134,7 @@ def __init__(
         self._safety_settings = safety_settings
         self._tools = tools
         self._model = GenerativeModel(self._model_name, tools=self._tools)
+        self._streaming_callback = streaming_callback
 
     def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, Any]]) -> Dict[str, Any]:
         if isinstance(config, dict):
@@ -162,13 +155,16 @@ def to_dict(self) -> Dict[str, Any]:
         :returns:
             Dictionary with serialized data.
         """
+        callback_name = serialize_callable(self._streaming_callback) if self._streaming_callback else None
+
         data = default_to_dict(
             self,
             api_key=self._api_key.to_dict(),
             model=self._model_name,
             generation_config=self._generation_config,
             safety_settings=self._safety_settings,
             tools=self._tools,
+            streaming_callback=callback_name,
         )
         if (tools := data["init_parameters"].get("tools")) is not None:
             data["init_parameters"]["tools"] = []
@@ -213,6 +209,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiChatGenerator":
             data["init_parameters"]["safety_settings"] = {
                 HarmCategory(k): HarmBlockThreshold(v) for k, v in safety_settings.items()
             }
+        if (serialized_callback_handler := data["init_parameters"].get("streaming_callback")) is not None:
+            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
         return default_from_dict(cls, data)
 
     def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part:
@@ -274,16 +272,23 @@ def _message_to_content(self, message: ChatMessage) -> Content:
         return Content(parts=[part], role=role)
 
     @component.output_types(replies=List[ChatMessage])
-    def run(self, messages: List[ChatMessage]):
+    def run(
+        self,
+        messages: List[ChatMessage],
+        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
+    ):
         """
         Generates text based on the provided messages.
 
         :param messages:
             A list of `ChatMessage` instances, representing the input messages.
+        :param streaming_callback:
+            A callback function that is called when a new token is received from the stream.
         :returns:
             A dictionary containing the following key:
             - `replies`: A list containing the generated responses as `ChatMessage` instances.
         """
+        streaming_callback = streaming_callback or self._streaming_callback
         history = [self._message_to_content(m) for m in messages[:-1]]
         session = self._model.start_chat(history=history)
 
@@ -292,10 +297,22 @@ def run(self, messages: List[ChatMessage]):
             content=new_message,
             generation_config=self._generation_config,
             safety_settings=self._safety_settings,
+            stream=streaming_callback is not None,
         )
 
+        replies = self._get_stream_response(res, streaming_callback) if streaming_callback else self._get_response(res)
+
+        return {"replies": replies}
+
+    def _get_response(self, response_body: GenerateContentResponse) -> List[ChatMessage]:
+        """
+        Extracts the responses from the Google AI response.
+
+        :param response_body: The response from Google AI request.
+        :returns: The extracted responses.
+        """
         replies = []
-        for candidate in res.candidates:
+        for candidate in response_body.candidates:
             for part in candidate.content.parts:
                 if part.text != "":
                     replies.append(ChatMessage.from_system(part.text))
@@ -307,5 +324,23 @@ def run(self, messages: List[ChatMessage]):
                             name=part.function_call.name,
                         )
                     )
+        return replies
 
-        return {"replies": replies}
+    def _get_stream_response(
+        self, stream: GenerateContentResponse, streaming_callback: Callable[[StreamingChunk], None]
+    ) -> List[ChatMessage]:
+        """
+        Extracts the responses from the Google AI streaming response.
+
+        :param stream: The streaming response from the Google AI request.
+        :param streaming_callback: The handler for the streaming response.
+        :returns: The extracted response with the content of all streaming chunks.
+        """
+        responses = []
+        for chunk in stream:
+            content = chunk.text if len(chunk.parts) > 0 and "text" in chunk.parts[0] else ""
+            streaming_callback(StreamingChunk(content=content, meta=chunk.to_dict()))
+            responses.append(content)
+
+        combined_response = "".join(responses).lstrip()
+        return [ChatMessage.from_system(content=combined_response)]
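To put the new streaming_callback parameter in context, a minimal usage sketch (not part of the commit). It assumes the google-ai-haystack integration is installed and GOOGLE_API_KEY is set in the environment:

from haystack.dataclasses import ChatMessage, StreamingChunk

from haystack_integrations.components.generators.google_ai import GoogleAIGeminiChatGenerator


def print_chunk(chunk: StreamingChunk) -> None:
    # Called once per streamed chunk; prints the partial text as it arrives.
    print(chunk.content, end="", flush=True)


generator = GoogleAIGeminiChatGenerator(
    model="gemini-1.5-flash",
    streaming_callback=print_chunk,  # can also be passed per call to run()
)
result = generator.run(messages=[ChatMessage.from_user("What is Haystack?")])
print()
print(result["replies"][0].content)

Passing the callback either at construction time or to run() enables stream=True on the underlying Gemini request, and the reply is then assembled from the streamed chunks by _get_stream_response.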
