Merge branch 'main' into feature/ollama_streaming_support
sachinsachdeva authored Feb 7, 2024
2 parents 2d7aa9e + c5480af commit 379b12b
Showing 20 changed files with 309 additions and 88 deletions.
45 changes: 45 additions & 0 deletions .github/utils/docstrings_checksum.py
@@ -0,0 +1,45 @@
import ast
import hashlib
from pathlib import Path
from typing import Iterator


def docstrings_checksum(python_files: Iterator[Path]):
    files_content = (f.read_text() for f in python_files)
    trees = (ast.parse(c) for c in files_content)

    # Get all docstrings from async functions, functions,
    # classes and modules definitions
    docstrings = []
    for tree in trees:
        for node in ast.walk(tree):
            if not isinstance(
                node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)
            ):
                # Skip all node types that can't have docstrings to prevent failures
                continue
            docstring = ast.get_docstring(node)
            if docstring:
                docstrings.append(docstring)

    # Sort them to be safe, since ast.walk() returns
    # nodes in no specified order.
    # See https://docs.python.org/3/library/ast.html#ast.walk
    docstrings.sort()

    return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--root", help="Project root folder", required=True, type=Path)
    args = parser.parse_args()

    # Get all Python files
    root: Path = args.root.absolute()
    python_files = root.glob("integrations/**/*.py")

    md5 = docstrings_checksum(python_files)
    print(md5)
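For reference, the checksum function can be exercised locally. The following is a minimal sketch (not part of the commit) that assumes docstrings_checksum.py is importable from the working directory; the throwaway integrations/example module is made up for illustration. It shows the property the labeler workflow below relies on: only docstring edits change the digest.

import tempfile
from pathlib import Path

from docstrings_checksum import docstrings_checksum  # assumption: the script is importable locally

with tempfile.TemporaryDirectory() as tmp:
    module = Path(tmp) / "integrations" / "example" / "module.py"  # hypothetical module path
    module.parent.mkdir(parents=True)

    module.write_text('def f():\n    """Original docstring."""\n    return 1\n')
    before = docstrings_checksum(Path(tmp).glob("integrations/**/*.py"))

    module.write_text('def f():\n    """Edited docstring."""\n    return 1\n')
    after = docstrings_checksum(Path(tmp).glob("integrations/**/*.py"))

    module.write_text('def f():\n    """Edited docstring."""\n    return 2\n')  # code-only edit
    code_only = docstrings_checksum(Path(tmp).glob("integrations/**/*.py"))

    assert before != after      # editing a docstring changes the checksum
    assert after == code_only   # editing only code does not

The CLI entry point behaves the same way: python .github/utils/docstrings_checksum.py --root . prints the MD5 digest of every docstring found under integrations/.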
56 changes: 56 additions & 0 deletions .github/workflows/CI_docstring_labeler.yml
@@ -0,0 +1,56 @@
name: Core / Add label on docstrings edit

on:
  pull_request_target:
    paths:
      - "integrations/**/*.py"

jobs:
  label:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout base commit
        uses: actions/checkout@v4
        with:
          ref: ${{ github.base_ref }}

      - name: Copy file
        # We copy our script after base ref checkout so we keep executing
        # the same version even after checking out the HEAD ref.
        # This is done to prevent executing malicious code in forks' PRs.
        run: cp .github/utils/docstrings_checksum.py "${{ runner.temp }}/docstrings_checksum.py"

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Get docstrings
        id: base-docstrings
        run: |
          CHECKSUM=$(python "${{ runner.temp }}/docstrings_checksum.py" --root "${{ github.workspace }}")
          echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT"
      - name: Checkout HEAD commit
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.ref }}
          # This must be set to correctly checkout a fork
          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Get docstrings
        id: head-docstrings
        run: |
          CHECKSUM=$(python "${{ runner.temp }}/docstrings_checksum.py" --root "${{ github.workspace }}")
          echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT"
      - name: Check if we should label
        id: run-check
        run: echo "should_run=${{ steps.base-docstrings.outputs.checksum != steps.head-docstrings.outputs.checksum }}" >> "$GITHUB_OUTPUT"

      - name: Add label
        if: ${{ steps.run-check.outputs.should_run == 'true' }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: gh pr edit ${{ github.event.pull_request.html_url }} --add-label "type:documentation"
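The labelling decision above boils down to comparing the two digests written to GITHUB_OUTPUT. Below is a hedged sketch (not part of the commit) of the same decision run locally from a checkout that contains the script, assuming two checkouts of the repository exist at the placeholder paths; docstring_digest is an illustrative helper name.

import subprocess

def docstring_digest(repo_root: str) -> str:
    # Mirrors the two "Get docstrings" steps: run the script from the current
    # checkout, point --root at the target checkout, and capture its stdout.
    result = subprocess.run(
        ["python", ".github/utils/docstrings_checksum.py", "--root", repo_root],
        capture_output=True,
        text=True,
        check=True,
    )
    return result.stdout.strip()

base = docstring_digest("/tmp/base-checkout")  # placeholder: checkout of the base ref
head = docstring_digest("/tmp/pr-checkout")    # placeholder: checkout of the PR head
should_label = base != head                    # same comparison as the "Check if we should label" step
print(f"should_run={str(should_label).lower()}")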
4 changes: 4 additions & 0 deletions .github/workflows/google_ai.yml
@@ -53,5 +53,9 @@ jobs:
        if: matrix.python-version == '3.9' && runner.os == 'Linux'
        run: hatch run lint:all

      - name: Generate docs
        if: matrix.python-version == '3.9' && runner.os == 'Linux'
        run: hatch run docs

      - name: Run tests
        run: hatch run cov
4 changes: 4 additions & 0 deletions .github/workflows/google_vertex.yml
@@ -52,5 +52,9 @@ jobs:
        if: matrix.python-version == '3.9' && runner.os == 'Linux'
        run: hatch run lint:all

      - name: Generate docs
        if: matrix.python-version == '3.9' && runner.os == 'Linux'
        run: hatch run docs

      - name: Run tests
        run: hatch run cov
25 changes: 15 additions & 10 deletions .github/workflows/unstructured.yml
@@ -27,18 +27,23 @@ jobs:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    services:
      unstructured-api:
        image: "quay.io/unstructured-io/unstructured-api:latest"
        ports:
          - 8000:8000
        options: >-
          --health-cmd "curl --fail http://localhost:8000/healthcheck || exit 1"
          --health-interval 10s
          --health-timeout 1s
          --health-retries 10

    steps:
      - name: Free up disk space
        run: |
          sudo docker image prune --all --force
      - name: Run Unstructured API (docker)
        run: |
          docker run -d \
            --name unstructured-api \
            -p 8000:8000 \
            --health-cmd "curl --fail http://localhost:8000/healthcheck || exit 1" \
            --health-interval 10s \
            --health-timeout 1s \
            --health-retries 10 \
            quay.io/unstructured-io/unstructured-api:latest
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
4 changes: 4 additions & 0 deletions .github/workflows/weaviate.yml
@@ -49,5 +49,9 @@ jobs:
      - name: Run Weaviate container
        run: docker-compose up -d

      - name: Generate docs
        if: matrix.python-version == '3.9' && runner.os == 'Linux'
        run: hatch run docs

      - name: Run tests
        run: hatch run cov
16 changes: 8 additions & 8 deletions integrations/cohere/tests/test_cohere_chat_generator.py
@@ -3,7 +3,7 @@

import cohere
import pytest
from haystack.components.generators.utils import default_streaming_callback
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage, ChatRole, StreamingChunk
from haystack_integrations.components.generators.cohere import CohereChatGenerator

@@ -72,13 +72,13 @@ def test_init_with_parameters(self):
        component = CohereChatGenerator(
            api_key="test-api-key",
            model="command-nightly",
            streaming_callback=default_streaming_callback,
            streaming_callback=print_streaming_chunk,
            api_base_url="test-base-url",
            generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
        )
        assert component.api_key == "test-api-key"
        assert component.model == "command-nightly"
        assert component.streaming_callback is default_streaming_callback
        assert component.streaming_callback is print_streaming_chunk
        assert component.api_base_url == "test-base-url"
        assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}

@@ -101,7 +101,7 @@ def test_to_dict_with_parameters(self):
        component = CohereChatGenerator(
            api_key="test-api-key",
            model="command-nightly",
            streaming_callback=default_streaming_callback,
            streaming_callback=print_streaming_chunk,
            api_base_url="test-base-url",
            generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
        )
@@ -110,7 +110,7 @@ def test_to_dict_with_parameters(self):
            "type": "haystack_integrations.components.generators.cohere.chat.chat_generator.CohereChatGenerator",
            "init_parameters": {
                "model": "command-nightly",
                "streaming_callback": "haystack.components.generators.utils.default_streaming_callback",
                "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                "api_base_url": "test-base-url",
                "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
            },
@@ -144,13 +144,13 @@ def test_from_dict(self, monkeypatch):
            "init_parameters": {
                "model": "command",
                "api_base_url": "test-base-url",
                "streaming_callback": "haystack.components.generators.utils.default_streaming_callback",
                "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
            },
        }
        component = CohereChatGenerator.from_dict(data)
        assert component.model == "command"
assert component.streaming_callback is default_streaming_callback
assert component.streaming_callback is print_streaming_chunk
assert component.api_base_url == "test-base-url"
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}

@@ -162,7 +162,7 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch):
            "init_parameters": {
                "model": "command",
                "api_base_url": "test-base-url",
                "streaming_callback": "haystack.components.generators.utils.default_streaming_callback",
                "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
            },
        }
17 changes: 5 additions & 12 deletions integrations/cohere/tests/test_cohere_generators.py
@@ -5,19 +5,12 @@

import pytest
from cohere import COHERE_API_URL
from haystack.components.generators.utils import print_streaming_chunk
from haystack_integrations.components.generators.cohere import CohereGenerator

pytestmark = pytest.mark.generators


def default_streaming_callback(chunk):
    """
    Default callback function for streaming responses from Cohere API.
    Prints the tokens of the first completion to stdout as soon as they are received and returns the chunk unchanged.
    """
    print(chunk.text, flush=True, end="")  # noqa: T201


class TestCohereGenerator:
    def test_init_default(self):
        component = CohereGenerator(api_key="test-api-key")
@@ -61,7 +54,7 @@ def test_to_dict_with_parameters(self):
            model="command-light",
            max_tokens=10,
            some_test_param="test-params",
            streaming_callback=default_streaming_callback,
            streaming_callback=print_streaming_chunk,
            api_base_url="test-base-url",
        )
        data = component.to_dict()
@@ -72,7 +65,7 @@ def test_to_dict_with_parameters(self):
                "max_tokens": 10,
                "some_test_param": "test-params",
                "api_base_url": "test-base-url",
                "streaming_callback": "tests.test_cohere_generators.default_streaming_callback",
                "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
            },
        }

@@ -106,13 +99,13 @@ def test_from_dict(self, monkeypatch):
                "max_tokens": 10,
                "some_test_param": "test-params",
                "api_base_url": "test-base-url",
                "streaming_callback": "tests.test_cohere_generators.default_streaming_callback",
                "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
            },
        }
        component: CohereGenerator = CohereGenerator.from_dict(data)
        assert component.api_key == "test-key"
        assert component.model == "command"
assert component.streaming_callback == default_streaming_callback
assert component.streaming_callback == print_streaming_chunk
assert component.api_base_url == "test-base-url"
assert component.model_parameters == {"max_tokens": 10, "some_test_param": "test-params"}

29 changes: 29 additions & 0 deletions integrations/google_ai/pydoc/config.yml
@@ -0,0 +1,29 @@
loaders:
  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
    search_path: [../src]
    modules: [
      "haystack_integrations.components.generators.google_ai.gemini",
      "haystack_integrations.components.generators.google_ai.chat.gemini",
    ]
    ignore_when_discovered: ["__init__"]
processors:
  - type: filter
    expression:
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: smart
  - type: crossref
renderer:
  type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
  excerpt: Google AI integration for Haystack
  category_slug: haystack-integrations
  title: Google AI
  slug: integrations-google-ai
  order: 60
  markdown:
    descriptive_class_title: false
    descriptive_module_title: true
    add_method_class_prefix: true
    add_member_class_prefix: false
    filename: _readme_google_ai.md
5 changes: 4 additions & 1 deletion integrations/google_ai/pyproject.toml
@@ -49,6 +49,7 @@ git_describe_command = 'git describe --tags --match="integrations/google_ai-v[0-
dependencies = [
"coverage[toml]>=6.5",
"pytest",
"haystack-pydoc-tools",
]
[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
@@ -61,7 +62,9 @@ cov = [
"test-cov",
"cov-report",
]

docs = [
"pydoc-markdown pydoc/config.yml"
]
[[tool.hatch.envs.all.matrix]]
python = ["3.7", "3.8", "3.9", "3.10", "3.11"]

34 changes: 34 additions & 0 deletions integrations/google_vertex/pydoc/config.yml
@@ -0,0 +1,34 @@
loaders:
  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
    search_path: [../src]
    modules: [
      "haystack_integrations.components.generators.google_vertex.gemini",
      "haystack_integrations.components.generators.google_vertex.captioner",
      "haystack_integrations.components.generators.google_vertex.code_generator",
      "haystack_integrations.components.generators.google_vertex.image_generator",
      "haystack_integrations.components.generators.google_vertex.question_answering",
      "haystack_integrations.components.generators.google_vertex.text_generator",
      "haystack_integrations.components.generators.google_vertex.chat.gemini",
    ]
    ignore_when_discovered: ["__init__"]
processors:
  - type: filter
    expression:
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: smart
  - type: crossref
renderer:
  type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
  excerpt: Google Vertex integration for Haystack
  category_slug: haystack-integrations
  title: Google Vertex
  slug: integrations-google-vertex
  order: 70
  markdown:
    descriptive_class_title: false
    descriptive_module_title: true
    add_method_class_prefix: true
    add_member_class_prefix: false
    filename: _readme_google_vertex.md
5 changes: 4 additions & 1 deletion integrations/google_vertex/pyproject.toml
@@ -48,6 +48,7 @@ git_describe_command = 'git describe --tags --match="integrations/google_vertex-
dependencies = [
"coverage[toml]>=6.5",
"pytest",
"haystack-pydoc-tools",
]
[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
@@ -60,7 +61,9 @@ cov = [
"test-cov",
"cov-report",
]

docs = [
"pydoc-markdown pydoc/config.yml"
]
[[tool.hatch.envs.all.matrix]]
python = ["3.7", "3.8", "3.9", "3.10", "3.11"]

