
Commit 080a529

Merge branch 'main' into add-recursive-chunking
davidsbatista committed Dec 16, 2024
2 parents: d9addfa + a5b57f4
Showing 10 changed files with 16 additions and 7 deletions.
e2e/pipelines/test_dense_doc_search.py (2 changes: 1 addition & 1 deletion)
@@ -26,7 +26,7 @@ def test_dense_doc_search_pipeline(tmp_path, samples_path):
     indexing_pipeline.add_component(instance=DocumentJoiner(), name="joiner")
     indexing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
     indexing_pipeline.add_component(
-        instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter"
+        instance=DocumentSplitter(split_by="period", split_length=250, split_overlap=30), name="splitter"
     )
     indexing_pipeline.add_component(
         instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder"
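
Note: this and the following e2e pipeline switch the splitter from split_by="sentence" to split_by="period". A minimal sketch of the updated splitter in isolation, assuming the haystack DocumentSplitter API shown in the diff; the sample document and the print statement are illustrative only, not part of this commit:

    from haystack import Document
    from haystack.components.preprocessors import DocumentSplitter

    # Same configuration as the indexing pipeline above: split on periods,
    # up to 250 units per chunk, with a 30-unit overlap between chunks.
    splitter = DocumentSplitter(split_by="period", split_length=250, split_overlap=30)
    docs = [Document(content="First sentence. Second sentence. Third sentence.")]
    result = splitter.run(documents=docs)
    print(len(result["documents"]))  # number of chunks produced
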
e2e/pipelines/test_preprocessing_pipeline.py (6 changes: 1 addition & 5 deletions)
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import json
-
 from haystack import Pipeline
 from haystack.components.classifiers import DocumentLanguageClassifier
 from haystack.components.converters import TextFileToDocument
@@ -25,9 +23,7 @@ def test_preprocessing_pipeline(tmp_path):
         instance=MetadataRouter(rules={"en": {"field": "language", "operator": "==", "value": "en"}}), name="router"
     )
     preprocessing_pipeline.add_component(instance=DocumentCleaner(), name="cleaner")
-    preprocessing_pipeline.add_component(
-        instance=DocumentSplitter(split_by="sentence", split_length=1), name="splitter"
-    )
+    preprocessing_pipeline.add_component(instance=DocumentSplitter(split_by="period", split_length=1), name="splitter")
     preprocessing_pipeline.add_component(
         instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder"
     )
@@ -45,6 +45,7 @@ def test_to_dict(self):
 
     def test_from_dict(self, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.classifiers.zero_shot_document_classifier.TransformersZeroShotDocumentClassifier",
             "init_parameters": {
@@ -73,6 +74,7 @@ def test_from_dict(self, monkeypatch):
 
     def test_from_dict_no_default_parameters(self, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.classifiers.zero_shot_document_classifier.TransformersZeroShotDocumentClassifier",
             "init_parameters": {"model": "cross-encoder/nli-deberta-v3-xsmall", "labels": ["positive", "negative"]},
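
Note: each of the remaining changed files adds the same line, monkeypatch.delenv("HF_TOKEN", raising=False), next to the existing HF_API_TOKEN cleanup, so a token present in the developer's environment cannot leak into these mocked tests. A sketch of how the same pattern could be shared through a pytest fixture; the fixture name and the example test are hypothetical, and the commit itself repeats the two delenv calls inline in each test:

    import pytest

    @pytest.fixture
    def no_hf_tokens(monkeypatch):
        # raising=False makes each call a no-op when the variable is unset.
        monkeypatch.delenv("HF_API_TOKEN", raising=False)
        monkeypatch.delenv("HF_TOKEN", raising=False)

    def test_from_dict_without_token(no_hf_tokens):
        # Any test that requests the fixture runs with a clean environment.
        ...
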
test/components/generators/chat/test_hugging_face_local.py (1 change: 1 addition & 0 deletions)
@@ -166,6 +166,7 @@ def test_from_dict(self, model_info_mock):
     @patch("haystack.components.generators.chat.hugging_face_local.pipeline")
     def test_warm_up(self, pipeline_mock, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         generator = HuggingFaceLocalChatGenerator(
             model="mistralai/Mistral-7B-Instruct-v0.2",
             task="text2text-generation",
@@ -18,6 +18,7 @@ class TestHuggingFaceLocalGenerator:
     @patch("haystack.utils.hf.model_info")
     def test_init_default(self, model_info_mock, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         model_info_mock.return_value.pipeline_tag = "text2text-generation"
         generator = HuggingFaceLocalGenerator()

@@ -273,7 +273,7 @@ def test_warm_up(self, similarity, monkeypatch):
         Test that ranker loads the SentenceTransformer model correctly during warm up.
         """
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
-
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         mock_model_class = MagicMock()
         mock_model_instance = MagicMock()
         mock_model_class.return_value = mock_model_instance
test/components/rankers/test_transformers_similarity.py (1 change: 1 addition & 0 deletions)
@@ -313,6 +313,7 @@ def test_device_map_and_device_raises(self, caplog):
     @patch("haystack.components.rankers.transformers_similarity.AutoModelForSequenceClassification.from_pretrained")
     def test_device_map_dict(self, mocked_automodel, _mocked_autotokenizer, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         ranker = TransformersSimilarityRanker("model", model_kwargs={"device_map": {"layer_1": 1, "classifier": "cpu"}})
 
         class MockedModel:
test/components/readers/test_extractive.py (3 changes: 3 additions & 0 deletions)
@@ -519,6 +519,7 @@ def __init__(self):
 @patch("haystack.components.readers.extractive.AutoModelForQuestionAnswering.from_pretrained")
 def test_device_map_auto(mocked_automodel, _mocked_autotokenizer, monkeypatch):
     monkeypatch.delenv("HF_API_TOKEN", raising=False)
+    monkeypatch.delenv("HF_TOKEN", raising=False)
     reader = ExtractiveReader("deepset/roberta-base-squad2", model_kwargs={"device_map": "auto"})
     auto_device = ComponentDevice.resolve_device(None)

@@ -537,6 +538,7 @@ def __init__(self):
 @patch("haystack.components.readers.extractive.AutoModelForQuestionAnswering.from_pretrained")
 def test_device_map_str(mocked_automodel, _mocked_autotokenizer, monkeypatch):
     monkeypatch.delenv("HF_API_TOKEN", raising=False)
+    monkeypatch.delenv("HF_TOKEN", raising=False)
     reader = ExtractiveReader("deepset/roberta-base-squad2", model_kwargs={"device_map": "cpu:0"})
 
     class MockedModel:
@@ -554,6 +556,7 @@ def __init__(self):
 @patch("haystack.components.readers.extractive.AutoModelForQuestionAnswering.from_pretrained")
 def test_device_map_dict(mocked_automodel, _mocked_autotokenizer, monkeypatch):
     monkeypatch.delenv("HF_API_TOKEN", raising=False)
+    monkeypatch.delenv("HF_TOKEN", raising=False)
     reader = ExtractiveReader(
         "deepset/roberta-base-squad2", model_kwargs={"device_map": {"layer_1": 1, "classifier": "cpu"}}
     )
test/components/routers/test_transformers_text_router.py (3 changes: 3 additions & 0 deletions)
@@ -54,6 +54,7 @@ def test_to_dict_with_cpu_device(self, mock_auto_config_from_pretrained):
     def test_from_dict(self, mock_auto_config_from_pretrained, monkeypatch):
         mock_auto_config_from_pretrained.return_value = MagicMock(label2id={"en": 0, "de": 1})
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.routers.transformers_text_router.TransformersTextRouter",
             "init_parameters": {
@@ -84,6 +85,7 @@ def test_from_dict(self, mock_auto_config_from_pretrained, monkeypatch):
     def test_from_dict_no_default_parameters(self, mock_auto_config_from_pretrained, monkeypatch):
         mock_auto_config_from_pretrained.return_value = MagicMock(label2id={"en": 0, "de": 1})
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.routers.transformers_text_router.TransformersTextRouter",
             "init_parameters": {"model": "papluca/xlm-roberta-base-language-detection"},
@@ -105,6 +107,7 @@ def test_from_dict_no_default_parameters(self, mock_auto_config_from_pretrained,
     def test_from_dict_with_cpu_device(self, mock_auto_config_from_pretrained, monkeypatch):
         mock_auto_config_from_pretrained.return_value = MagicMock(label2id={"en": 0, "de": 1})
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.routers.transformers_text_router.TransformersTextRouter",
             "init_parameters": {
test/components/routers/test_zero_shot_text_router.py (2 changes: 2 additions & 0 deletions)
@@ -28,6 +28,7 @@ def test_to_dict(self):
 
     def test_from_dict(self, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.routers.zero_shot_text_router.TransformersZeroShotTextRouter",
             "init_parameters": {
@@ -56,6 +57,7 @@ def test_from_dict(self, monkeypatch):
 
     def test_from_dict_no_default_parameters(self, monkeypatch):
         monkeypatch.delenv("HF_API_TOKEN", raising=False)
+        monkeypatch.delenv("HF_TOKEN", raising=False)
         data = {
             "type": "haystack.components.routers.zero_shot_text_router.TransformersZeroShotTextRouter",
             "init_parameters": {"labels": ["query", "passage"]},
