From ad443604666ada5547b86ad231da120162adbc9e Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 17 Dec 2024 08:51:01 +0100 Subject: [PATCH 1/4] better handling of exception during pipeline deployment ; refactoring --- src/hayhooks/server/utils/deploy_utils.py | 2 + tests/test_files/broken_rag_pipeline.yml | 26 ++++++ .../basic_rag_pipeline.yml | 0 .../chat_with_website.yml | 0 .../working_pipelines/minimal_retriever.yml | 79 +++++++++++++++++++ .../pipeline_qdrant.yml | 0 .../pipeline_qdrant_2.yml | 0 .../{ => working_pipelines}/st_retriever.yml | 0 .../test_pipeline_01.yml | 0 .../test_pipeline_02.yml | 0 tests/test_it_deploy.py | 16 ++-- tests/test_it_handling_deploy_exceptions.py | 14 ++++ 12 files changed, 129 insertions(+), 8 deletions(-) create mode 100644 tests/test_files/broken_rag_pipeline.yml rename tests/test_files/{ => working_pipelines}/basic_rag_pipeline.yml (100%) rename tests/test_files/{ => working_pipelines}/chat_with_website.yml (100%) create mode 100644 tests/test_files/working_pipelines/minimal_retriever.yml rename tests/test_files/{ => working_pipelines}/pipeline_qdrant.yml (100%) rename tests/test_files/{ => working_pipelines}/pipeline_qdrant_2.yml (100%) rename tests/test_files/{ => working_pipelines}/st_retriever.yml (100%) rename tests/test_files/{ => working_pipelines}/test_pipeline_01.yml (100%) rename tests/test_files/{ => working_pipelines}/test_pipeline_02.yml (100%) create mode 100644 tests/test_it_handling_deploy_exceptions.py diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py index 2d42742..44ac36f 100644 --- a/src/hayhooks/server/utils/deploy_utils.py +++ b/src/hayhooks/server/utils/deploy_utils.py @@ -16,6 +16,8 @@ def deploy_pipeline_def(app, pipeline_def: PipelineDefinition): pipe = registry.add(pipeline_def.name, pipeline_def.source_code) except ValueError as e: raise HTTPException(status_code=409, detail=f"{e}") from e + except Exception as e: + raise HTTPException(status_code=500, detail=f"{e}") from e PipelineRunRequest = get_request_model(pipeline_def.name, pipe.inputs()) PipelineRunResponse = get_response_model(pipeline_def.name, pipe.outputs()) diff --git a/tests/test_files/broken_rag_pipeline.yml b/tests/test_files/broken_rag_pipeline.yml new file mode 100644 index 0000000..218ef9d --- /dev/null +++ b/tests/test_files/broken_rag_pipeline.yml @@ -0,0 +1,26 @@ +components: + llm: + init_parameters: + api_base_url: null + api_key: + env_vars: + # Missing OPENAI_API_KEY - should raise an exception + strict: true + type: env_var + generation_kwargs: {} + model: gpt-4o-mini + organization: null + streaming_callback: null + system_prompt: null + type: haystack.components.generators.openai.OpenAIGenerator + prompt_builder: + init_parameters: + required_variables: null + template: "{{ question }}" + variables: null + type: haystack.components.builders.prompt_builder.PromptBuilder +connections: +- receiver: llm.prompt + sender: prompt_builder.prompt +max_runs_per_component: 100 +metadata: {} diff --git a/tests/test_files/basic_rag_pipeline.yml b/tests/test_files/working_pipelines/basic_rag_pipeline.yml similarity index 100% rename from tests/test_files/basic_rag_pipeline.yml rename to tests/test_files/working_pipelines/basic_rag_pipeline.yml diff --git a/tests/test_files/chat_with_website.yml b/tests/test_files/working_pipelines/chat_with_website.yml similarity index 100% rename from tests/test_files/chat_with_website.yml rename to tests/test_files/working_pipelines/chat_with_website.yml diff --git a/tests/test_files/working_pipelines/minimal_retriever.yml b/tests/test_files/working_pipelines/minimal_retriever.yml new file mode 100644 index 0000000..ad7fb3e --- /dev/null +++ b/tests/test_files/working_pipelines/minimal_retriever.yml @@ -0,0 +1,79 @@ +components: + document_embedder: + init_parameters: + batch_size: 32 + config_kwargs: null + device: + device: cpu + type: single + model: sentence-transformers/paraphrase-MiniLM-L3-v2 + model_kwargs: null + normalize_embeddings: false + precision: float32 + prefix: "" + progress_bar: true + suffix: "" + token: + env_vars: + - HF_API_TOKEN + - HF_TOKEN + strict: false + type: env_var + tokenizer_kwargs: null + truncate_dim: null + trust_remote_code: false + type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder + document_retriever: + init_parameters: + document_store: + init_parameters: + api_key: null + embedding_dim: 384 + force_disable_check_same_thread: false + grpc_port: 6334 + hnsw_config: null + host: null + https: null + index: Document + init_from: null + location: null + metadata: {} + on_disk: false + on_disk_payload: null + optimizers_config: null + path: null + payload_fields_to_index: null + port: 6333 + prefer_grpc: false + prefix: null + progress_bar: true + quantization_config: null + recreate_index: false + replication_factor: null + return_embedding: false + scroll_size: 10000 + shard_number: null + similarity: cosine + sparse_idf: false + timeout: null + url: http://localhost:6333 + use_sparse_embeddings: false + wait_result_from_api: true + wal_config: null + write_batch_size: 100 + write_consistency_factor: null + type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore + filter_policy: replace + filters: null + group_by: null + group_size: null + return_embedding: false + scale_score: false + score_threshold: null + top_k: 3 + type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever +connections: + - receiver: document_retriever.query_embedding + sender: document_embedder.embedding +max_runs_per_component: 100 +metadata: {} diff --git a/tests/test_files/pipeline_qdrant.yml b/tests/test_files/working_pipelines/pipeline_qdrant.yml similarity index 100% rename from tests/test_files/pipeline_qdrant.yml rename to tests/test_files/working_pipelines/pipeline_qdrant.yml diff --git a/tests/test_files/pipeline_qdrant_2.yml b/tests/test_files/working_pipelines/pipeline_qdrant_2.yml similarity index 100% rename from tests/test_files/pipeline_qdrant_2.yml rename to tests/test_files/working_pipelines/pipeline_qdrant_2.yml diff --git a/tests/test_files/st_retriever.yml b/tests/test_files/working_pipelines/st_retriever.yml similarity index 100% rename from tests/test_files/st_retriever.yml rename to tests/test_files/working_pipelines/st_retriever.yml diff --git a/tests/test_files/test_pipeline_01.yml b/tests/test_files/working_pipelines/test_pipeline_01.yml similarity index 100% rename from tests/test_files/test_pipeline_01.yml rename to tests/test_files/working_pipelines/test_pipeline_01.yml diff --git a/tests/test_files/test_pipeline_02.yml b/tests/test_files/working_pipelines/test_pipeline_02.yml similarity index 100% rename from tests/test_files/test_pipeline_02.yml rename to tests/test_files/working_pipelines/test_pipeline_02.yml diff --git a/tests/test_it_deploy.py b/tests/test_it_deploy.py index 2e5ae88..0721e1e 100644 --- a/tests/test_it_deploy.py +++ b/tests/test_it_deploy.py @@ -6,19 +6,19 @@ client = TestClient(app) # Load pipeline definitions from test_files -test_files = Path(__file__).parent / "test_files" -pipeline_names = [file.stem for file in test_files.glob("*.yml")] +test_files = Path(__file__).parent / "test_files" / "working_pipelines" +pipeline_data = [{"name": file.stem, "source_code": file.read_text()} for file in test_files.glob("*.yml")] -@pytest.mark.parametrize("pipeline_name", pipeline_names) -def test_deploy_pipeline_def(pipeline_name: str): - pipeline_def = (Path(__file__).parent / "test_files" / f"{pipeline_name}.yml").read_text() - - deploy_response = client.post("/deploy", json={"name": pipeline_name, "source_code": pipeline_def}) +@pytest.mark.parametrize("pipeline_data", pipeline_data) +def test_deploy_pipeline_def(pipeline_data: dict): + deploy_response = client.post( + "/deploy", json={"name": pipeline_data["name"], "source_code": pipeline_data["source_code"]} + ) assert deploy_response.status_code == 200 status_response = client.get("/status") - assert pipeline_name in status_response.json()["pipelines"] + assert pipeline_data["name"] in status_response.json()["pipelines"] docs_response = client.get("/docs") assert docs_response.status_code == 200 diff --git a/tests/test_it_handling_deploy_exceptions.py b/tests/test_it_handling_deploy_exceptions.py new file mode 100644 index 0000000..9e9a8b6 --- /dev/null +++ b/tests/test_it_handling_deploy_exceptions.py @@ -0,0 +1,14 @@ +from fastapi.testclient import TestClient +from hayhooks.server import app +from pathlib import Path + +client = TestClient(app) + + +def test_gracefully_handle_deploy_exception(): + pipeline_name = "broken_rag_pipeline" + pipeline_def = (Path(__file__).parent / "test_files" / "broken_rag_pipeline.yml").read_text() + + deploy_response = client.post("/deploy", json={"name": pipeline_name, "source_code": pipeline_def}) + assert deploy_response.status_code == 500 + assert "Couldn't deserialize component 'llm'" in deploy_response.json()["detail"] From b800f31858eea38ad8dd895f993eb4a0949fef82 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 17 Dec 2024 08:51:09 +0100 Subject: [PATCH 2/4] refactoring --- src/hayhooks/server/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hayhooks/server/app.py b/src/hayhooks/server/app.py index 934797c..96daaa8 100644 --- a/src/hayhooks/server/app.py +++ b/src/hayhooks/server/app.py @@ -8,7 +8,7 @@ logger = logging.getLogger("uvicorn.info") -def create_app(): +def create_app() -> FastAPI: if root_path := os.environ.get("HAYHOOKS_ROOT_PATH"): app = FastAPI(root_path=root_path) else: From 66884e0a6e35f5858258f807e9eea5eff3c56d25 Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 17 Dec 2024 08:51:14 +0100 Subject: [PATCH 3/4] add tests for pipeline registry --- tests/test_registry.py | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tests/test_registry.py diff --git a/tests/test_registry.py b/tests/test_registry.py new file mode 100644 index 0000000..6468cf2 --- /dev/null +++ b/tests/test_registry.py @@ -0,0 +1,66 @@ +import pytest +from pathlib import Path +from haystack import Pipeline +from haystack.core.errors import PipelineError + +from hayhooks.server.pipelines.registry import _PipelineRegistry + + +@pytest.fixture +def pipeline_registry(): + return _PipelineRegistry() + + +@pytest.fixture +def sample_pipeline_yaml(): + return (Path(__file__).parent / "test_files" / "working_pipelines" / "basic_rag_pipeline.yml").read_text() + + +def test_add_pipeline(pipeline_registry, sample_pipeline_yaml): + result = pipeline_registry.add("test_pipeline", sample_pipeline_yaml) + + expected_pipeline = Pipeline.loads(sample_pipeline_yaml) + assert result == expected_pipeline + assert pipeline_registry.get("test_pipeline") == expected_pipeline + + +def test_add_duplicate_pipeline(pipeline_registry, sample_pipeline_yaml): + result = pipeline_registry.add("test_pipeline", sample_pipeline_yaml) + + expected_pipeline = Pipeline.loads(sample_pipeline_yaml) + assert result == expected_pipeline + + with pytest.raises(ValueError, match="A pipeline with name test_pipeline is already in the registry"): + pipeline_registry.add("test_pipeline", sample_pipeline_yaml) + + +def test_add_invalid_pipeline(pipeline_registry, mocker): + mocker.patch('haystack.Pipeline.loads', side_effect=PipelineError("Invalid pipeline")) + + with pytest.raises(ValueError, match="Unable to parse Haystack Pipeline test_pipeline"): + pipeline_registry.add("test_pipeline", "invalid yaml") + + +def test_remove_pipeline(pipeline_registry, sample_pipeline_yaml): + pipeline_registry.add("test_pipeline", sample_pipeline_yaml) + pipeline_registry.remove("test_pipeline") + + assert pipeline_registry.get("test_pipeline") is None + + +def test_remove_nonexistent_pipeline(pipeline_registry): + pipeline_registry.remove("nonexistent_pipeline") + + +def test_get_nonexistent_pipeline(pipeline_registry): + assert pipeline_registry.get("nonexistent_pipeline") is None + + +def test_get_names(pipeline_registry, sample_pipeline_yaml, mocker): + mocker.patch('haystack.Pipeline.loads', return_value=mocker.Mock(spec=Pipeline)) + + pipeline_registry.add("pipeline1", sample_pipeline_yaml) + pipeline_registry.add("pipeline2", sample_pipeline_yaml) + + names = pipeline_registry.get_names() + assert sorted(names) == ["pipeline1", "pipeline2"] From 2b4838814519ef5118c9a25d733ae7f4b02afb8e Mon Sep 17 00:00:00 2001 From: Michele Pangrazzi Date: Tue, 17 Dec 2024 08:51:34 +0100 Subject: [PATCH 4/4] add deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 607d7d0..e44b332 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ source = "vcs" dependencies = [ "coverage[toml]>=6.5", "pytest", + "pytest-mock", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}"