From c7479cfd3c5276d0a908ecf9707074829c56ca0c Mon Sep 17 00:00:00 2001
From: anakin87
Date: Mon, 4 Mar 2024 15:56:06 +0100
Subject: [PATCH] unstructured: add missing from_dict method

---
 .../converters/unstructured/converter.py      | 16 ++++++++++++--
 .../unstructured/tests/test_converter.py      | 21 +++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py
index a4a132437..5485416df 100644
--- a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py
+++ b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py
@@ -8,9 +8,9 @@
 from pathlib import Path
 from typing import Any, Dict, List, Literal, Optional, Union
 
-from haystack import Document, component, default_to_dict
+from haystack import Document, component, default_from_dict, default_to_dict
 from haystack.components.converters.utils import normalize_metadata
-from haystack.utils import Secret
+from haystack.utils import Secret, deserialize_secrets_inplace
 from tqdm import tqdm
 
 from unstructured.documents.elements import Element  # type: ignore[import]
@@ -91,6 +91,18 @@ def to_dict(self) -> Dict[str, Any]:
             progress_bar=self.progress_bar,
         )
 
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "UnstructuredFileConverter":
+        """
+        Deserializes the component from a dictionary; the `api_key` secret entry is deserialized first.
+        :param data:
+            Dictionary to deserialize from; must contain an `init_parameters` entry.
+        :returns:
+            Deserialized component.
+        """
+        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
+        return default_from_dict(cls, data)
+
     @component.output_types(documents=List[Document])
     def run(
         self,
diff --git a/integrations/unstructured/tests/test_converter.py b/integrations/unstructured/tests/test_converter.py
index 7a5e135ac..5d1a6c091 100644
--- a/integrations/unstructured/tests/test_converter.py
+++ b/integrations/unstructured/tests/test_converter.py
@@ -52,6 +52,27 @@ def test_to_dict(self):
             },
         }
 
+    def test_from_dict(self, monkeypatch):
+        monkeypatch.setenv("UNSTRUCTURED_API_KEY", "test-api-key")
+        converter_dict = {
+            "type": "haystack_integrations.components.converters.unstructured.converter.UnstructuredFileConverter",
+            "init_parameters": {
+                "api_url": "http://custom-url:8000/general",
+                "api_key": {"env_vars": ["UNSTRUCTURED_API_KEY"], "strict": False, "type": "env_var"},
+                "document_creation_mode": "one-doc-per-element",
+                "separator": "|",
+                "unstructured_kwargs": {"foo": "bar"},
+                "progress_bar": False,
+            },
+        }
+        converter = UnstructuredFileConverter.from_dict(converter_dict)
+        assert converter.api_url == "http://custom-url:8000/general"
+        assert converter.api_key.resolve_value() == "test-api-key"
+        assert converter.document_creation_mode == "one-doc-per-element"
+        assert converter.separator == "|"
+        assert converter.unstructured_kwargs == {"foo": "bar"}
+        assert not converter.progress_bar
+
     @pytest.mark.integration
     def test_run_one_doc_per_file(self, samples_path):
         pdf_path = samples_path / "sample_pdf.pdf"