From 73eb51730b4b0a0de64ae516c55ef0c62d184919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20PERIN?= Date: Thu, 21 Dec 2023 21:40:44 +0100 Subject: [PATCH] feat: Add composite fields --- README.md | 9 +++ magicparse/__init__.py | 7 ++ magicparse/composite_processors.py | 49 +++++++++++++ magicparse/fields.py | 26 +++++++ magicparse/schema.py | 17 ++++- tests/test_composite_fields.py | 71 +++++++++++++++++++ tests/test_composite_processors.py | 107 +++++++++++++++++++++++++++++ 7 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 magicparse/composite_processors.py create mode 100644 tests/test_composite_fields.py create mode 100644 tests/test_composite_processors.py diff --git a/README.md b/README.md index 5a37327..f293c49 100644 --- a/README.md +++ b/README.md @@ -121,3 +121,12 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name": #### Post-processors - divide + +### Computed Fields + +Types, Pre-processors, Post-processors and validators are the same as for Field + +#### Composite-processors + +- concat + diff --git a/magicparse/__init__.py b/magicparse/__init__.py index cc43a53..715f845 100644 --- a/magicparse/__init__.py +++ b/magicparse/__init__.py @@ -3,6 +3,10 @@ from .schema import Schema, builtins as builtins_schemas from .post_processors import PostProcessor, builtins as builtins_post_processors from .pre_processors import PreProcessor, builtins as builtins_pre_processors +from .composite_processors import ( + CompositeProcessor, + builtins as builtins_composite_processors, +) from .transform import Transform from .type_converters import TypeConverter, builtins as builtins_type_converters from typing import Any, Dict, List, Tuple, Union @@ -44,6 +48,8 @@ def register(items: Union[Registrable, List[Registrable]]) -> None: PreProcessor.register(item) elif issubclass(item, Validator): Validator.register(item) + elif issubclass(item, CompositeProcessor): + CompositeProcessor.register(item) else: raise ValueError( 
"""Composite processors: transforms that compute one value from a whole row."""
from abc import ABC

from .transform import Transform


class CompositeProcessor(Transform, ABC):
    """Base class for processors whose ``apply`` receives the whole row."""

    @classmethod
    def build(cls, options: dict) -> "CompositeProcessor":
        """Instantiate the composite processor described by *options*.

        Args:
            options: Mapping with a mandatory ``"name"`` key, looked up in
                ``cls.registry``, and an optional ``"parameters"`` mapping
                that is expanded into keyword arguments of the constructor.

        Returns:
            A new instance of the processor class found under ``name``.

        Raises:
            ValueError: If *options* has no usable ``"name"`` entry, or if no
                processor is found under that name in ``cls.registry``.
        """
        # NOTE(review): both messages below say "post-processor" although this
        # builds composite processors. The wording is kept byte-for-byte
        # because callers and tests may match on the exact message text.
        try:
            name = options["name"]
        except (KeyError, TypeError) as exc:
            # KeyError: the key is missing; TypeError: *options* is not a
            # mapping (e.g. a str or an int). Anything else is a real bug and
            # must propagate instead of being hidden by a bare ``except``.
            raise ValueError("post-processor must have a 'name' key") from exc

        try:
            composite_processor = cls.registry[name]
        except (KeyError, TypeError) as exc:
            # TypeError covers an unhashable *name* (e.g. a list).
            raise ValueError(f"invalid post-processor '{name}'") from exc

        if "parameters" in options:
            return composite_processor(**options["parameters"])
        return composite_processor()


class Concat(CompositeProcessor):
    """Concatenate the string values of several row fields, in order."""

    def __init__(self, fields: list[str]) -> None:
        """Store the names of the fields to concatenate.

        Args:
            fields: Names of at least two row keys.

        Raises:
            ValueError: If *fields* is not a list of at least two strings.
        """
        # ``isinstance(..., list)`` already rejects None, "" and any str, so
        # no separate emptiness or str check is needed.
        if (
            not isinstance(fields, list)
            or len(fields) < 2
            or not all(isinstance(field, str) for field in fields)
        ):
            raise ValueError(
                "composite-processor 'concat': "
                "'fields' parameter must be a list[str] with at least two elements"
            )

        self.fields = fields

    def apply(self, row: dict) -> str:
        """Return the values of the configured fields joined without separator.

        Raises:
            KeyError: If one of the configured fields is absent from *row*.
            TypeError: If one of the values is not a ``str`` (``str.join``
                refuses non-string items).
        """
        return "".join(row[field] for field in self.fields)

    @staticmethod
    def key() -> str:
        """Return the name under which this processor is looked up."""
        return "concat"


builtins = [Concat]
class CompositeField(Field):
    """Field whose raw value is computed from the other values of a row.

    The value is produced by running the configured composite processors in
    order; each processor sees the row plus the result of the previous
    processor stored under this field's own key.
    """

    def __init__(self, options: dict) -> None:
        """Build the field and its composite processors from *options*.

        Args:
            options: Field options; must contain a ``"composite-processors"``
                entry holding a non-empty iterable of processor options.

        Raises:
            KeyError: If ``"composite-processors"`` is missing.
            ValueError: If the list of composite processors is empty (or if
                ``CompositeProcessor.build`` rejects one of the items).
        """
        super().__init__(options)
        self.composite_processors = [
            CompositeProcessor.build(item) for item in options["composite-processors"]
        ]
        if not self.composite_processors:
            raise ValueError(
                f"Composite field {self.key} require at least one composite processor."
            )

    def _read_raw_value(self, row) -> str:
        """Compute the raw value of this field from *row*.

        *row* itself is left untouched: intermediate results are written to a
        shallow copy, so a failing or partially applied processor chain never
        leaks a half-computed value into the caller's dict.
        """
        working_row = dict(row)
        for processor in self.composite_processors:
            # Each step may read the previous step's output under self.key.
            working_row[self.key] = processor.apply(working_row)
        return working_row[self.key]

    def error(self, exception: Exception) -> dict:
        """Describe *exception* as an error entry for this field."""
        return {
            "field-key": self.key,
            "error": exception.args[0],
        }
"""Tests for building and evaluating ``CompositeField``."""
import pytest

from magicparse.fields import CompositeField
from unittest import TestCase


def _concat(*fields):
    """Return the options of a ``concat`` composite processor over *fields*."""
    return {"name": "concat", "parameters": {"fields": list(fields)}}


def _output_options(composite_processors):
    """Return options of a ``str`` field named ``output`` using the processors."""
    return {
        "key": "output",
        "type": "str",
        "composite-processors": composite_processors,
    }


class TestBuild(TestCase):
    def test_without_composite_processor(self):
        options = {"key": "output", "type": "str"}

        with pytest.raises(KeyError):
            CompositeField(options)

    def test_not_iterable_value_for_composite_processor(self):
        with pytest.raises(TypeError):
            CompositeField(_output_options(1))

    def test_bad_value_for_composite_processor(self):
        with pytest.raises(ValueError):
            CompositeField(_output_options("really"))

    def test_empty_composite_processor(self):
        with pytest.raises(ValueError):
            CompositeField(_output_options([]))

    def test_with_one_composite_processor(self):
        field = CompositeField(_output_options([_concat("code_1", "code_2")]))

        computed = field.read_value({"code_1": "01", "code_2": "02"})

        assert computed == "0102"

    def test_with_two_composite_processor(self):
        # The second processor reads the first one's result under "output".
        field = CompositeField(
            _output_options(
                [_concat("code_1", "code_2"), _concat("output", "code_2")]
            )
        )

        computed = field.read_value({"code_1": "01", "code_2": "02"})

        assert computed == "010202"

    def test_error_format(self):
        field = CompositeField(_output_options([_concat("code_1", "code_2")]))

        with pytest.raises(KeyError) as error:
            field.read_value({})

        expected = {"error": "code_1", "field-key": "output"}
        assert field.error(error.value) == expected
"""Tests for ``CompositeProcessor.build`` and the built-in ``concat`` processor."""
import pytest
from unittest import TestCase

from magicparse import CompositeProcessor


def _build_concat(fields):
    """Build a ``concat`` processor configured with *fields*."""
    return CompositeProcessor.build(
        {"name": "concat", "parameters": {"fields": fields}}
    )


class TestBuild(TestCase):
    class WithoutParamCompositeProcessor(CompositeProcessor):
        @staticmethod
        def key() -> str:
            return "without-param"

        def apply(self, value):
            pass

    class WithParamCompositeProcessor(CompositeProcessor):
        def __init__(self, setting: str) -> None:
            self.setting = setting

        @staticmethod
        def key() -> str:
            return "with-param"

        def apply(self, value):
            pass

    def test_without_parameter(self):
        processor_class = self.WithoutParamCompositeProcessor
        CompositeProcessor.register(processor_class)

        composite_processor = CompositeProcessor.build({"name": "without-param"})

        assert isinstance(composite_processor, processor_class)

    def test_with_parameter(self):
        processor_class = self.WithParamCompositeProcessor
        CompositeProcessor.register(processor_class)

        options = {"name": "with-param", "parameters": {"setting": "value"}}
        composite_processor = CompositeProcessor.build(options)

        assert isinstance(composite_processor, processor_class)
        assert composite_processor.setting == "value"

    def test_unknown(self):
        with pytest.raises(ValueError, match="invalid post-processor 'anything'"):
            CompositeProcessor.build({"name": "anything"})

    def test_no_name_provided(self):
        with pytest.raises(ValueError, match="post-processor must have a 'name' key"):
            CompositeProcessor.build({})


class TestConcat(TestCase):
    def test_no_params(self):
        with pytest.raises(TypeError):
            CompositeProcessor.build({"name": "concat"})

    def test_empty_params(self):
        with pytest.raises(TypeError):
            CompositeProcessor.build({"name": "concat", "parameters": ""})

    def test_fields_params_empty(self):
        with pytest.raises(ValueError):
            _build_concat("")

    def test_fields_params_not_a_list_of_str(self):
        with pytest.raises(ValueError):
            _build_concat("xxx")

    def test_fields_params_has_less_than_two_field(self):
        with pytest.raises(ValueError):
            _build_concat(["code"])

    def test_field_not_present(self):
        processor = _build_concat(["code_1", "code_2"])

        with pytest.raises(KeyError):
            processor.apply({})

    def test_concat_two_fields(self):
        processor = _build_concat(["code_1", "code_2"])

        result = processor.apply({"code_1": "X", "code_2": "Y"})

        assert result == "XY"

    def test_concat_three_fields(self):
        processor = _build_concat(["code_1", "code_2", "code_3"])

        result = processor.apply({"code_1": "X", "code_2": "Y", "code_3": "Z"})

        assert result == "XYZ"

    def test_concat_integer(self):
        processor = _build_concat(["code_1", "code_2"])

        with pytest.raises(TypeError):
            processor.apply({"code_1": 1, "code_2": 2})