Skip to content

Commit

Permalink
feat: Add composite fields
Browse files Browse the repository at this point in the history
  • Loading branch information
sGeeK44 committed Dec 21, 2023
1 parent 1d7ab3e commit 79e1225
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 1 deletion.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ schema = {
{"key": "initial-price", "column-number": 11, "type": "decimal", "post-processors": {"name": "divide", "parameters": {"denominator": 100}}},
{"key": "unit-of-measurement", "column-number": 12, "type": "int", "pre-processors": [{"name": "map", "parameters": {"values": {"K": 0, "A": 1, "L": 2}}}]},
{"key": "volume", "column-number": 13, "type": "decimal"},
],
"computed-fields": [
{"key": "code", "type": "str", "composite-processors": [ {"name": "concat", "parameters": {"fields": ["code_1", "code_2"]}}]}
]
}

Expand Down Expand Up @@ -121,3 +122,11 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name":
#### Post-processors

- divide

### Computed Fields

Types, pre-processors, post-processors and validators are the same as for regular fields.

#### Composite-processors

- concat
7 changes: 7 additions & 0 deletions magicparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from .schema import Schema, builtins as builtins_schemas
from .post_processors import PostProcessor, builtins as builtins_post_processors
from .pre_processors import PreProcessor, builtins as builtins_pre_processors
from .composite_processors import (
CompositeProcessor,
builtins as builtins_composite_processors,
)
from .transform import Transform
from .type_converters import TypeConverter, builtins as builtins_type_converters
from typing import Any, Dict, List, Tuple, Union
Expand Down Expand Up @@ -44,6 +48,8 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
PreProcessor.register(item)
elif issubclass(item, Validator):
Validator.register(item)
elif issubclass(item, CompositeProcessor):
CompositeProcessor.register(item)
else:
raise ValueError(
"transforms must be a subclass of Transform (or a list of it)"
Expand All @@ -55,3 +61,4 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
register(builtins_type_converters)
register(builtins_validators)
register(builtins_post_processors)
register(builtins_composite_processors)
49 changes: 49 additions & 0 deletions magicparse/composite_processors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from abc import ABC

from .transform import Transform


class CompositeProcessor(Transform, ABC):
    """Base class of the composite processors, built by name from the registry."""

    @classmethod
    def build(cls, options: dict) -> "CompositeProcessor":
        """Instantiate the composite processor described by *options*.

        Args:
            options: Mapping with a mandatory ``"name"`` entry (the key the
                processor is registered under) and an optional
                ``"parameters"`` mapping, forwarded to the processor's
                constructor as keyword arguments.

        Returns:
            A new instance of the processor registered under ``options["name"]``.

        Raises:
            ValueError: If *options* has no usable ``"name"`` entry, or if no
                processor is registered under that name.
        """
        # NOTE: the "post-processor" wording below is kept verbatim because
        # existing tests match on these exact messages.
        #
        # Only lookup failures are translated into ValueError: a missing key
        # (LookupError) or an `options` / `name` value that cannot be used for
        # the lookup at all (TypeError). A bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            name = options["name"]
        except (LookupError, TypeError) as exc:
            raise ValueError("post-processor must have a 'name' key") from exc

        try:
            composite_processor = cls.registry[name]
        except (LookupError, TypeError) as exc:
            raise ValueError(f"invalid post-processor '{name}'") from exc

        if "parameters" in options:
            return composite_processor(**options["parameters"])
        return composite_processor()


class Concat(CompositeProcessor):
    """Composite processor that joins the values of several row entries."""

    def __init__(self, fields: list[str]) -> None:
        """Store the names of the row entries to concatenate.

        Args:
            fields: List of at least two strings, each naming a row entry.

        Raises:
            ValueError: If *fields* is empty, is not a list, contains a
                non-string element, or has fewer than two elements.
        """
        is_valid = (
            bool(fields)
            and not isinstance(fields, str)
            and isinstance(fields, list)
            and all(isinstance(name, str) for name in fields)
            and len(fields) >= 2
        )
        if not is_valid:
            raise ValueError(
                "composite-processor 'concat': "
                "'fields' parameter must be a list[str] with at least two elements"
            )

        self.fields = fields

    def apply(self, row: dict) -> str:
        """Return the values of the configured entries of *row*, joined in order."""
        parts = (row[name] for name in self.fields)
        return "".join(parts)

    @staticmethod
    def key() -> str:
        """Return the name this processor is registered under."""
        return "concat"


# Composite processors shipped with the package; the package __init__ imports
# this list as `builtins_composite_processors` and passes it to `register`.
builtins = [Concat]
26 changes: 26 additions & 0 deletions magicparse/fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from .composite_processors import CompositeProcessor
from .type_converters import TypeConverter
from .post_processors import PostProcessor
from .pre_processors import PreProcessor
Expand Down Expand Up @@ -96,3 +98,27 @@ def error(self, exception: Exception) -> dict:
"field-key": self.key,
"error": exception.args[0],
}


class CompositeField(Field):
    """Field whose raw value is computed from a row by composite processors."""

    def __init__(self, options: dict) -> None:
        """Build the field and its chain of composite processors.

        Args:
            options: Field options; must contain a ``"composite-processors"``
                iterable of processor descriptions accepted by
                ``CompositeProcessor.build``.

        Raises:
            KeyError: If ``"composite-processors"`` is missing from *options*.
            ValueError: If the list of composite processors is empty.
        """
        super().__init__(options)
        self.composite_processors = [
            CompositeProcessor.build(item) for item in options["composite-processors"]
        ]
        if not self.composite_processors:
            raise ValueError(
                f"Composite field {self.key} require at least one composite processor."
            )

    def _read_raw_value(self, row: dict) -> str:
        """Run every composite processor in order and return the final value.

        Each processor's result is stored under this field's key, so a later
        processor can use the output of an earlier one as input.
        """
        # Work on a shallow copy: intermediate results are written under
        # self.key, and the caller's row must not be modified as a side effect.
        scratch = dict(row)
        for processor in self.composite_processors:
            scratch[self.key] = processor.apply(scratch)
        return scratch[self.key]

    def error(self, exception: Exception) -> dict:
        """Describe *exception* as an error entry for this field."""
        return {
            "field-key": self.key,
            "error": exception.args[0],
        }
17 changes: 16 additions & 1 deletion magicparse/schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import codecs
from abc import ABC, abstractmethod
import csv
from .fields import Field
from .fields import Field, CompositeField
from io import BytesIO
from typing import Any, Dict, List, Tuple, Union, Iterable

Expand All @@ -13,6 +13,9 @@ class Schema(ABC):

def __init__(self, options: Dict[str, Any]) -> None:
self.fields = [Field.build(item) for item in options["fields"]]
self.computed_fields = [
CompositeField.build(item) for item in options.get("computed-fields", [])
]

self.has_header = options.get("has_header", False)
self.encoding = options.get("encoding", "utf-8")
Expand Down Expand Up @@ -70,6 +73,18 @@ def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:

item[field.key] = value

for computed_field in self.computed_fields:
try:
value = computed_field.read_value(item)
except Exception as exc:
errors.append(
{"row-number": row_number, **computed_field.error(exc)}
)
row_is_valid = False
continue

item[computed_field.key] = value

if row_is_valid:
result.append(item)

Expand Down
71 changes: 71 additions & 0 deletions tests/test_composite_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pytest

from magicparse.fields import CompositeField
from unittest import TestCase


class TestBuild(TestCase):
    """Construction and evaluation of ``CompositeField``."""

    @staticmethod
    def _concat(*names):
        """Return the options of a 'concat' processor over *names*."""
        return {"name": "concat", "parameters": {"fields": list(names)}}

    @staticmethod
    def _field(processors):
        """Build the 'output' composite field with the given processors value."""
        options = {"key": "output", "type": "str", "composite-processors": processors}
        return CompositeField(options)

    def test_without_composite_processor(self):
        options = {"key": "output", "type": "str"}
        with self.assertRaises(KeyError):
            CompositeField(options)

    def test_not_iterable_value_for_composite_processor(self):
        with self.assertRaises(TypeError):
            self._field(1)

    def test_bad_value_for_composite_processor(self):
        with self.assertRaises(ValueError):
            self._field("really")

    def test_empty_composite_processor(self):
        with self.assertRaises(ValueError):
            self._field([])

    def test_with_one_composite_processor(self):
        field = self._field([self._concat("code_1", "code_2")])

        row = {"code_1": "01", "code_2": "02"}

        assert field.read_value(row) == "0102"

    def test_with_two_composite_processor(self):
        # The second processor consumes the output of the first one.
        chain = [
            self._concat("code_1", "code_2"),
            self._concat("output", "code_2"),
        ]
        field = self._field(chain)

        row = {"code_1": "01", "code_2": "02"}

        assert field.read_value(row) == "010202"

    def test_error_format(self):
        field = self._field([self._concat("code_1", "code_2")])

        with pytest.raises(KeyError) as caught:
            field.read_value({})

        expected = {"error": "code_1", "field-key": "output"}
        assert field.error(caught.value) == expected
107 changes: 107 additions & 0 deletions tests/test_composite_processors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import pytest
from unittest import TestCase

from magicparse import CompositeProcessor


class TestBuild(TestCase):
    """``CompositeProcessor.build`` with registered and unknown processors."""

    class WithoutParamCompositeProcessor(CompositeProcessor):
        """Processor whose constructor takes no parameter."""

        @staticmethod
        def key() -> str:
            return "without-param"

        def apply(self, value):
            pass

    class WithParamCompositeProcessor(CompositeProcessor):
        """Processor whose constructor takes a single ``setting`` parameter."""

        def __init__(self, setting: str) -> None:
            self.setting = setting

        @staticmethod
        def key() -> str:
            return "with-param"

        def apply(self, value):
            pass

    def test_without_parameter(self):
        expected_type = self.WithoutParamCompositeProcessor
        CompositeProcessor.register(expected_type)

        built = CompositeProcessor.build({"name": "without-param"})

        assert isinstance(built, expected_type)

    def test_with_parameter(self):
        expected_type = self.WithParamCompositeProcessor
        CompositeProcessor.register(expected_type)

        options = {"name": "with-param", "parameters": {"setting": "value"}}
        built = CompositeProcessor.build(options)

        assert isinstance(built, expected_type)
        assert built.setting == "value"

    def test_unknown(self):
        options = {"name": "anything"}
        with pytest.raises(ValueError, match="invalid post-processor 'anything'"):
            CompositeProcessor.build(options)

    def test_no_name_provided(self):
        options = {}
        with pytest.raises(ValueError, match="post-processor must have a 'name' key"):
            CompositeProcessor.build(options)


class TestConcat(TestCase):
    """Parameter validation and behaviour of the 'concat' composite processor."""

    @staticmethod
    def _build(fields):
        """Build a 'concat' processor whose 'fields' parameter is *fields*."""
        options = {"name": "concat", "parameters": {"fields": fields}}
        return CompositeProcessor.build(options)

    def test_no_params(self):
        options = {"name": "concat"}
        with pytest.raises(TypeError):
            CompositeProcessor.build(options)

    def test_empty_params(self):
        options = {"name": "concat", "parameters": ""}
        with pytest.raises(TypeError):
            CompositeProcessor.build(options)

    def test_fields_params_empty(self):
        with pytest.raises(ValueError):
            self._build("")

    def test_fields_params_not_a_list_of_str(self):
        with pytest.raises(ValueError):
            self._build("xxx")

    def test_fields_params_has_less_than_two_field(self):
        with pytest.raises(ValueError):
            self._build(["code"])

    def test_field_not_present(self):
        processor = self._build(["code_1", "code_2"])

        with pytest.raises(KeyError):
            processor.apply({})

    def test_concat_two_fields(self):
        processor = self._build(["code_1", "code_2"])

        row = {"code_1": "X", "code_2": "Y"}

        assert processor.apply(row) == "XY"

    def test_concat_three_fields(self):
        processor = self._build(["code_1", "code_2", "code_3"])

        row = {"code_1": "X", "code_2": "Y", "code_3": "Z"}

        assert processor.apply(row) == "XYZ"

    def test_concat_integer(self):
        processor = self._build(["code_1", "code_2"])

        # Non-string values are rejected rather than converted.
        row = {"code_1": 1, "code_2": 2}
        with pytest.raises(TypeError):
            processor.apply(row)

0 comments on commit 79e1225

Please sign in to comment.