Skip to content

Commit

Permalink
feat: Add composite fields
Browse files Browse the repository at this point in the history
  • Loading branch information
sGeeK44 committed Dec 22, 2023
1 parent 9dccf4a commit 67d9f25
Show file tree
Hide file tree
Showing 7 changed files with 504 additions and 7 deletions.
92 changes: 86 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,84 @@ schema = {
"has_header": False,
"delimiter": ";",
"fields": [
{"key": "ean", "column-number": 2, "type": "str", "validators": [{"name": "regex-matches", "parameters": {"pattern": "^\\d{13}$"}}]},
{
"key": "ean",
"column-number": 2,
"type": "str",
"validators": [
{
"name": "regex-matches",
"parameters": {"pattern": "^\\d{13}$"},
}
],
},
{"key": "label", "column-number": 3, "type": "str"},
{"key": "family-code", "column-number": 8, "type": "str"},
{"key": "vat", "column-number": 10, "type": "decimal", "optional": False},
{"key": "initial-price", "column-number": 11, "type": "decimal", "post-processors": {"name": "divide", "parameters": {"denominator": 100}}},
{"key": "unit-of-measurement", "column-number": 12, "type": "int", "pre-processors": [{"name": "map", "parameters": {"values": {"K": 0, "A": 1, "L": 2}}}]},
{"key": "volume", "column-number": 13, "type": "decimal", "post-processors": {"name": "round", "parameters": {"precision": 3}}},
]
{
"key": "vat",
"column-number": 10,
"type": "decimal",
"optional": False,
},
{
"key": "initial-price",
"column-number": 11,
"type": "decimal",
"post-processors": [
{
"name": "divide",
"parameters": {"denominator": 100},
},
{
"name": "round",
"parameters": {"precision": 3},
}
]
},
{
"key": "unit-of-measurement",
"column-number": 12,
"type": "int",
"pre-processors": [
{
"name": "map",
"parameters": {"values": {"K": 0, "A": 1, "L": 2}},
}
],
}
],
"computed-fields": [
{
"key": "code",
"type": "str",
"builder": {
"name": "concat",
"parameters": {"fields": ["code_1", "code_2"]},
}
},
{
"key": "volume",
"type": "decimal",
"builder": {
"name": "divide",
"parameters": {
"numerator": "price",
"denominator": "price_by_unit",
},
}
},
{
"key": "price_by_unit",
"type": "decimal",
"builder": {
"name": "multiply",
"parameters": {
"x_factor": "price",
"y_factor": "unit",
}
}
}
],
}


Expand Down Expand Up @@ -122,3 +192,13 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name":

- divide
- round

### Computed Fields

Types, pre-processors, post-processors and validators are the same as for Field.

#### Builder

- concat
- divide
- multiply
7 changes: 7 additions & 0 deletions magicparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from .schema import Schema, builtins as builtins_schemas
from .post_processors import PostProcessor, builtins as builtins_post_processors
from .pre_processors import PreProcessor, builtins as builtins_pre_processors
from .builders import (
Builder,
builtins as builtins_composite_processors,
)
from .transform import Transform
from .type_converters import TypeConverter, builtins as builtins_type_converters
from typing import Any, Dict, List, Tuple, Union
Expand Down Expand Up @@ -44,6 +48,8 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
PreProcessor.register(item)
elif issubclass(item, Validator):
Validator.register(item)
elif issubclass(item, Builder):
Builder.register(item)
else:
raise ValueError(
"transforms must be a subclass of Transform (or a list of it)"
Expand All @@ -55,3 +61,4 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
register(builtins_type_converters)
register(builtins_validators)
register(builtins_post_processors)
register(builtins_composite_processors)
92 changes: 92 additions & 0 deletions magicparse/builders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from abc import ABC
from decimal import Decimal

from .transform import Transform


class Builder(Transform, ABC):
    """Base class for builders, which compute a value from a parsed row.

    Concrete builders are resolved by name through ``cls.registry``
    (presumably populated by ``Transform.register`` -- confirm in
    ``transform.py``).
    """

    @classmethod
    def build(cls, options: dict) -> "Builder":
        """Instantiate the builder described by ``options``.

        Args:
            options: Mapping with a required ``"name"`` key identifying a
                registered builder, and an optional ``"parameters"`` mapping
                forwarded to the builder constructor as keyword arguments.

        Returns:
            A configured instance of the requested builder.

        Raises:
            ValueError: If ``"name"`` is missing, or if it does not match any
                registered builder.
        """
        # Translate only lookup failures. A bare ``except`` would also turn
        # KeyboardInterrupt / SystemExit into a misleading ValueError.
        # TypeError covers ``options`` not being subscriptable (e.g. None).
        try:
            name = options["name"]
        except (KeyError, TypeError):
            raise ValueError("builder must have a 'name' key") from None

        # TypeError covers an unhashable ``name`` (e.g. a list).
        try:
            builder = cls.registry[name]
        except (KeyError, TypeError):
            raise ValueError(f"invalid builder '{name}'") from None

        if "parameters" in options:
            return builder(**options["parameters"])
        return builder()


class Concat(Builder):
    """Builder that joins several string values of a row into one string."""

    def __init__(self, fields: list[str]) -> None:
        """Store the keys of the row values to concatenate.

        Args:
            fields: Keys to read from the row, in concatenation order. Must be
                a list of at least two strings.

        Raises:
            ValueError: If ``fields`` is empty, is not a list, contains a
                non-string element, or has fewer than two elements.
        """
        # NOTE(review): the message says "composite-processor" while the other
        # builders in this module say "builder"; the text is kept verbatim.
        well_formed = (
            bool(fields)
            and isinstance(fields, list)
            and all(isinstance(name, str) for name in fields)
            and len(fields) >= 2
        )
        if not well_formed:
            raise ValueError(
                "composite-processor 'concat': "
                "'fields' parameter must be a list[str] with at least two elements"
            )

        self.fields = fields

    def apply(self, row: dict) -> str:
        """Return the values of the configured keys joined without separator.

        A key missing from ``row`` raises ``KeyError``; a non-string value
        makes ``str.join`` raise ``TypeError``.
        """
        parts = [row[name] for name in self.fields]
        return "".join(parts)

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is referenced in a schema."""
        return "concat"


class Divide(Builder):
    """Builder that divides one row value by another."""

    def __init__(self, numerator: str, denominator: str) -> None:
        """Store the row keys of the two operands.

        Args:
            numerator: Key of the row value to divide.
            denominator: Key of the row value to divide by.

        Raises:
            ValueError: If either argument is empty or is not a string.
        """
        numerator_ok = bool(numerator) and isinstance(numerator, str)
        if not numerator_ok:
            raise ValueError(
                "builder 'divide': 'numerator' parameter must be a non null str"
            )

        denominator_ok = bool(denominator) and isinstance(denominator, str)
        if not denominator_ok:
            raise ValueError(
                "builder 'divide': 'denominator' parameter must be a non null str"
            )

        self.numerator = numerator
        self.denominator = denominator

    def apply(self, row: dict) -> Decimal:
        """Return ``row[numerator] / row[denominator]``.

        The operands are used as found in ``row``; no conversion is done here.
        """
        dividend = row[self.numerator]
        divisor = row[self.denominator]
        return dividend / divisor

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is referenced in a schema."""
        return "divide"


class Multiply(Builder):
    """Builder that multiplies two row values together."""

    def __init__(self, x_factor: str, y_factor: str) -> None:
        """Store the row keys of the two factors.

        Args:
            x_factor: Key of the first row value.
            y_factor: Key of the second row value.

        Raises:
            ValueError: If either argument is empty or is not a string.
        """
        x_ok = bool(x_factor) and isinstance(x_factor, str)
        if not x_ok:
            raise ValueError(
                "builder 'multiply': 'x_factor' parameter must be a non null str"
            )

        y_ok = bool(y_factor) and isinstance(y_factor, str)
        if not y_ok:
            raise ValueError(
                "builder 'multiply': 'y_factor' parameter must be a non null str"
            )

        self.x_factor = x_factor
        self.y_factor = y_factor

    def apply(self, row: dict):
        """Return ``row[x_factor] * row[y_factor]``.

        The operands are used as found in ``row``; no conversion is done here.
        """
        left = row[self.x_factor]
        right = row[self.y_factor]
        return left * right

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is referenced in a schema."""
        return "multiply"


# Builders shipped with the package. The package ``__init__`` imports this
# list and passes it to ``register``.
builtins = [Concat, Divide, Multiply]
17 changes: 17 additions & 0 deletions magicparse/fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from .builders import Builder
from .type_converters import TypeConverter
from .post_processors import PostProcessor
from .pre_processors import PreProcessor
Expand Down Expand Up @@ -96,3 +98,18 @@ def error(self, exception: Exception) -> dict:
"field-key": self.key,
"error": exception.args[0],
}


class ComputedField(Field):
    """Field whose raw value is produced by a builder instead of read from input.

    The schema parser calls it with the dict of values already parsed for the
    current row, so a builder can only reference keys set before it runs.
    """

    def __init__(self, options: dict) -> None:
        """Initialise the field and its builder.

        Args:
            options: Field definition. Must contain a ``"builder"`` entry that
                ``Builder.build`` accepts; the remaining keys are handled by
                ``Field.__init__``.

        Raises:
            KeyError: If ``options`` has no ``"builder"`` entry.
            ValueError: If the builder definition is rejected by
                ``Builder.build``.
        """
        super().__init__(options)
        self.builder = Builder.build(options["builder"])

    def _read_raw_value(self, row) -> str:
        """Return the value computed by the builder for ``row``.

        NOTE(review): annotated ``str`` but the value is whatever the builder
        returns (``Divide.apply`` is annotated as returning ``Decimal``).
        """
        return self.builder.apply(row)

    def error(self, exception: Exception) -> dict:
        """Describe ``exception`` as an error entry for this field.

        Returns:
            A dict with the field key and the first argument of the exception,
            or ``repr(exception)`` when the exception carries no arguments.
        """
        # An exception raised without arguments has empty ``args``. Indexing it
        # blindly would raise IndexError from inside the error reporter itself.
        args = exception.args
        message = args[0] if args else repr(exception)
        return {
            "field-key": self.key,
            "error": message,
        }
17 changes: 16 additions & 1 deletion magicparse/schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import codecs
from abc import ABC, abstractmethod
import csv
from .fields import Field
from .fields import Field, ComputedField
from io import BytesIO
from typing import Any, Dict, List, Tuple, Union, Iterable

Expand All @@ -13,6 +13,9 @@ class Schema(ABC):

def __init__(self, options: Dict[str, Any]) -> None:
self.fields = [Field.build(item) for item in options["fields"]]
self.computed_fields = [
ComputedField.build(item) for item in options.get("computed-fields", [])
]

self.has_header = options.get("has_header", False)
self.encoding = options.get("encoding", "utf-8")
Expand Down Expand Up @@ -70,6 +73,18 @@ def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:

item[field.key] = value

for computed_field in self.computed_fields:
try:
value = computed_field.read_value(item)
except Exception as exc:
errors.append(
{"row-number": row_number, **computed_field.error(exc)}
)
row_is_valid = False
continue

item[computed_field.key] = value

if row_is_valid:
result.append(item)

Expand Down
Loading

0 comments on commit 67d9f25

Please sign in to comment.