Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add composite fields #19

Merged
merged 3 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 87 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,84 @@ schema = {
"has_header": False,
"delimiter": ";",
"fields": [
{"key": "ean", "column-number": 2, "type": "str", "validators": [{"name": "regex-matches", "parameters": {"pattern": "^\\d{13}$"}}]},
{
"key": "ean",
"column-number": 2,
"type": "str",
"validators": [
{
"name": "regex-matches",
"parameters": {"pattern": "^\\d{13}$"},
}
],
},
{"key": "label", "column-number": 3, "type": "str"},
{"key": "family-code", "column-number": 8, "type": "str"},
{"key": "vat", "column-number": 10, "type": "decimal", "optional": False},
{"key": "initial-price", "column-number": 11, "type": "decimal", "post-processors": {"name": "divide", "parameters": {"denominator": 100}}},
{"key": "unit-of-measurement", "column-number": 12, "type": "int", "pre-processors": [{"name": "map", "parameters": {"values": {"K": 0, "A": 1, "L": 2}}}]},
{"key": "volume", "column-number": 13, "type": "decimal"},
]
{
"key": "vat",
"column-number": 10,
"type": "decimal",
"optional": False,
},
{
"key": "initial-price",
"column-number": 11,
"type": "decimal",
"post-processors": [
{
"name": "divide",
"parameters": {"denominator": 100},
},
{
"name": "round",
"parameters": {"precision": 3},
}
]
},
{
"key": "unit-of-measurement",
"column-number": 12,
"type": "int",
"pre-processors": [
{
"name": "map",
"parameters": {"values": {"K": 0, "A": 1, "L": 2}},
}
],
}
],
"computed-fields": [
{
"key": "code",
"type": "str",
"builder": {
"name": "concat",
"parameters": {"fields": ["code_1", "code_2"]},
}
},
{
"key": "volume",
"type": "decimal",
"builder": {
"name": "divide",
"parameters": {
"numerator": "price",
"denominator": "price_by_unit",
},
}
},
{
"key": "price_by_unit",
ducdetronquito marked this conversation as resolved.
Show resolved Hide resolved
"type": "decimal",
"builder": {
"name": "multiply",
"parameters": {
"x_factor": "price",
"y_factor": "unit",
}
}
}
],
}


Expand Down Expand Up @@ -121,3 +191,14 @@ assert rows == [{"name": "Joe"}, {"name": "William"}, {"name": "Jack"}, {"name":
#### Post-processors

- divide
- round

### Computed Fields

Types, pre-processors, post-processors and validators are the same as for regular fields.

#### Builder

- concat
- divide
- multiply
7 changes: 7 additions & 0 deletions magicparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from .schema import Schema, builtins as builtins_schemas
from .post_processors import PostProcessor, builtins as builtins_post_processors
from .pre_processors import PreProcessor, builtins as builtins_pre_processors
from .builders import (
Builder,
builtins as builtins_composite_processors,
)
from .transform import Transform
from .type_converters import TypeConverter, builtins as builtins_type_converters
from typing import Any, Dict, List, Tuple, Union
Expand Down Expand Up @@ -44,6 +48,8 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
PreProcessor.register(item)
elif issubclass(item, Validator):
Validator.register(item)
elif issubclass(item, Builder):
Builder.register(item)
else:
raise ValueError(
"transforms must be a subclass of Transform (or a list of it)"
Expand All @@ -55,3 +61,4 @@ def register(items: Union[Registrable, List[Registrable]]) -> None:
register(builtins_type_converters)
register(builtins_validators)
register(builtins_post_processors)
register(builtins_composite_processors)
92 changes: 92 additions & 0 deletions magicparse/builders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from abc import ABC
from decimal import Decimal

from .transform import Transform


class Builder(Transform, ABC):
    """Base class for builders, looked up by name in the class registry."""

    @classmethod
    def build(cls, options: dict) -> "Builder":
        """Instantiate the registered builder described by ``options``.

        Args:
            options: Builder definition. It must contain a ``"name"`` key
                identifying a registered builder. When a ``"parameters"``
                key is present, its mapping is forwarded to the builder
                constructor as keyword arguments.

        Returns:
            A new instance of the builder registered under
            ``options["name"]``.

        Raises:
            ValueError: If ``options`` has no usable ``"name"`` key, or if
                no builder is registered under that name.
        """
        # Only catch lookup/type failures: a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        try:
            name = options["name"]
        except (LookupError, TypeError) as exc:
            raise ValueError("builder must have a 'name' key") from exc

        try:
            builder = cls.registry[name]
        except (LookupError, TypeError) as exc:
            raise ValueError(f"invalid builder '{name}'") from exc

        if "parameters" in options:
            return builder(**options["parameters"])
        return builder()


class Concat(Builder):
    """Builder that joins the values of several row keys into one string."""

    def __init__(self, fields: list[str]) -> None:
        """Store the row keys whose values will be concatenated.

        Args:
            fields: Keys of the row values to join, in order.

        Raises:
            ValueError: If ``fields`` is not a list of at least two strings.
        """
        # A single isinstance(list) check already rejects None, str and any
        # other non-list input; the length check rejects the empty list.
        is_valid = (
            isinstance(fields, list)
            and len(fields) >= 2
            and all(isinstance(field, str) for field in fields)
        )
        if not is_valid:
            # Message prefix aligned with the other builders ("builder '...'").
            raise ValueError(
                "builder 'concat': "
                "'fields' parameter must be a list[str] with at least two elements"
            )

        self.fields = fields

    def apply(self, row: dict) -> str:
        """Return the concatenation of ``row[field]`` for each configured field.

        Raises:
            KeyError: If one of the configured fields is missing from ``row``.
            TypeError: If one of the looked-up values is not a ``str``.
        """
        return "".join(row[field] for field in self.fields)

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is registered."""
        return "concat"


class Divide(Builder):
    """Builder that divides one row value by another."""

    def __init__(self, numerator: str, denominator: str) -> None:
        """Store the row keys used as numerator and denominator.

        Raises:
            ValueError: If either parameter is empty or not a ``str``.
        """
        if not (numerator and isinstance(numerator, str)):
            raise ValueError(
                "builder 'divide': 'numerator' parameter must be a non null str"
            )
        if not (denominator and isinstance(denominator, str)):
            raise ValueError(
                "builder 'divide': 'denominator' parameter must be a non null str"
            )

        self.numerator = numerator
        self.denominator = denominator

    def apply(self, row: dict) -> Decimal:
        """Return ``row[numerator] / row[denominator]``."""
        dividend = row[self.numerator]
        divisor = row[self.denominator]
        return dividend / divisor

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is registered."""
        return "divide"


class Multiply(Builder):
    """Builder that multiplies two row values together."""

    def __init__(self, x_factor: str, y_factor: str) -> None:
        """Store the row keys of the two factors.

        Raises:
            ValueError: If either parameter is empty or not a ``str``.
        """
        if not (x_factor and isinstance(x_factor, str)):
            raise ValueError(
                "builder 'multiply': 'x_factor' parameter must be a non null str"
            )
        if not (y_factor and isinstance(y_factor, str)):
            raise ValueError(
                "builder 'multiply': 'y_factor' parameter must be a non null str"
            )

        self.x_factor = x_factor
        self.y_factor = y_factor

    def apply(self, row: dict):
        """Return ``row[x_factor] * row[y_factor]``."""
        left = row[self.x_factor]
        right = row[self.y_factor]
        return left * right

    @staticmethod
    def key() -> str:
        """Return the name under which this builder is registered."""
        return "multiply"


# Builders shipped with the package; the package __init__ imports this list
# and passes it to register().
builtins = [Concat, Divide, Multiply]
17 changes: 17 additions & 0 deletions magicparse/fields.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from .builders import Builder
from .type_converters import TypeConverter
from .post_processors import PostProcessor
from .pre_processors import PreProcessor
Expand Down Expand Up @@ -96,3 +98,18 @@ def error(self, exception: Exception) -> dict:
"field-key": self.key,
"error": exception.args[0],
}


class ComputedField(Field):
    """Field whose raw value is produced by a builder instead of read from input."""

    def __init__(self, options: dict) -> None:
        """Initialise the field and build its builder from ``options["builder"]``.

        Raises:
            KeyError: If ``options`` has no ``"builder"`` key.
            ValueError: If the builder definition is invalid.
        """
        super().__init__(options)
        self.builder = Builder.build(options["builder"])

    def _read_raw_value(self, row):
        """Return the value the builder computes from ``row``.

        The returned type depends on the configured builder (for example a
        ``str`` for concat, a numeric value for divide/multiply).
        """
        return self.builder.apply(row)

    def error(self, exception: Exception) -> dict:
        """Describe ``exception`` as an error entry for this field.

        Returns:
            A dict with the field key and the exception's first argument.
        """
        # Exceptions raised without arguments have an empty ``args`` tuple;
        # fall back to the repr so error reporting itself never raises.
        message = exception.args[0] if exception.args else repr(exception)
        return {
            "field-key": self.key,
            "error": message,
        }
22 changes: 21 additions & 1 deletion magicparse/post_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,24 @@ def key() -> str:
return "divide"


builtins = [Divide]
class Round(PostProcessor):
    """Post-processor that rounds a numeric value to a fixed precision."""

    Number = TypeVar("Number", int, float, Decimal)

    def __init__(self, precision: int) -> None:
        """Store the number of digits to keep after the decimal point.

        Args:
            precision: Non-negative number of digits passed to ``round``.

        Raises:
            ValueError: If ``precision`` is not an integer or is negative.
        """
        # Reject non-integers here: ``round(value, 2.5)`` or a str precision
        # would otherwise only fail later, with a TypeError, at apply time.
        if not isinstance(precision, int) or precision < 0:
            raise ValueError(
                "post-processor 'round': "
                "'precision' parameter must be a positive or zero integer"
            )

        self.precision = precision

    def apply(self, value: Number) -> Number:
        """Return ``value`` rounded with the built-in ``round``."""
        return round(value, self.precision)

    @staticmethod
    def key() -> str:
        """Return the name under which this post-processor is registered."""
        return "round"


# Post-processors shipped with the package; the package __init__ imports this
# list and passes it to register().
builtins = [Divide, Round]
17 changes: 16 additions & 1 deletion magicparse/schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import codecs
from abc import ABC, abstractmethod
import csv
from .fields import Field
from .fields import Field, ComputedField
from io import BytesIO
from typing import Any, Dict, List, Tuple, Union, Iterable

Expand All @@ -13,6 +13,9 @@ class Schema(ABC):

def __init__(self, options: Dict[str, Any]) -> None:
self.fields = [Field.build(item) for item in options["fields"]]
self.computed_fields = [
ComputedField.build(item) for item in options.get("computed-fields", [])
]

self.has_header = options.get("has_header", False)
self.encoding = options.get("encoding", "utf-8")
Expand Down Expand Up @@ -70,6 +73,18 @@ def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:

item[field.key] = value

for computed_field in self.computed_fields:
try:
value = computed_field.read_value(item)
except Exception as exc:
errors.append(
{"row-number": row_number, **computed_field.error(exc)}
)
row_is_valid = False
continue

item[computed_field.key] = value

if row_is_valid:
result.append(item)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="magicparse",
version="0.9.1",
version="0.10.0",
description="Declarative parser",
author="ZG",
author_email="[email protected]",
Expand Down
Loading