From 35815c3db98cfb31a717e90aab964afbed255761 Mon Sep 17 00:00:00 2001
From: antoine-b-smartway
Date: Thu, 18 Apr 2024 10:20:25 +0200
Subject: [PATCH] int-699 Clean temporary tests + clean whitespaces

---
 magicparse/schema.py |   6 +--
 pyproject.toml       |   3 +-
 tests/test_schema.py | 110 ++-----------------------------------------
 3 files changed, 7 insertions(+), 112 deletions(-)

diff --git a/magicparse/schema.py b/magicparse/schema.py
index 348dc55..84017fc 100644
--- a/magicparse/schema.py
+++ b/magicparse/schema.py
@@ -44,7 +44,7 @@ def register(cls, schema: "Schema") -> None:
         cls.registry[schema.key()] = schema
 
-    
+
     def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:
         items = []
         errors = []
@@ -54,9 +54,9 @@ def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:
                 errors.extend(row_errors)
             else:
                 items.append(item)
-        
+
         return items, errors
-    
+
     def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[Tuple[dict, list[dict]]]:
         if isinstance(data, bytes):
             stream = BytesIO(data)
diff --git a/pyproject.toml b/pyproject.toml
index 70870c3..199e8bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,5 +32,4 @@ extend-ignore = ["E203", "E722"]
 exclude = [".git/", ".pytest_cache/", ".venv"]
 
 [tool.pytest.ini_options]
-python_files = ["tests/*"]
-log_cli = true
+python_files = ["tests/*"]
\ No newline at end of file
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 545cc32..909d21a 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1,13 +1,10 @@
 from decimal import Decimal
-from pyparsing import unicode_string
-from magicparse import Schema, TypeConverter, register
+from magicparse import Schema
 from magicparse.schema import ColumnarSchema, CsvSchema
 from magicparse.fields import ColumnarField, CsvField
 import pytest
 from unittest import TestCase
-import logging
-import psutil
 
 
 class TestBuild(TestCase):
     def test_default_csv_schema(self):
@@ -291,7 +288,7 @@ def test_register(self):
         )
         assert isinstance(schema, self.PipedSchema)
 
-class TestSteamParse(TestCase): 
+class TestSteamParse(TestCase):
 
     def test_stream_parse_errors_do_not_halt_parsing(self):
         schema = Schema.build(
@@ -310,105 +307,4 @@ def test_stream_parse_errors_do_not_halt_parsing(self):
                 "error": "value 'a' is not a valid integer",
             }]),
             ({"age": 2}, [])
-        ]
-
-#TODO: TO REMOVE BELOW
-LOGGER = logging.getLogger(__name__)
-ITERATION_COUNT = 1000000
-class TestPerformanceToRemove(TestCase):
-
-    class LogConverter(TypeConverter):
-        #LOGGER = logging.getLogger(__name__)
-
-        @staticmethod
-        def key() -> str:
-            return "log"
-
-        def apply(self, value):
-            LOGGER.critical("Read value " + value)
-            return value
-
-    def test_parsing_order(self):
-        register(self.LogConverter)
-
-        schema = Schema.build(
-            {
-                "file_type": "csv",
-                "fields": [{"key": "name", "type": "log", "column-number": 1}],
-            }
-        )
-        input_csv = b"1\n2\n3\n4\n5"
-        rows, errors = schema.parse(input_csv)
-        assert not errors
-
-        for row in rows:
-            LOGGER.critical("Write value " + row['name'])
-
-    def test_streaming_order(self):
-        register(self.LogConverter)
-
-        schema = Schema.build(
-            {
-                "file_type": "csv",
-                "fields": [{"key": "name", "type": "log", "column-number": 1}],
-            }
-        )
-        input_csv = b"1\n2\n3\n4\n5\n"
-
-        for row in schema.stream_parse(input_csv):
-            item, errors = row
-            LOGGER.critical("Write value " + item['name'])
-
-    def test_parsing_memory_usage(self):
-        schema = Schema.build(
-            {
-                "file_type": "csv",
-                "fields": [
-                    {"key": "num", "type": "decimal", "column-number": 1},
-                    {"key": "name", "type": "str", "column-number": 2},
-                    {"key": "date", "type": "datetime", "column-number": 3},
-                    {"key": "description", "type": "str", "column-number": 4},
-                ],
-            }
-        )
-        input_csv = ''.join([f'{num},"This is my name {num}",2022-01-12T10:12:03+03:00,"This is a very long description to load the memory with data\n' for num in range(ITERATION_COUNT)]).encode('utf-8')
-
-        process = psutil.Process()
-        memory_percent = process.memory_percent()
-        LOGGER.critical(f"Memory Usage: {memory_percent}%")
-
-        rows, errors = schema.parse(input_csv)
-        assert errors == []
-        i = 0
-        for row in rows:
-            if i % (ITERATION_COUNT / 10) == 0 :
-                memory_percent = process.memory_percent()
-                LOGGER.critical(f"Memory Usage: {memory_percent}%")
-            i = i + 1
-
-    def test_streaming_memory_usage(self):
-        schema = Schema.build(
-            {
-                "file_type": "csv",
-                "fields": [
-                    {"key": "num", "type": "decimal", "column-number": 1},
-                    {"key": "name", "type": "str", "column-number": 2},
-                    {"key": "date", "type": "datetime", "column-number": 3},
-                    {"key": "description", "type": "str", "column-number": 4},
-                ],
-            }
-        )
-        input_csv = ''.join([f'{num},"This is my name {num}","2022-01-12T10:12:03+03:00","This is a very long description to load the memory with data\n' for num in range(ITERATION_COUNT)]).encode('utf-8')
-
-        process = psutil.Process()
-        memory_percent = process.memory_percent()
-        LOGGER.critical(f"Memory Usage: {memory_percent}%")
-
-        i = 0
-        for row in schema.stream_parse(input_csv):
-            item, errors = row
-            if i % (ITERATION_COUNT / 10) == 0 :
-                memory_percent = process.memory_percent()
-                LOGGER.critical(f"Memory Usage: {memory_percent}%")
-            i = i + 1
-        
\ No newline at end of file
+        ]
\ No newline at end of file