int-699 Clean temporary tests + clean whitespaces
antoine-b-smartway committed Apr 18, 2024
1 parent b29dea8 commit 35815c3
Showing 3 changed files with 7 additions and 112 deletions.
6 changes: 3 additions & 3 deletions magicparse/schema.py
@@ -44,7 +44,7 @@ def register(cls, schema: "Schema") -> None:

cls.registry[schema.key()] = schema


def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:
items = []
errors = []
@@ -54,9 +54,9 @@ def parse(self, data: Union[bytes, BytesIO]) -> Tuple[List[dict], List[dict]]:
errors.extend(row_errors)
else:
items.append(item)

return items, errors

def stream_parse(self, data: Union[bytes, BytesIO]) -> Iterable[Tuple[dict, list[dict]]]:
if isinstance(data, bytes):
stream = BytesIO(data)
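The schema.py hunks above touch both parsing entry points: parse(), which accumulates every row into two lists before returning, and stream_parse(), which yields one (item, errors) tuple per row. A minimal usage sketch of the difference, inferred from the signatures shown in the diff; the schema definition and sample data here are hypothetical:

from magicparse import Schema

schema = Schema.build(
    {
        "file_type": "csv",
        "fields": [{"key": "age", "type": "int", "column-number": 1}],
    }
)

data = b"1\na\n2"

# parse() materializes everything up front: all items, all errors.
items, errors = schema.parse(data)

# stream_parse() hands back rows one at a time, so results never have to
# be held in memory all at once.
for item, row_errors in schema.stream_parse(data):
    if not row_errors:
        print(item)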
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -32,5 +32,4 @@ extend-ignore = ["E203", "E722"]
exclude = [".git/", ".pytest_cache/", ".venv"]

[tool.pytest.ini_options]
-python_files = ["tests/*"]
-log_cli = true
+python_files = ["tests/*"]
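The pyproject.toml hunk drops log_cli = true, which enabled pytest's live log output and only served the temporary logging tests removed below, and rewrites the python_files line, presumably stripping trailing whitespace per the commit message. The resulting table implied by the diff:

[tool.pytest.ini_options]
python_files = ["tests/*"]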
110 changes: 3 additions & 107 deletions tests/test_schema.py
@@ -1,13 +1,10 @@
from decimal import Decimal

-from pyparsing import unicode_string
-from magicparse import Schema, TypeConverter, register
+from magicparse import Schema
from magicparse.schema import ColumnarSchema, CsvSchema
from magicparse.fields import ColumnarField, CsvField
import pytest
from unittest import TestCase
-import logging
-import psutil

class TestBuild(TestCase):
def test_default_csv_schema(self):
@@ -291,7 +288,7 @@ def test_register(self):
)
assert isinstance(schema, self.PipedSchema)

-class TestSteamParse(TestCase):
+class TestSteamParse(TestCase):

def test_stream_parse_errors_do_not_halt_parsing(self):
schema = Schema.build(
@@ -310,105 +307,4 @@ def test_stream_parse_errors_do_not_halt_parsing(self):
"error": "value 'a' is not a valid integer",
}]),
({"age": 2}, [])
]

#TODO: TO REMOVE BELOW
LOGGER = logging.getLogger(__name__)
ITERATION_COUNT = 1000000
class TestPerformanceToRemove(TestCase):

class LogConverter(TypeConverter):
#LOGGER = logging.getLogger(__name__)

@staticmethod
def key() -> str:
return "log"

def apply(self, value):
LOGGER.critical("Read value " + value)
return value

def test_parsing_order(self):
register(self.LogConverter)

schema = Schema.build(
{
"file_type": "csv",
"fields": [{"key": "name", "type": "log", "column-number": 1}],
}
)
input_csv = b"1\n2\n3\n4\n5"
rows, errors = schema.parse(input_csv)
assert not errors

for row in rows:
LOGGER.critical("Write value " + row['name'])

def test_streaming_order(self):
register(self.LogConverter)

schema = Schema.build(
{
"file_type": "csv",
"fields": [{"key": "name", "type": "log", "column-number": 1}],
}
)
input_csv = b"1\n2\n3\n4\n5\n"

for row in schema.stream_parse(input_csv):
item, errors = row
LOGGER.critical("Write value " + item['name'])

def test_parsing_memory_usage(self):
schema = Schema.build(
{
"file_type": "csv",
"fields": [
{"key": "num", "type": "decimal", "column-number": 1},
{"key": "name", "type": "str", "column-number": 2},
{"key": "date", "type": "datetime", "column-number": 3},
{"key": "description", "type": "str", "column-number": 4},
],
}
)
input_csv = ''.join([f'{num},"This is my name {num}",2022-01-12T10:12:03+03:00,"This is a very long description to load the memory with data\n' for num in range(ITERATION_COUNT)]).encode('utf-8')

process = psutil.Process()
memory_percent = process.memory_percent()
LOGGER.critical(f"Memory Usage: {memory_percent}%")

rows, errors = schema.parse(input_csv)
assert errors == []
i = 0
for row in rows:
if i % (ITERATION_COUNT / 10) == 0 :
memory_percent = process.memory_percent()
LOGGER.critical(f"Memory Usage: {memory_percent}%")
i = i + 1

def test_streaming_memory_usage(self):
schema = Schema.build(
{
"file_type": "csv",
"fields": [
{"key": "num", "type": "decimal", "column-number": 1},
{"key": "name", "type": "str", "column-number": 2},
{"key": "date", "type": "datetime", "column-number": 3},
{"key": "description", "type": "str", "column-number": 4},
],
}
)
input_csv = ''.join([f'{num},"This is my name {num}","2022-01-12T10:12:03+03:00","This is a very long description to load the memory with data\n' for num in range(ITERATION_COUNT)]).encode('utf-8')

process = psutil.Process()
memory_percent = process.memory_percent()
LOGGER.critical(f"Memory Usage: {memory_percent}%")

i = 0
for row in schema.stream_parse(input_csv):
item, errors = row
if i % (ITERATION_COUNT / 10) == 0 :
memory_percent = process.memory_percent()
LOGGER.critical(f"Memory Usage: {memory_percent}%")
i = i + 1

]
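Everything from the #TODO marker above down to the end of the file was temporary instrumentation: a logging TypeConverter registered to trace read/write order, plus psutil sampling to compare the memory profile of parse() against stream_parse(). A standalone sketch of that memory check, assuming only that psutil is installed; the schema and row count are illustrative:

from io import BytesIO

import psutil

from magicparse import Schema

ROWS = 100_000

schema = Schema.build(
    {
        "file_type": "csv",
        "fields": [{"key": "name", "type": "str", "column-number": 1}],
    }
)

payload = "".join(f'"name {n}"\n' for n in range(ROWS)).encode("utf-8")
process = psutil.Process()

# Streaming keeps at most one parsed row alive at a time, so resident
# memory should stay roughly flat while iterating.
for n, (item, errors) in enumerate(schema.stream_parse(BytesIO(payload))):
    if n % (ROWS // 10) == 0:
        print(f"Memory usage: {process.memory_percent():.2f}%")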
