Skip to content

Commit

Permalink
int-679 Skip empty lines from the parsing (#27)
Browse files Browse the repository at this point in the history
int-679 Skip empty lines from the parsing to prevent noise in the logs
  • Loading branch information
antoine-b-smartway authored Apr 24, 2024
1 parent 6c81c2a commit b005370
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
14 changes: 11 additions & 3 deletions magicparse/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ def stream_parse(
row_number += 1

for row in reader:
errors = []
row_number += 1
if not any(row):
continue

errors = []
item = {}
for field in self.fields:
try:
Expand Down Expand Up @@ -124,8 +127,13 @@ def key() -> str:

class ColumnarSchema(Schema):
def get_reader(self, stream: BytesIO) -> Iterable[str]:
    """Lazily yield the decoded lines of ``stream`` without line endings.

    The bytes are decoded incrementally with ``self.encoding``. A blank
    line is yielded as an empty string so that the caller can decide to
    skip it; only a real end of stream stops the iteration.

    Args:
        stream: Binary stream holding the columnar payload.

    Yields:
        Each line of the payload, line terminator removed.
    """
    stream_reader = codecs.getreader(self.encoding)(stream)
    while True:
        # Read with the terminator kept: that way an empty string can only
        # mean end of stream, while a blank line still comes back non-empty
        # (just its terminator) and does not end the iteration early.
        line = stream_reader.readline()
        if not line:
            break
        # Strip the terminator only after the end-of-stream check.
        yield line.splitlines()[0]

@staticmethod
def key() -> str:
Expand Down
37 changes: 37 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,21 @@ def test_errors_do_not_halt_parsing(self):
}
]

def test_parse_should_skip_empty_lines(self):
    """An empty line in a CSV payload yields neither a row nor an error."""
    schema = Schema.build(
        {
            "file_type": "csv",
            "fields": [{"key": "name", "type": "str", "column-number": 1}],
        }
    )
    # Escaped newlines keep the payload independent of source indentation.
    # The second line is genuinely empty, so the test fails if empty rows
    # stop being skipped.
    rows, errors = schema.parse(b"1\n\n2\n")
    assert rows == [{"name": "1"}, {"name": "2"}]
    assert not errors


class TestColumnarParse(TestCase):
def test_with_no_data(self):
Expand Down Expand Up @@ -216,6 +231,28 @@ def test_errors_do_not_halt_parsing(self):
}
]

def test_parse_should_skip_empty_lines(self):
    """An empty line in a columnar payload yields neither a row nor an error."""
    schema = Schema.build(
        {
            "file_type": "columnar",
            "fields": [
                {
                    "key": "name",
                    "type": "str",
                    "column-start": 0,
                    "column-length": 8,
                }
            ],
        }
    )
    # Escaped newlines keep the payload independent of source indentation.
    # The record is followed by a genuinely empty line, which must not be
    # parsed as a row.
    rows, errors = schema.parse(b"8013109C\n\n")
    assert rows == [{"name": "8013109C"}]
    assert not errors


class TestQuotingSetting(TestCase):
def test_no_quote(self):
Expand Down

0 comments on commit b005370

Please sign in to comment.