Skip to content

Commit

Permalink
fix: No longer error if a NUL byte occurs in an input file, closes #927
Browse files Browse the repository at this point in the history
This affects the performance of all reads of input files (every value returned by `__next__` now has NUL bytes stripped from it), but the penalty doesn't seem consequential.
  • Loading branch information
jpmckinney committed Oct 17, 2023
1 parent 6051379 commit 4457f6d
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Unreleased
* :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
* :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places.
* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
* fix: No longer errors if a NUL byte occurs in an input file.
* Add Python 3.12 support.

1.2.0 - October 4, 2023
Expand Down
11 changes: 5 additions & 6 deletions csvkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ def __init__(self, init, *args, **kwargs):
self._lazy_kwargs = kwargs

def __getattr__(self, name):
if not self._is_lazy_opened:
self.f = self.init(*self._lazy_args, **self._lazy_kwargs)
self._is_lazy_opened = True

self._open()
return getattr(self.f, name)

def __iter__(self):
Expand All @@ -51,12 +48,14 @@ def close(self):
self._is_lazy_opened = False

def __next__(self):
self._open()
return next(self.f).replace('\0', '')

def _open(self):
if not self._is_lazy_opened:
self.f = self.init(*self._lazy_args, **self._lazy_kwargs)
self._is_lazy_opened = True

return next(self.f)


class CSVKitUtility:
description = ''
Expand Down
Binary file added examples/null_byte.csv
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/test_utilities/test_csvcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,7 @@ def test_names_with_skip_lines(self):
' 2: b',
' 3: c',
])

def test_null_byte(self):
# Test that csvcut doesn't error on a null byte.
self.get_output(['-C', '', 'examples/null_byte.csv'])

0 comments on commit 4457f6d

Please sign in to comment.