From f94ee8ffee05930ba34b91472b79d7a8d244adf9 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 18 Oct 2023 00:30:49 -0400 Subject: [PATCH] in2csv: Add a --use-sheet-names option, closes #987 --- AUTHORS.rst | 1 + CHANGELOG.rst | 1 + csvkit/utilities/in2csv.py | 11 +++++++-- docs/scripts/in2csv.rst | 14 +++++++---- tests/test_utilities/test_in2csv.py | 38 +++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 7 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index ce8e86f97..a4d1be01e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -108,3 +108,4 @@ The following individuals have contributed code to csvkit: * Werner Robitza * Mark Mayo * Kitagawa Yasutaka +* rachekalmir diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cdd6632da..fdf60aab8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,7 @@ Unreleased * :doc:`/scripts/csvstat` reports a "Most decimal places" statistic (or a :code:`maxprecision` column when :code:`--csv` is set). * :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values. * :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places. +* :doc:`/scripts/in2csv` adds a :code:`--use-sheet-names` option to use the sheet names as file names when :code:`--write-sheets` is set. * feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL. * fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable. * fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`. diff --git a/csvkit/utilities/in2csv.py b/csvkit/utilities/in2csv.py index 486a80d73..03e9b5f05 100644 --- a/csvkit/utilities/in2csv.py +++ b/csvkit/utilities/in2csv.py @@ -46,6 +46,9 @@ def add_arguments(self): self.argparser.add_argument( '--write-sheets', dest='write_sheets', help='The names of the Excel sheets to write to files, or "-" to write all sheets.') + self.argparser.add_argument( + '--use-sheet-names', dest='use_sheet_names', action='store_true', + help='Use the sheet names as file names when --write-sheets is set.') self.argparser.add_argument( '--encoding-xls', dest='encoding_xls', help='Specify the encoding of the input XLS file.') @@ -177,8 +180,12 @@ def main(self): tables = agate.Table.from_xlsx(self.input_file, sheet=sheets, **kwargs) base = splitext(self.input_file.name)[0] - for i, table in enumerate(tables.values()): - with open('%s_%d.csv' % (base, i), 'w') as f: + for i, (sheet_name, table) in enumerate(tables.items()): + if self.args.use_sheet_names: + filename = '%s_%s.csv' % (base, sheet_name) + else: + filename = '%s_%d.csv' % (base, i) + with open(filename, 'w') as f: table.to_csv(f, **self.writer_kwargs) self.input_file.close() diff --git a/docs/scripts/in2csv.rst b/docs/scripts/in2csv.rst index e3aa1d895..1c4ff1bff 100644 --- a/docs/scripts/in2csv.rst +++ b/docs/scripts/in2csv.rst @@ -22,11 +22,13 @@ The header line is required though the columns may be in any order: usage: in2csv [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b] [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-L LOCALE] - [-S] [--blanks] [--date-format DATE_FORMAT] - [--datetime-format DATETIME_FORMAT] [-H] [-K SKIP_LINES] [-v] - [-l] [--zero] [-V] [-f FILETYPE] [-s SCHEMA] [-k KEY] [-n] - [--sheet SHEET] [--write-sheets WRITE_SHEETS] - [--encoding-xls ENCODING_XLS] [-y SNIFF_LIMIT] [-I] + [-S] [--blanks] [--null-value NULL_VALUES [NULL_VALUES ...]] + [--date-format DATE_FORMAT] [--datetime-format DATETIME_FORMAT] + [-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V] + [-f {csv,dbf,fixed,geojson,json,ndjson,xls,xlsx}] [-s SCHEMA] + [-k KEY] [-n] [--sheet SHEET] [--write-sheets WRITE_SHEETS] + [--use-sheet-names] [--encoding-xls ENCODING_XLS] + [-y SNIFF_LIMIT] [-I] [FILE] Convert common, but less awesome, tabular data formats to CSV. @@ -50,6 +52,8 @@ The header line is required though the columns may be in any order: --write-sheets WRITE_SHEETS The names of the Excel sheets to write to files, or "-" to write all sheets. + --use-sheet-names Use the sheet names as file names when --write-sheets + is set. --encoding-xls ENCODING_XLS Specify the encoding of the input XLS file. -y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT diff --git a/tests/test_utilities/test_in2csv.py b/tests/test_utilities/test_in2csv.py index c78e0a7ed..95b691572 100644 --- a/tests/test_utilities/test_in2csv.py +++ b/tests/test_utilities/test_in2csv.py @@ -257,3 +257,41 @@ def test_convert_xlsx_with_write_sheets(self): path = 'examples/sheets_%d.csv' % suffix if os.path.exists(path): os.remove(path) + + def test_convert_xls_with_write_sheets_with_names(self): + try: + self.assertConverted('xls', 'examples/sheets.xls', 'examples/testxls_converted.csv', + ['--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names']) + with open('examples/sheets_ʤ.csv', 'r') as f: + with open('examples/testxls_unicode_converted.csv', 'r') as g: + self.assertEqual(f.read(), g.read()) + with open('examples/sheets_data.csv', 'r') as f: + with open('examples/testxls_converted.csv', 'r') as g: + self.assertEqual(f.read(), g.read()) + self.assertFalse(os.path.exists('examples/sheets_0.csv')) + self.assertFalse(os.path.exists('examples/sheets_1.csv')) + self.assertFalse(os.path.exists('examples/sheets_2.csv')) + finally: + for suffix in ('ʤ', 'data'): + path = 'examples/sheets_%s.csv' % suffix + if os.path.exists(path): + os.remove(path) + + def test_convert_xlsx_with_write_sheets_with_names(self): + try: + self.assertConverted('xlsx', 'examples/sheets.xlsx', 'examples/testxlsx_noinference_converted.csv', + ['--no-inference', '--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names']) + with open('examples/sheets_ʤ.csv', 'r') as f: + with open('examples/testxlsx_unicode_converted.csv', 'r') as g: + self.assertEqual(f.read(), g.read()) + with open('examples/sheets_data.csv', 'r') as f: + with open('examples/testxlsx_noinference_converted.csv', 'r') as g: + self.assertEqual(f.read(), g.read()) + self.assertFalse(os.path.exists('examples/sheets_0.csv')) + self.assertFalse(os.path.exists('examples/sheets_1.csv')) + self.assertFalse(os.path.exists('examples/sheets_2.csv')) + finally: + for suffix in ('ʤ', 'data'): + path = 'examples/sheets_%s.csv' % suffix + if os.path.exists(path): + os.remove(path)