From 95dc26dde97ddb2b28947f8b8353d8eace675a71 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Sat, 27 Apr 2024 21:44:34 -0400
Subject: [PATCH] fix: csvformat supports --out-quoting 2. --quoting (and
 --out-quoting) support options from Python 3.12.

---
 CHANGELOG.rst                          | 12 +++-
 csvkit/cli.py                          | 18 ++---
 csvkit/utilities/csvformat.py          | 45 ++++++++----
 docs/common_arguments.rst              |  8 +--
 docs/contributing.rst                  |  2 +-
 docs/release.rst                       |  6 --
 docs/scripts/csvformat.rst             | 12 ++--
 tests/test_utilities/test_csvformat.py | 99 ++++++++++++++++++++++++++
 8 files changed, 159 insertions(+), 43 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0cf6ace88..c8c7d0ba2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,10 +1,16 @@
 2.0.0 - Unreleased
 ------------------
 
-**BACKWARDS-INCOMPATIBLE CHANGES**
+**BACKWARDS-INCOMPATIBLE CHANGES:**
 
 * :doc:`/scripts/csvclean` now writes its output to standard output and its errors to standard error, instead of to ``basename_out.csv`` and ``basename_err.csv`` files. Consequently, it no longer supports a :code:`--dry-run` flag to output summary information like ``No errors.``, ``42 errors logged to basename_err.csv`` or ``42 rows were joined/reduced to 24 rows after eliminating expected internal line breaks.``.
 
+Other changes:
+
+* feat: The :code:`--quoting` option accepts 4 (`csv.QUOTE_STRINGS <https://docs.python.org/3/library/csv.html#csv.QUOTE_STRINGS>`__) and 5 (`csv.QUOTE_NOTNULL <https://docs.python.org/3/library/csv.html#csv.QUOTE_NOTNULL>`__) on Python 3.12 or greater.
+* feat: :doc:`/scripts/csvformat`: The :code:`--out-quoting` option accepts 4 (`csv.QUOTE_STRINGS <https://docs.python.org/3/library/csv.html#csv.QUOTE_STRINGS>`__) and 5 (`csv.QUOTE_NOTNULL <https://docs.python.org/3/library/csv.html#csv.QUOTE_NOTNULL>`__) on Python 3.12 or greater.
+* fix: :doc:`/scripts/csvformat`: The :code:`--out-quoting` option works with 2 (`csv.QUOTE_NONNUMERIC <https://docs.python.org/3/library/csv.html#csv.QUOTE_NONNUMERIC>`__). Use the :code:`--locale` option to set the locale of any formatted numbers.
+
 1.5.0 - March 28, 2024
 ----------------------
 
@@ -21,7 +27,7 @@
    * :code:`--sniff-limit``
    * :code:`--no-inference``
 
-* feat: :doc:`/scripts/csvpy` removes the ``--linenumbers`` and ``--zero`` output options, which had no effect.
+* feat: :doc:`/scripts/csvpy` removes the :code:`--linenumbers` and :code:`--zero` output options, which had no effect.
 * feat: :doc:`/scripts/in2csv` adds a :code:`--reset-dimensions` option to `recalculate <https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions>`_ the dimensions of an XLSX file, instead of trusting the file's metadata. csvkit's dependency `agate-excel <https://agate-excel.readthedocs.io/en/latest/>`_ 0.4.0 automatically recalculates the dimensions if the file's metadata expresses dimensions of "A1:A1" (a single cell).
 * fix: :doc:`/scripts/csvlook` only reads up to :code:`--max-rows` rows instead of the entire file.
 * fix: :doc:`/scripts/csvpy` supports the existing input options:
@@ -61,7 +67,7 @@
 1.2.0 - October 4, 2023
 -----------------------
 
-* fix: :doc:`/scripts/csvjoin` uses the correct columns when performing a ``--right`` join.
+* fix: :doc:`/scripts/csvjoin` uses the correct columns when performing a :code:`--right` join.
 * Add SQLAlchemy 2 support.
 * Drop Python 3.7 support (end-of-life was June 5, 2023).
 
diff --git a/csvkit/cli.py b/csvkit/cli.py
index 6dabc6bfe..275dbba37 100644
--- a/csvkit/cli.py
+++ b/csvkit/cli.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-
 import argparse
 import bz2
 import csv
@@ -22,6 +21,8 @@
 except ImportError:
     zstandard = None
 
+QUOTING_CHOICES = sorted(getattr(csv, name) for name in dir(csv) if name.startswith('QUOTE_'))
+
 
 class LazyFile:
     """
@@ -170,9 +171,9 @@ def _init_common_parser(self):
                 help='Character used to quote strings in the input CSV file.')
         if 'u' not in self.override_flags:
             self.argparser.add_argument(
-                '-u', '--quoting', dest='quoting', type=int, choices=[0, 1, 2, 3],
-                help='Quoting style used in the input CSV file. 0 = Quote Minimal, 1 = Quote All, '
-                     '2 = Quote Non-numeric, 3 = Quote None.')
+                '-u', '--quoting', dest='quoting', type=int, choices=QUOTING_CHOICES,
+                help='Quoting style used in the input CSV file: 0 quote minimal, 1 quote all, '
+                     '2 quote non-numeric, 3 quote none.')
         if 'b' not in self.override_flags:
             self.argparser.add_argument(
                 '-b', '--no-doublequote', dest='doublequote', action='store_false',
@@ -180,7 +181,7 @@ def _init_common_parser(self):
         if 'p' not in self.override_flags:
             self.argparser.add_argument(
                 '-p', '--escapechar', dest='escapechar',
-                help='Character used to escape the delimiter if --quoting 3 ("Quote None") is specified and to escape '
+                help='Character used to escape the delimiter if --quoting 3 ("quote none") is specified and to escape '
                      'the QUOTECHAR if --no-doublequote is specified.')
         if 'z' not in self.override_flags:
             self.argparser.add_argument(
@@ -337,12 +338,13 @@ def get_column_types(self):
             type_kwargs['null_values'].append(null_value)
 
         text_type = agate.Text(**type_kwargs)
+        number_type = agate.Number(locale=self.args.locale, **type_kwargs)
 
-        if self.args.no_inference:
+        if getattr(self.args, 'no_inference', None):
             types = [text_type]
+        elif getattr(self.args, 'out_quoting', None) == 2:
+            types = [number_type, text_type]
         else:
-            number_type = agate.Number(locale=self.args.locale, **type_kwargs)
-
             # See the order in the `agate.TypeTester` class.
             types = [
                 agate.Boolean(**type_kwargs),
diff --git a/csvkit/utilities/csvformat.py b/csvkit/utilities/csvformat.py
index 655f5c44b..387db6642 100644
--- a/csvkit/utilities/csvformat.py
+++ b/csvkit/utilities/csvformat.py
@@ -4,12 +4,12 @@
 
 import agate
 
-from csvkit.cli import CSVKitUtility, make_default_headers
+from csvkit.cli import QUOTING_CHOICES, CSVKitUtility, make_default_headers
 
 
 class CSVFormat(CSVKitUtility):
     description = 'Convert a CSV file to a custom output format.'
-    override_flags = ['L', 'blanks', 'date-format', 'datetime-format']
+    override_flags = ['blanks', 'date-format', 'datetime-format']
 
     def add_arguments(self):
         self.argparser.add_argument(
@@ -29,9 +29,9 @@ def add_arguments(self):
             '-Q', '--out-quotechar', dest='out_quotechar',
             help='Character used to quote strings in the output file.')
         self.argparser.add_argument(
-            '-U', '--out-quoting', dest='out_quoting', type=int, choices=[0, 1, 2, 3],
-            help='Quoting style used in the output file. 0 = Quote Minimal, 1 = Quote All, '
-                 '2 = Quote Non-numeric, 3 = Quote None.')
+            '-U', '--out-quoting', dest='out_quoting', type=int, choices=QUOTING_CHOICES,
+            help='Quoting style used in the output file: 0 quote minimal, 1 quote all, '
+                 '2 quote non-numeric, 3 quote none.')
         self.argparser.add_argument(
             '-B', '--out-no-doublequote', dest='out_doublequote', action='store_false',
             help='Whether or not double quotes are doubled in the output file.')
@@ -72,18 +72,33 @@ def main(self):
         if self.additional_input_expected():
             sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')
 
-        reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)
         writer = agate.csv.writer(self.output_file, **self.writer_kwargs)
-        if self.args.no_header_row:
-            # Peek at a row to get the number of columns.
-            _row = next(reader)
-            headers = make_default_headers(len(_row))
-            reader = itertools.chain([headers, _row], reader)
 
-        if self.args.skip_header:
-            next(reader)
-
-        writer.writerows(reader)
+        if self.args.out_quoting == 2:
+            table = agate.Table.from_csv(
+                self.input_file,
+                skip_lines=self.args.skip_lines,
+                column_types=self.get_column_types(),
+                **self.reader_kwargs,
+            )
+
+            # table.to_csv() has no option to omit the column names.
+            if not self.args.skip_header:
+                writer.writerow(table.column_names)
+
+            writer.writerows(table.rows)
+        else:
+            reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)
+            if self.args.no_header_row:
+                # Peek at a row to get the number of columns.
+                _row = next(reader)
+                headers = make_default_headers(len(_row))
+                reader = itertools.chain([headers, _row], reader)
+
+            if self.args.skip_header:
+                next(reader)
+
+            writer.writerows(reader)
 
 
 def launch_new_instance():
diff --git a/docs/common_arguments.rst b/docs/common_arguments.rst
index 6487ce046..152bd7ce4 100644
--- a/docs/common_arguments.rst
+++ b/docs/common_arguments.rst
@@ -13,14 +13,14 @@ csvkit's tools share a set of common command-line arguments. Not every argument
    -q QUOTECHAR, --quotechar QUOTECHAR
                          Character used to quote strings in the input CSV file.
    -u {0,1,2,3}, --quoting {0,1,2,3}
-                         Quoting style used in the input CSV file. 0 = Quote
-                         Minimal, 1 = Quote All, 2 = Quote Non-numeric, 3 =
-                         Quote None.
+                         Quoting style used in the input CSV file: 0 quote
+                         minimal, 1 quote all, 2 quote non-numeric, 3 quote
+                         none.
    -b, --no-doublequote  Whether or not double quotes are doubled in the input
                          CSV file.
    -p ESCAPECHAR, --escapechar ESCAPECHAR
                          Character used to escape the delimiter if --quoting 3
-                         ("Quote None") is specified and to escape the
+                         ("quote none") is specified and to escape the
                          QUOTECHAR if --no-doublequote is specified.
    -z FIELD_SIZE_LIMIT, --maxfieldsize FIELD_SIZE_LIMIT
                          Maximum length of a single field in the input CSV
diff --git a/docs/contributing.rst b/docs/contributing.rst
index b16825ff9..e1fd6d743 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -70,7 +70,7 @@ Currently, the following tools stream:
 
 * :doc:`/scripts/csvclean`
 * :doc:`/scripts/csvcut`
-* :doc:`/scripts/csvformat`
+* :doc:`/scripts/csvformat` unless :code:`--out-quoting 2` is set
 * :doc:`/scripts/csvgrep`
 * :doc:`/scripts/csvstack`
 * :doc:`/scripts/sql2csv`
diff --git a/docs/release.rst b/docs/release.rst
index 5a3db0da6..700468298 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -2,12 +2,6 @@
 Release process
 ===============
 
-.. admonition:: One-time setup
-
-   .. code-block:: bash
-
-      pip install --upgrade build twine
-
 #. All tests pass on continuous integration
 #. The changelog is up-to-date and dated
 #. If new options are added, regenerate the usage information in the documentation with, for example:
diff --git a/docs/scripts/csvformat.rst b/docs/scripts/csvformat.rst
index cbaf956ec..eca45c935 100644
--- a/docs/scripts/csvformat.rst
+++ b/docs/scripts/csvformat.rst
@@ -10,9 +10,9 @@ Convert a CSV file to a custom output format.:
 .. code-block:: none
 
    usage: csvformat [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
-                    [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-S] [-H]
-                    [-K SKIP_LINES] [-v] [-l] [--zero] [-V] [-E]
-                    [-D OUT_DELIMITER] [-T] [-A] [-Q OUT_QUOTECHAR]
+                    [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING]
+                    [-L LOCALE] [-S] [-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V]
+                    [-E] [-D OUT_DELIMITER] [-T] [-A] [-Q OUT_QUOTECHAR]
                     [-U {0,1,2,3}] [-B] [-P OUT_ESCAPECHAR]
                     [-M OUT_LINETERMINATOR]
                     [FILE]
@@ -36,9 +36,9 @@ Convert a CSV file to a custom output format.:
      -Q OUT_QUOTECHAR, --out-quotechar OUT_QUOTECHAR
                            Character used to quote strings in the output file.
      -U {0,1,2,3}, --out-quoting {0,1,2,3}
-                           Quoting style used in the output file. 0 = Quote
-                           Minimal, 1 = Quote All, 2 = Quote Non-numeric, 3 =
-                           Quote None.
+                           Quoting style used in the output file: 0 quote
+                           minimal, 1 quote all, 2 quote non-numeric, 3 quote
+                           none.
      -B, --out-no-doublequote
                            Whether or not double quotes are doubled in the output
                            CSV file.
diff --git a/tests/test_utilities/test_csvformat.py b/tests/test_utilities/test_csvformat.py
index 7123f2c23..4521f38a8 100644
--- a/tests/test_utilities/test_csvformat.py
+++ b/tests/test_utilities/test_csvformat.py
@@ -95,3 +95,102 @@ def test_lineterminator(self):
         self.assertLines(['-M', 'XYZ', 'examples/dummy.csv'], [
             'a,b,cXYZ1,2,3XYZ',
         ], newline_at_eof=False)
+
+
+class TestCSVFormatQuoteNonNumeric(CSVKitTestCase, EmptyFileTests):
+    Utility = CSVFormat
+
+    # New test compared to TestCSVFormat.
+    def test_locale(self):
+        self.assertLines(['-U', '2', '--locale', 'de_DE', 'examples/test_locale.csv'], [
+            '"a","b","c"',
+            '1.7,200000000,""',
+        ])
+
+
+    def test_launch_new_instance(self):
+        with patch.object(sys, 'argv', [self.Utility.__name__.lower(), 'examples/dummy.csv']):
+            launch_new_instance()
+
+    def test_skip_lines(self):
+        self.assertLines(['-U', '2', '--skip-lines', '3', '-D', '|', 'examples/test_skip_lines.csv'], [
+            '"a"|"b"|"c"',
+            '1|2|3',
+        ])
+
+    def test_skip_header(self):
+        self.assertLines(['-U', '2', '--skip-header', 'examples/dummy.csv'], [
+            '1,2,3',
+        ])
+
+    def test_skip_header_no_header_row(self):
+        self.assertLines(['-U', '2', '--no-header-row', '--skip-header', 'examples/no_header_row.csv'], [
+            '1,2,3',
+        ])
+
+    def test_no_header_row(self):
+        self.assertLines(['-U', '2', '--no-header-row', 'examples/no_header_row.csv'], [
+            '"a","b","c"',
+            '1,2,3',
+        ])
+
+    def test_linenumbers(self):
+        self.assertLines(['-U', '2', '--linenumbers', 'examples/dummy.csv'], [
+            '"line_number","a","b","c"',
+            '1,1,2,3',
+        ])
+
+    def test_delimiter(self):
+        self.assertLines(['-U', '2', '-D', '|', 'examples/dummy.csv'], [
+            '"a"|"b"|"c"',
+            '1|2|3',
+        ])
+
+    def test_tabs(self):
+        self.assertLines(['-U', '2', '-T', 'examples/dummy.csv'], [
+            '"a"\t"b"\t"c"',
+            '1\t2\t3',
+        ])
+
+    def test_asv(self):
+        self.assertLines(['-U', '2', '-A', 'examples/dummy.csv'], [
+            '"a"\x1f"b"\x1f"c"\x1e1\x1f2\x1f3\x1e',
+        ], newline_at_eof=False)
+
+    def test_quotechar(self):
+        input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-U', '2', '-Q', '*'], [
+                '*a*,*b*,*c*',
+                '*1**2*,3,4',
+            ])
+
+        input_file.close()
+
+    def test_doublequote(self):
+        input_file = io.BytesIO(b'a\n"a ""quoted"" string"')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-U', '2', '-P', '#', '-B'], [
+                '"a"',
+                '"a #"quoted#" string"',
+            ])
+
+        input_file.close()
+
+    def test_escapechar(self):
+        input_file = io.BytesIO(b'a,b,c\n1"2,3,4\n')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-U', '2', '-P', '#', '-U', '3'], [
+                'a,b,c',
+                '1#"2,3,4',
+            ])
+
+        input_file.close()
+
+    def test_lineterminator(self):
+        self.assertLines(['-U', '2', '-M', 'XYZ', 'examples/dummy.csv'], [
+            '"a","b","c"XYZ1,2,3XYZ',
+        ], newline_at_eof=False)