Skip to content

Commit

Permalink
feat(csvsort): Add --ignore-case option, closes #1175
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Feb 15, 2024
1 parent 83523f7 commit 6dc83f1
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Unreleased

* feat: Add support for Zstandard files with the ``.zst`` extension, if the ``zstandard`` package is installed.
* feat: :doc:`/scripts/csvformat` adds a :code:`--out-asv` (:code:`-A`) option to use the ASCII unit separator and record separator.
* feat: :doc:`/scripts/csvsort` adds a :code:`--ignore-case` (:code:`-i`) option to perform case-independent sorting.

1.4.0 - February 13, 2024
-------------------------
Expand Down
21 changes: 19 additions & 2 deletions csvkit/utilities/csvsort.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@
from csvkit.cli import CSVKitUtility, parse_column_identifiers


def ignore_case_sort(key):
    """Build a row -> sort-key function that ignores letter case.

    ``key`` is an iterable of identifiers used to index into each row.
    The returned callable maps a row to a tuple with one entry per
    identifier, in the same order:

    * ``None`` values become ``agate.NullOrder()``,
    * ``str`` values are upper-cased so comparisons disregard case,
    * every other value is passed through unchanged.
    """

    def _normalize(value):
        # Nulls are replaced by agate's null-ordering placeholder.
        if value is None:
            return agate.NullOrder()
        # Only text is case-folded; other values are left as they are.
        if isinstance(value, str):
            return value.upper()
        return value

    def inner(row):
        return tuple(_normalize(row[column]) for column in key)

    return inner


class CSVSort(CSVKitUtility):
description = 'Sort CSV files. Like the Unix "sort" command, but for tabular data.'

Expand All @@ -19,6 +30,9 @@ def add_arguments(self):
self.argparser.add_argument(
'-r', '--reverse', dest='reverse', action='store_true',
help='Sort in descending order.')
self.argparser.add_argument(
'-i', '--ignore-case', dest='ignore_case', action='store_true',
help='Perform case-independent sorting.')
self.argparser.add_argument(
'-y', '--snifflimit', dest='sniff_limit', type=int, default=1024,
help='Limit CSV dialect sniffing to the specified number of bytes. '
Expand All @@ -44,13 +58,16 @@ def main(self):
**self.reader_kwargs,
)

column_ids = parse_column_identifiers(
key = parse_column_identifiers(
self.args.columns,
table.column_names,
self.get_column_offset(),
)

table = table.order_by(column_ids, reverse=self.args.reverse)
if self.args.ignore_case:
key = ignore_case_sort(key)

table = table.order_by(key, reverse=self.args.reverse)
table.to_csv(self.output_file, **self.writer_kwargs)


Expand Down
3 changes: 2 additions & 1 deletion docs/scripts/csvsort.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Sort CSV files. Like the Unix "sort" command, but for tabular data:
[-S] [--blanks] [--null-value NULL_VALUES [NULL_VALUES ...]]
[--date-format DATE_FORMAT] [--datetime-format DATETIME_FORMAT]
[-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V] [-n] [-c COLUMNS]
[-r] [-y SNIFF_LIMIT] [-I]
[-r] [-i] [-y SNIFF_LIMIT] [-I]
[FILE]
Sort CSV files. Like the Unix "sort" command, but for tabular data.
Expand All @@ -32,6 +32,7 @@ Sort CSV files. Like the Unix "sort" command, but for tabular data:
ranges to sort by, e.g. "1,id,3-5". Defaults to all
columns.
-r, --reverse Sort in descending order.
-i, --ignore-case Perform case-independent sorting.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing.
Expand Down
6 changes: 6 additions & 0 deletions examples/test_ignore_case.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
a,b,c
100,2003-01-01,a
100,2003-01-01,A
20,2002-01-01,b
20,2001-01-01,c
3,2009-01-01,d
10 changes: 10 additions & 0 deletions tests/test_utilities/test_csvsort.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ def test_sort_date(self):
new_order = [str(r[0]) for r in reader]
self.assertEqual(test_order, new_order)

def test_ignore_case(self):
    # Expected output of running the utility with ``-i`` on the
    # ignore-case example file: the header row followed by the data rows.
    expected_rows = [
        ['a', 'b', 'c'],
        ['3', '2009-01-01', 'd'],
        ['20', '2001-01-01', 'c'],
        ['20', '2002-01-01', 'b'],
        ['100', '2003-01-01', 'a'],
        ['100', '2003-01-01', 'A'],
    ]
    arguments = ['-i', 'examples/test_ignore_case.csv']

    self.assertRows(arguments, expected_rows)

def test_no_blanks(self):
reader = self.get_output_as_reader(['examples/blanks.csv'])
test_order = [
Expand Down

0 comments on commit 6dc83f1

Please sign in to comment.