Allow wildcard pattern-matching when specifying converters

JasonS09 · Jun 25, 2021 · 5cd6a57 · 5cd6a57
1 parent a3263b6
commit 5cd6a57
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 8 deletions.
diff --git a/astropy/io/ascii/core.py b/astropy/io/ascii/core.py
@@ -18,6 +18,7 @@
 import re
 import warnings
 import inspect
+import fnmatch
 
 from collections import OrderedDict
 from contextlib import suppress
@@ -1036,13 +1037,14 @@ def _validate_and_copy(col, converters):
 
     def _convert_vals(self, cols):
         for col in cols:
-            # If a specific dtype was specified for a column, then use that
-            # to set the defaults, otherwise use the generic defaults.
-            default_converters = ([convert_numpy(col.dtype)] if col.dtype
-                                  else self.default_converters)
-
-            # If the user supplied a specific convert then that takes precedence over defaults
-            converters = self.converters.get(col.name, default_converters)
+            if col.dtype is not None:
+                converters = [convert_numpy(col.dtype)]
+            else:
+                for key, converters in self.converters.items():
+                    if fnmatch.fnmatch(col.name, key):
+                        break
+                else:
+                    converters = self.default_converters
 
             col.converters = self._validate_and_copy(col, converters)
 

diff --git a/astropy/io/ascii/docs.py b/astropy/io/ascii/docs.py
@@ -39,7 +39,9 @@
         Line index for the end of data not counting comment or blank lines.
         This value can be negative to count from the end.
     converters : dict
-        Dictionary of converters
+        Dictionary of converters. Keys in the dictionary are column names,
+        values are converter functions. In addition to single column names
+        you can use wildcards via `fnmatch` to select multiple columns.
     data_Splitter : `~astropy.io.ascii.BaseSplitter`
         Splitter class to split data columns
     header_Splitter : `~astropy.io.ascii.BaseSplitter`

diff --git a/astropy/io/ascii/tests/test_read.py b/astropy/io/ascii/tests/test_read.py
@@ -1640,3 +1640,13 @@ def test_read_masked_bool():
     assert col.dtype.kind == 'b'
     assert np.all(col.mask == [False, False, False, True, False])
     assert np.all(col == [True, False, True, False, False])
+
+
+def test_read_converters_wildcard():
+    '''Test converters where the column name is specified with
+    a wildcard.
+    '''
+    converters = {'F*': [ascii.convert_numpy(np.float32)]}
+    t = ascii.read(['Fabc Iabc', '1 2'], converters=converters)
+    assert np.issubdtype(t['Fabc'].dtype, np.float32)
+    assert not np.issubdtype(t['Iabc'].dtype, np.float32)
diff --git a/docs/io/ascii/read.rst b/docs/io/ascii/read.rst
@@ -577,6 +577,15 @@ The default converters for each column can be overridden with the
   ...               'col2': [ascii.convert_numpy(np.float32)]}
   >>> ascii.read('file.dat', converters=converters)  # doctest: +SKIP
 
+In addition to single column names you can use wildcards via `fnmatch` to
+select multiple columns. For example, we can set the data type for all columns
+where the column name starts with i to `np.uint` while applying default
+converters to all other columns in the table::
+
+  >>> import numpy as np
+  >>> converters = {'i*': [ascii.convert_numpy(np.uint)]}
+  >>> ascii.read('file.dat', converters=converters)  # doctest: +SKIP
+
 
 .. _fortran_style_exponents: