UP: Got row_filter ready

mdeland · Nov 3, 2013 · cdc2876 · cdc2876
1 parent c55c648
commit cdc2876
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 67 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include dspy *
+recursive-include dspy/cmd *
 
 include MANIFEST.in
 include LICENSE

diff --git a/dspy/cmd/cut.py b/dspy/cmd/cut.py
@@ -1,3 +1,4 @@
+#! python
 """
 Reads a csv file or stdin, keeps/removes selected columns.
 Prints to stdout or a file.

diff --git a/dspy/cmd/row_filter.py b/dspy/cmd/row_filter.py
@@ -16,16 +16,16 @@ def _cli():
 
     Examples
     ---------
-    Keep rows in curriculum.csv where the subject contains the word 'algebra'
+    Keep rows in curriculum.csv where subject contains 'algebra'
     $ python row_filter.py -n subject -C algebra curriculum.csv
 
-    Keep rows in curriculum.csv where the subject doesn't contain the word 'algebra'
+    Keep rows in curriculum.csv where subject doesn't contain 'algebra'
     $ python row_filter.py -n subject -c algebra curriculum.csv
 
-    Keep rows in curriculum.csv where the subject equals the word 'algebra'
+    Keep rows in curriculum.csv where subject equals 'algebra'
     $ python row_filter.py -n subject -E algebra curriculum.csv
 
-    Keep rows in curriculum.csv where the subject doesn't equal the word 'algebra'
+    Keep rows in curriculum.csv where subject doesn't equal 'algebra'
     $ python row_filter.py -n subject -e algebra curriculum.csv
     """
     parser = argparse.ArgumentParser(
@@ -35,93 +35,83 @@ def _cli():
     parser.add_argument(
         'infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
         help='Convert this file.  If not specified, read from stdin.')
-
     parser.add_argument(
         '-o', '--outfile', default=sys.stdout, type=argparse.FileType('w'),
         help='Write to OUT_FILE rather than sys.stdout.')
 
     parser.add_argument(
         "-d", "--delimiter",
-        help="Use DELIMITER as the column delimiter.  [default: %(default)s]",
-        default=',')
-    parser.add_option(
-        "-n", "--name",
-        help="Name of the columm to filter on.  [default: %default]",
-        action="store", dest='name', default=None)
-    parser.add_option(
-        "-C", "--contains",
-        help="Column with name = NAME must contain CONTAINS else we kill that row. "
-        "[default: %default]", 
-        action='store', dest='contains', default=None)
-    parser.add_option(
-        "-E", "--equals",
-        help="Column with name = NAME must equal EQUALS else we kill that row. "
-        "[default: %default]", 
-        action='store', dest='equals', default=None)
-    parser.add_option(
-        "-e", "--notequals",
-        help="Column with name = NAME must not equal NOTEQUALS else we kill that row. "
-        "[default: %default]", 
-        action='store', dest='notequals', default=None)
-    parser.add_option(
-        "-c", "--notcontains",
-        help="Column with name = NAME must not contain NOTCONTAINS else we kill that row."
-        "  [default: %default]", 
-        action='store', dest='notcontains', default=None)
-    parser.add_option(
-        "-o", "--outfilename",
-        help="Write to this file rather than stdout.  [default: %default]",
-        action="store", dest='outfilename', default=None)
+        help="Use DELIMITER as the column delimiter in infile."
+        "  [default: %(default)s]", default=',')
 
-    (opt, args) = parser.parse_args()
+    parser.add_argument(
+        "-n", "--name", required=True, help="Name of the column to filter on.")
 
-    ### Parse args
-    infilename = args[0] if args else None
+    spec = parser.add_mutually_exclusive_group(required=True)
+    spec.add_argument(
+        "-C", "--contains",
+        help="Column with name = NAME must contain CONTAINS else we kill that "
+        "row. ")
+    spec.add_argument(
+        "-E", "--equals",
+        help="Column with name = NAME must equal EQUALS else we kill that "
+        "row. ")
+    spec.add_argument(
+        "-c", "--not_contains",
+        help="Column with name = NAME must not contain NOT_CONTAINS else we "
+        "kill that row.")
+    spec.add_argument(
+        "-e", "--not_equals",
+        help="Column with name = NAME must not equal NOT_EQUALS else we kill "
+        "that row. ")
 
-    infile, outfile = common.get_inout_files(infilename, opt.outfilename, outmode='wb')
+    args = parser.parse_args()
 
-    column_filter(infile, outfile, opt.delimiter, opt)
+    for mode in ['contains', 'equals', 'not_contains', 'not_equals']:
+        if args.__dict__[mode]:
+            match_str = args.__dict__[mode]
+            break
 
-    common.close_files(infile, outfile)
+    column_filter(
+        args.infile, args.outfile, args.name, mode, match_str, args.delimiter)
 
 
-def column_filter(infile, outfile, delimiter, opt):
+def column_filter(infile, outfile, name, mode, match_str, delimiter):
     """
-    NOTE:  Written late at night after drinking...should be refactored!
+    Module interface.  See _cli for doc.  Add doc later if needed.
     """
     ## Get the csv reader and writer.  Use these to read/write the files.
     # reader.fieldnames gives you the header
     reader = csv.DictReader(infile, delimiter=delimiter)
-    writer = csv.DictWriter(outfile, delimiter=delimiter, fieldnames=reader.fieldnames)
+    writer = csv.DictWriter(
+        outfile, delimiter=delimiter, fieldnames=reader.fieldnames)
     writer.writeheader()
 
+    mode_fun = {
+        'contains': _check_contains, 'not_contains': _check_not_contains,
+        'equals': _check_equals, 'not_equals': _check_not_equals}
+
     ## Iterate through the file, printing out lines 
     for row in reader:
-        content = row[opt.name]
-        if _shouldwrite(content, opt):
+        if mode_fun[mode](row[name], match_str):
             writer.writerow(row)
 
 
-def _shouldwrite(content, opt):
-    if opt.equals and content:
-        shouldwrite = content == opt.equals
-    elif opt.contains and content:
-        shouldwrite = opt.contains in content
-    elif opt.notequals:
-        if not content:
-            shouldwrite = True
-        else: 
-            shouldwrite = content != opt.notequals
-    elif opt.notcontains:
-        if not content:
-            shouldwrite = True
-        else:
-            shouldwrite = opt.notcontains not in content
-    else:
-        raise ValueError(
-            "Unable to determine what to filter.  options = %s" % opt.__dict__)
-
-    return shouldwrite
+def _check_contains(item, match_str):
+    return match_str in item
+
+
+def _check_not_contains(item, match_str):
+    return not _check_contains(item, match_str)
+
+
+def _check_equals(item, match_str):
+    return match_str == item
+
+
+def _check_not_equals(item, match_str):
+    return not _check_equals(item, match_str)
+
 
 
 if __name__=='__main__':

diff --git a/makefile b/makefile
@@ -10,6 +10,9 @@ TESTDIR=dspy/tests
 
 all: clean test
 
+install:
+	$(PYTHON) setup.py install
+
 clean-ctags:
 	rm -f tags
 

diff --git a/setup.py b/setup.py
@@ -10,7 +10,10 @@
 setup(
     name=DISTNAME,
     version='0.1.0dev',
-    packages=['dspy',],
+    packages=[
+        'dspy',
+        'dspy.cmd'],
+    scripts=['dspy/cmd/cut.py'],
     license=LICENSE,
     url=URL,
     maintainer_email=EMAIL,