Skip to content

Commit

Permalink
UP: Got row_filter ready
Browse files Browse the repository at this point in the history
  • Loading branch information
langmore committed Nov 3, 2013
1 parent c55c648 commit cdc2876
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 67 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
recursive-include dspy *
recursive-include dspy/cmd *

include MANIFEST.in
include LICENSE
Expand Down
1 change: 1 addition & 0 deletions dspy/cmd/cut.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#! python
"""
Reads a csv file or stdin, keeps/removes selected columns.
Prints to stdout or a file.
Expand Down
122 changes: 56 additions & 66 deletions dspy/cmd/row_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ def _cli():
Examples
---------
Keep rows in curriculum.csv where the subject contains the word 'algebra'
Keep rows in curriculum.csv where subject contains 'algebra'
$ python row_filter.py -n subject -C algebra curriculum.csv
Keep rows in curriculum.csv where the subject doesn't contain the word 'algebra'
Keep rows in curriculum.csv where subject doesn't contain 'algebra'
$ python row_filter.py -n subject -c algebra curriculum.csv
Keep rows in curriculum.csv where the subject equals the word 'algebra'
Keep rows in curriculum.csv where subject equals 'algebra'
$ python row_filter.py -n subject -E algebra curriculum.csv
Keep rows in curriculum.csv where the subject doesn't equal the word 'algebra'
Keep rows in curriculum.csv where subject doesn't equal 'algebra'
$ python row_filter.py -n subject -e algebra curriculum.csv
"""
parser = argparse.ArgumentParser(
Expand All @@ -35,93 +35,83 @@ def _cli():
parser.add_argument(
'infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
help='Convert this file. If not specified, read from stdin.')

parser.add_argument(
'-o', '--outfile', default=sys.stdout, type=argparse.FileType('w'),
help='Write to OUT_FILE rather than sys.stdout.')

parser.add_argument(
"-d", "--delimiter",
help="Use DELIMITER as the column delimiter. [default: %(default)s]",
default=',')
parser.add_option(
"-n", "--name",
help="Name of the columm to filter on. [default: %default]",
action="store", dest='name', default=None)
parser.add_option(
"-C", "--contains",
help="Column with name = NAME must contain CONTAINS else we kill that row. "
"[default: %default]",
action='store', dest='contains', default=None)
parser.add_option(
"-E", "--equals",
help="Column with name = NAME must equal EQUALS else we kill that row. "
"[default: %default]",
action='store', dest='equals', default=None)
parser.add_option(
"-e", "--notequals",
help="Column with name = NAME must not equal NOTEQUALS else we kill that row. "
"[default: %default]",
action='store', dest='notequals', default=None)
parser.add_option(
"-c", "--notcontains",
help="Column with name = NAME must not contain NOTCONTAINS else we kill that row."
" [default: %default]",
action='store', dest='notcontains', default=None)
parser.add_option(
"-o", "--outfilename",
help="Write to this file rather than stdout. [default: %default]",
action="store", dest='outfilename', default=None)
help="Use DELIMITER as the column delimiter in infile."
" [default: %(default)s]", default=',')

(opt, args) = parser.parse_args()
parser.add_argument(
"-n", "--name", required=True, help="Name of the columm to filter on.")

### Parse args
infilename = args[0] if args else None
spec = parser.add_mutually_exclusive_group(required=True)
spec.add_argument(
"-C", "--contains",
help="Column with name = NAME must contain CONTAINS else we kill that "
"row. ")
spec.add_argument(
"-E", "--equals",
help="Column with name = NAME must equal EQUALS else we kill that "
"row. ")
spec.add_argument(
"-c", "--not_contains",
help="Column with name = NAME must not contain NOTCONTAINS else we "
"kill that row.")
spec.add_argument(
"-e", "--not_equals",
help="Column with name = NAME must not equal NOTEQUALS else we kill "
"that row. ")

infile, outfile = common.get_inout_files(infilename, opt.outfilename, outmode='wb')
args = parser.parse_args()

column_filter(infile, outfile, opt.delimiter, opt)
for mode in ['contains', 'equals', 'not_contains', 'not_equals']:
if args.__dict__[mode]:
match_str = args.__dict__[mode]
break

common.close_files(infile, outfile)
column_filter(
args.infile, args.outfile, args.name, mode, match_str, args.delimiter)


def column_filter(infile, outfile, delimiter, opt):
def column_filter(infile, outfile, name, mode, match_str, delimiter):
"""
NOTE: Written late at night after drinking...should be refactored!
Module interface. See _cli for doc. Add doc later if needed.
"""
## Get the csv reader and writer. Use these to read/write the files.
# reader.fieldnames gives you the header
reader = csv.DictReader(infile, delimiter=delimiter)
writer = csv.DictWriter(outfile, delimiter=delimiter, fieldnames=reader.fieldnames)
writer = csv.DictWriter(
outfile, delimiter=delimiter, fieldnames=reader.fieldnames)
writer.writeheader()

mode_fun = {
'contains': _check_contains, 'not_contains': _check_not_contains,
'equals': _check_equals, 'not_equals': _check_not_equals}

## Iterate through the file, printing out lines
for row in reader:
content = row[opt.name]
if _shouldwrite(content, opt):
if mode_fun[mode](row[name], match_str):
writer.writerow(row)


def _shouldwrite(content, opt):
    """
    Decide whether a row should be written, given the filter options.

    Parameters
    ----------
    content : str or None
        Value of the filtered column in the current row.
    opt : object
        Options holder exposing the attributes ``equals``, ``contains``,
        ``notequals`` and ``notcontains``.  The first truthy one, in that
        order, selects the test that is applied.

    Returns
    -------
    bool
        True if the row passes the selected test.

    Raises
    ------
    ValueError
        If none of the four options is set.
    """
    # The option alone picks the test.  Previously the positive tests were
    # guarded by "opt.X and content", so an empty cell fell through the
    # chain and hit the ValueError below instead of simply not matching.
    if opt.equals:
        return bool(content) and content == opt.equals
    if opt.contains:
        return bool(content) and opt.contains in content
    # An empty cell always passes the negated tests.
    if opt.notequals:
        return not content or content != opt.notequals
    if opt.notcontains:
        return not content or opt.notcontains not in content
    raise ValueError(
        "Unable to determine what to filter. options = %s" % opt.__dict__)
def _check_contains(item, match_str):
    """
    Return True if ``match_str`` is a substring of the cell value ``item``.

    ``item`` may be None: csv.DictReader fills cells that are missing from
    a short row with None.  A missing cell contains nothing, so the result
    is False rather than a TypeError from ``in``.
    """
    return item is not None and match_str in item


def _check_not_contains(item, match_str):
    """
    Return True if ``match_str`` is NOT a substring of the cell value ``item``.

    ``item`` may be None: csv.DictReader fills cells that are missing from
    a short row with None.  A missing cell cannot contain ``match_str``,
    so the result is True rather than a TypeError from ``in``.
    """
    return item is None or match_str not in item


def _check_equals(item, match_str):
    """Return True when the cell value ``item`` is exactly ``match_str``."""
    is_same = match_str == item
    return is_same


def _check_not_equals(item, match_str):
    """Return True when the cell value ``item`` differs from ``match_str``."""
    # Same test as negating the equality helper, written inline.
    return not (match_str == item)



if __name__=='__main__':
Expand Down
3 changes: 3 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ TESTDIR=dspy/tests

all: clean test

install:
$(PYTHON) setup.py install

clean-ctags:
rm -f tags

Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
setup(
name=DISTNAME,
version='0.1.0dev',
packages=['dspy',],
packages=[
'dspy',
'dspy.cmd'],
scripts=['dspy/cmd/cut.py'],
license=LICENSE,
url=URL,
maintainer_email=EMAIL,
Expand Down

0 comments on commit cdc2876

Please sign in to comment.