diff --git a/json2csv.py b/json2csv.py index e70ef12..eac5e54 100755 --- a/json2csv.py +++ b/json2csv.py @@ -49,18 +49,24 @@ def __init__(self, outline): if 'collection' in outline: self.collection = outline['collection'] - def load(self, json_file): - self.process_each(json.load(json_file)) + def iterate_json(self, json_file): + json_data = json.load(json_file) - def process_each(self, data): - """Process each item of a json-loaded dict - """ - if self.collection and self.collection in data: - data = data[self.collection] + if self.collection and self.collection in json_data: + json_data = json_data[self.collection] + + for item in json_data: + yield item + + def transcribe(self, json_file, csv_writer, to_strings=True): + for item in self.iterate_json(json_file): + logging.info(item) + row = self.process_row(item) - for d in data: - logging.info(d) - self.rows.append(self.process_row(d)) + if to_strings: + row = {k: self.make_string(val) for k, val in row.items()} + + csv_writer.writerow(row) def process_row(self, item): """Process a row of json data against the key map @@ -75,13 +81,6 @@ def process_row(self, item): return row - def make_strings(self): - str_rows = [] - for row in self.rows: - str_rows.append({k: self.make_string(val) - for k, val in row.items()}) - return str_rows - def make_string(self, item): if isinstance(item, list) or isinstance(item, set) or isinstance(item, tuple): return self.SEP_CHAR.join([self.make_string(subitem) for subitem in item]) @@ -90,32 +89,19 @@ def make_string(self, item): else: return unicode(item) - def write_csv(self, filename='output.csv', make_strings=False): - """Write the processed rows to the given filename - """ - if (len(self.rows) <= 0): - raise AttributeError('No rows were loaded') - if make_strings: - out = self.make_strings() - else: - out = self.rows - with open(filename, 'wb+') as f: - writer = csv.DictWriter(f, self.key_map.keys()) - writer.writeheader() - writer.writerows(out) - class MultiLineJson2Csv(Json2Csv): - def load(self, json_file): - self.process_each(json_file) + def iterate_json(self, json_file): + """Load each line of a Mongo-like JSON file separately""" + for line in json_file: + item = json.loads(line) + + if self.collection and self.collection in item: + item = item[self.collection] + + yield item + - def process_each(self, data, collection=None): - """Load each line of an iterable collection (ie. file)""" - for line in data: - d = json.loads(line) - if self.collection in d: - d = d[self.collection] - self.rows.append(self.process_row(d)) def init_parser(): @@ -135,21 +121,22 @@ def init_parser(): return parser if __name__ == '__main__': - parser = init_parser() - args = parser.parse_args() + args = init_parser().parse_args() key_map = json.load(args.key_map) - loader = None + if args.each_line: loader = MultiLineJson2Csv(key_map) else: loader = Json2Csv(key_map) - loader.load(args.json_file) + output_csv = args.output_csv + if output_csv is None: + file_name, ext = os.path.splitext(args.json_file.name) + output_csv = file_name + '.csv' - outfile = args.output_csv - if outfile is None: - fileName, fileExtension = os.path.splitext(args.json_file.name) - outfile = fileName + '.csv' + with open(output_csv, 'wb+') as f: + writer = csv.DictWriter(f, loader.key_map.keys()) + writer.writeheader() - loader.write_csv(filename=outfile, make_strings=args.strings) + loader.transcribe(args.json_file, writer, args.strings)