-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtsvtojson.py
93 lines (77 loc) · 2.94 KB
/
tsvtojson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
def _js_escape(text):
    """Escape *text* for embedding inside a single-quoted JavaScript string."""
    # Backslashes go first; otherwise the ones added for quotes get doubled.
    return text.replace('\\', '\\\\').replace("'", "\\'")


def _header_entry(well):
    """Render one well's identifying fields as a JavaScript object literal."""
    return ("{ 'uid': '" + _js_escape(well['UID']) +
            "', 'api': '" + _js_escape(well['API']) +
            "', 'name': '" + _js_escape(well['Name']) +
            "', 'operator': '" + _js_escape(well['Operator']) +
            "', 'lat': " + well['Latitude'] +
            ", 'lon': " + well['Longitude'] + " }")


def _finish_well(record, min_oil_months, by_well):
    """Trim a completed well record and keep it if enough months remain."""
    strip_zeros(record)
    if len(record['Oil']) > min_oil_months:
        by_well.append(record)


def main(args, output):
    """Convert a per-month production TSV into a JavaScript object literal.

    The first line of the TSV names the columns. The columns read are UID,
    API, Name, Operator, Latitude, Longitude, Month, Oil, Gas and Water.
    Consecutive rows sharing a UID are collected into one well; each well is
    passed through ``strip_zeros`` and kept only when it is left with more
    than ``min_oil_months`` months.

    The result has five parallel arrays: ``header`` (one object per well) and
    ``month``, ``oil``, ``gas``, ``water`` (one array per well). Month values
    are written as quoted strings; Oil, Gas, Water, Latitude and Longitude
    are written verbatim, with ``null`` substituted for an empty
    Latitude/Longitude.

    Args:
        args: ``[tsv_path, min_oil_months]`` plus an optional third element;
            when present the output is prefixed with ``var <name> = ``.
        output: Writable text stream that receives the result.

    Raises:
        ValueError: If ``args[1]`` is not an integer.
        KeyError: If a data row lacks one of the required columns.
        OSError: If the TSV file cannot be opened.
    """
    min_oil_months = int(args[1])
    by_well = []
    record = None

    with open(args[0]) as f:
        hdr = f.readline().rstrip().split('\t')
        for line in f:
            # Strip the line ending and trailing spaces but NOT tabs, so that
            # empty trailing columns still produce (empty) fields.
            line = line.rstrip(' \r\n')
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            d = dict(zip(hdr, line.split('\t')))
            uid = d['UID']
            # Compare against the open record rather than a '' sentinel so a
            # well with an empty UID still gets a record of its own.
            if record is None or uid != record['UID']:
                if record is not None:
                    _finish_well(record, min_oil_months, by_well)
                record = {
                    'UID': uid,
                    'API': d['API'] or 'null',
                    'Name': d['Name'] or 'null',
                    'Operator': d['Operator'] or 'null',
                    'Latitude': d['Latitude'] or 'null',
                    'Longitude': d['Longitude'] or 'null',
                    'Month': [],
                    'Oil': [],
                    'Gas': [],
                    'Water': [],
                }
            record['Month'].append(d['Month'])
            record['Oil'].append(d['Oil'])
            record['Gas'].append(d['Gas'])
            record['Water'].append(d['Water'])

    if record is not None:
        _finish_well(record, min_oil_months, by_well)

    if len(args) == 3:
        output.write('var ' + args[2] + ' = ')

    output.write("{\n\t'header': [")
    output.write(','.join("\n\t\t" + _header_entry(w) for w in by_well))

    output.write("\n\t],\n\t'month': [")
    output.write(','.join(
        "\n\t\t[ " + ', '.join("'" + _js_escape(x) + "'" for x in w['Month']) +
        " ]" for w in by_well))

    # The three numeric series share one layout; values are emitted unquoted.
    for key in ('Oil', 'Gas', 'Water'):
        output.write("\n\t],\n\t'" + key.lower() + "': [")
        output.write(','.join(
            "\n\t\t[ " + ', '.join(w[key]) + " ]" for w in by_well))

    output.write("\n\t]\n}")
def strip_zeros(rec):
    """Drop leading and trailing zero-oil months from a well record, in place.

    A month counts as zero-oil when its ``'Oil'`` entry is exactly the string
    ``'0'``. The same leading and trailing positions are removed from the
    ``'Month'``, ``'Oil'``, ``'Gas'`` and ``'Water'`` lists so they stay
    aligned. Zero entries between non-zero ones are kept.

    Args:
        rec: Dict holding the four lists named above; each list is replaced
            by its trimmed slice.
    """
    oil = rec['Oil']
    count = len(oil)

    # First index whose oil value is not '0' (== count when all are zero).
    start = 0
    while start < count and oil[start] == '0':
        start += 1

    # One past the last non-zero index; never moves left of `start`.
    stop = count
    while stop > start and oil[stop - 1] == '0':
        stop -= 1

    for key in ('Month', 'Oil', 'Gas', 'Water'):
        rec[key] = rec[key][start:stop]
if __name__ == '__main__':
    # Expected command line: script, tsv-file, min-nonzero-oil and an
    # optional var-name, i.e. three or four entries in sys.argv.
    if not 3 <= len(sys.argv) <= 4:
        print('Usage: {0} tsv-file min-nonzero-oil [var-name]'.format(
            sys.argv[0]), file=sys.stderr)
        # Non-zero status so shells and build scripts can tell a usage error
        # apart from a successful conversion.
        sys.exit(2)
    # main() returns None, which sys.exit() treats as success.
    sys.exit(main(sys.argv[1:], sys.stdout))