# FormatData.py
import sys
import re
# CSV whose first line supplies the output column names and their order.
HEADER_SOURCE = '../VT/VT_2017-10_data.csv'
# Combined CSV written by this script.
OUTPUT_PATH = 'MASON-DATA-Dec-2017.csv'
# One input log per day; %02d is replaced by the zero-padded day number.
DAILY_LOG_TEMPLATE = '../Mason/201712%02d.anon'
# Days 1..31 inclusive.
DAYS = range(1, 32)


def read_columns(path):
    """Return the stripped column names from the first line of the CSV at *path*.

    The line is split on plain commas (no quoting support), exactly as the
    original script did.
    """
    with open(path, 'r') as src:
        return [name.strip() for name in src.readline().split(',')]


def parse_job_record(line, header):
    """Extract the known ``key=value`` fields from one log line.

    *line* is split on whitespace. The second token is split on ``;``; when
    it has exactly four parts, the second part is the record type and the
    fourth part is itself a ``key=value`` pair. Every token from the third
    onwards is treated as a ``key=value`` pair. Only keys present in
    *header* (a mapping of column name -> column index) are kept.

    Returns a dict mapping column index -> value, or ``None`` when the line
    must not produce an output row: either it has fewer than two tokens
    (e.g. a blank line) or its record type is not ``'E'`` (not the final
    entry for a job).
    """
    tokens = line.split()
    if len(tokens) < 2:
        # Blank or truncated line: nothing to parse (previously IndexError).
        return None

    values = {}
    tag = tokens[1].split(';')
    if len(tag) == 4:
        if tag[1] != 'E':
            return None
        # Split on the first '=' only so values containing '=' stay intact.
        key, sep, value = tag[3].partition('=')
        if sep and key in header:
            values[header[key]] = value

    for token in tokens[2:]:
        key, sep, value = token.partition('=')
        # Tokens without '=' carry no value and are ignored.
        if sep and key in header:
            values[header[key]] = value
    return values


def format_row(cells):
    """Join *cells* into one output line.

    Every cell, including the last, is followed by a comma and the line
    ends with a newline; this keeps the file format the script has always
    produced. No quoting or escaping is applied.
    """
    return ''.join(cell + ',' for cell in cells) + '\n'


def main():
    """Convert the daily log files into one CSV laid out like HEADER_SOURCE.

    Writes a header row followed by one row per kept log record to
    OUTPUT_PATH, and prints a diagnostic for every column a record did not
    supply.
    """
    columns = read_columns(HEADER_SOURCE)
    # Column name -> index; for a repeated name the last occurrence wins.
    header = dict((name, idx) for idx, name in enumerate(columns))
    # print() is always given a single argument so the output is identical
    # whether the script runs under Python 2 or Python 3.
    print(header)
    print(len(header))

    width = len(columns)
    written = 0  # number of data rows written so far, used in diagnostics
    with open(OUTPUT_PATH, 'w') as out:
        out.write(format_row(columns))
        for day in DAYS:
            with open(DAILY_LOG_TEMPLATE % day, 'r') as log:
                for line in log:
                    values = parse_job_record(line, header)
                    if values is None:
                        continue
                    for idx in range(width):
                        if idx not in values:
                            print('line no :' + str(written) + ' ' + str(idx) + ' not found')
                    out.write(format_row([values.get(idx, '') for idx in range(width)]))
                    written += 1


if __name__ == '__main__':
    main()