-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdahmm_eval_single.py
executable file
·231 lines (206 loc) · 8.83 KB
/
rdahmm_eval_single.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/local/bin/python
#==========================================================================
# Given a RDAHMM Model dataset, generate necessary input files and
# execute RDAHMM evaluation command for each station, as well as produce
# necessary files for front end plotting.
# To be invoked by the overall rdahmm_eval.py using subprocess,
# Ingested data, model and destination evaluation directories are defined
# in properties.
#
# input: scripps dataset name, e.g WNAM_Clean_DetrendNeuTimeSeries_comb
# output: RDAHMM input files and evaluation results for each station
#
# usage:
# rdahmm_eval_single.py scripps_dataset_name
#
# output:
# /path/to/rdahmm/eval/daily_project_stationID_eval_date/*
#===========================================================================
import os, sys, string, glob
import sqlite3 as db
import datetime, csv
from properties import properties
import zipfile
numargv = len(sys.argv)
if numargv == 1:
sys.exit("usage: rdahmm_eval_single.py scripps_dataset_name")
elif numargv == 2:
dataset = sys.argv[1]
else:
sys.exit("Invalid number of parameters!")
data_path = properties('data_path') + "/" + dataset + "/"
model_path = properties('model_path') + "/" + dataset + "/"
eval_path = properties('eval_path') + "/" + dataset + "/"
rdahmm_bin = properties('rdahmm_bin')
today = datetime.date.today().isoformat()
if not os.path.exists(eval_path):
cmd = "mkdir -p " + eval_path
os.system(cmd)
for station in os.listdir(model_path):
stationID = station[-4:]
# check if the station still exists
reffile = model_path + "daily_project_" + stationID + "/daily_project_" + stationID + ".input.ref"
with open(reffile, "r") as f:
refline = f.read()
f.close
if "9999 9999 -9999" in refline:
continue
dbfile = data_path + stationID + ".sqlite"
if not os.path.exists(dbfile):
continue
# station eval directory is named as: daily_project_twhl_2012-05-08
stationDir = eval_path + "daily_project_" + stationID + "_" + today + "/"
if not os.path.exists(stationDir):
cmd = "mkdir -p " + stationDir
os.system(cmd)
# use station eval directory as current working directory
os.chdir(stationDir)
# copy station dbfile from data_path to eval directory
dbfile = data_path + stationID + ".sqlite"
cmd = "cp -p " + dbfile + " ."
os.system(cmd)
stationdb = stationDir + stationID + ".sqlite"
# copy model results from model_path to eval directory
modelfiles = model_path + "daily_project_" + stationID + "/daily_project_*"
cmd = "cp -p " + modelfiles + " ."
os.system(cmd)
# connect to the station database to generate evaluation input data file
conn = db.connect(stationdb)
cur = conn.cursor()
# record start and end time of current evaluation
# sql = "SELECT MIN(Timestamp) FROM StationGPSTimeSeries"
# start_epoch = cur.execute(sql).fetchone()[0]
# sql = "SELECT MAX(Timestamp) FROM StationGPSTimeSeries"
# end_epoch = cur.execute(sql).fetchone()[0]
# generate eval input file using all data up to date
sql = "SELECT North, East, Up FROM StationGPSTimeSeries ORDER BY Timestamp ASC"
rows = cur.execute(sql).fetchall()
dataCount = str(len(rows))
evalfile = stationDir + "daily_project_" + stationID + "_" + today + ".all.input"
csvWriter = csv.writer(open(evalfile, 'w'), delimiter = ' ')
csvWriter.writerows(rows)
del csvWriter
# following is commented out, since it's handled by plotting part
# write start and end time of current evaluation in files
# startfile = evalfile + ".starttime"
# endfile = evalfile + ".endtime"
# with (open(startfile, 'w')) as f:
# f.write(start_epoch)
# f.close
# with (open(endfile, 'w')) as f:
# f.write(end_epoch)
# f.close
# execute RDAHMM model command with properly replaced parameters
dimensionCount = "3"
proBaseName = "daily_project_" + stationID + "_" + today
modelBaseName = "daily_project_" + stationID
rdahmm_eval_parm = properties('rdahmm_eval_parm')
rdahmm_eval_parm = string.replace(rdahmm_eval_parm, "<proBaseName>", proBaseName)
rdahmm_eval_parm = string.replace(rdahmm_eval_parm, "<dataCount>", dataCount)
rdahmm_eval_parm = string.replace(rdahmm_eval_parm, "<dimensionCount>", dimensionCount)
rdahmm_eval_parm = string.replace(rdahmm_eval_parm, "<modelBaseName>", modelBaseName)
rdahmm_eval_cmd = rdahmm_bin + " " + rdahmm_eval_parm
#print rdahmm_eval_cmd
# os.system can be replaced with other non-blocking invocation method
# for parallelism of individual stations, but synchronization will get
# much more complicated.
os.system(rdahmm_eval_cmd)
# check if results .Q file contains 0, if yes rerun with -addstate option
qfile = proBaseName + ".Q"
with open(qfile, 'r') as qf:
qlines = qf.read()
if "0" in qlines:
rdahmm_eval_cmd = rdahmm_eval_cmd + " -addstate"
#print rdahmm_eval_cmd
os.system(rdahmm_eval_cmd)
qf.close
# start to produce plotting related files
# 1. zip file of the model results if not already exists
modelzip = eval_path + "daily_project_" + stationID + ".zip"
if not os.path.exists(modelzip):
modelname = "daily_project_" + stationID + "/"
myzip = zipfile.ZipFile(modelzip, 'w')
for filename in os.listdir(model_path+modelname):
myzip.write(filename, modelname+filename, zipfile.ZIP_DEFLATED)
myzip.close()
# 2. eval result .Q file is renamed (use copy here for now) to .all.Q
cmd = "cp -p " + proBaseName + ".Q " + proBaseName + ".all.Q"
os.system(cmd)
# 3. required .all.raw file for plot_go.sh, this is pretty silly
sql = "SELECT Timestamp, North, East, Up, Nsig, Esig, Usig, Interploated FROM StationGPSTimeSeries ORDER BY Timestamp ASC"
rows = cur.execute(sql).fetchall()
rawfile = stationDir + "daily_project_" + stationID + "_" + today + ".all.raw"
csvWriter = csv.writer(open(rawfile, 'w'), delimiter = ' ')
newrows = []
for row in rows:
newrow = list(row)
newrow.insert(0, stationID)
if newrow[-1] == 0:
newrow[1] = newrow[1] + "T12:00:00"
else:
newrow[1] = newrow[1] + "T22:22:22"
newrows.append(newrow[:-1])
csvWriter.writerows(newrows)
del csvWriter
# 4. required .plotswf.input file for plotting, again silly
plotswffile = stationDir + "daily_project_" + stationID + "_" + today + ".plotswf.input"
allQfile = stationDir + "daily_project_" + stationID + "_" + today + ".all.Q"
with open(allQfile, 'r') as qfile:
qrows = qfile.read()
qfile.close
qrows = string.split(qrows)
csvWriter = csv.writer(open(plotswffile, 'w'), delimiter = ' ')
newrows = []
for i in range(len(rows)):
newrow = list(rows[i])
# if the row is interpolated, skip
if newrow[-1] == 1:
continue
# replace Nsig/Esig/Usig with N/E/U, for plotting purpose
newrow[4:7] = newrow[1:4]
newrow.insert(0, qrows[i])
newrows.append(newrow[:-1])
csvWriter.writerows(newrows)
del csvWriter
cur.close()
conn.close()
# 5. required .dygraphs.js file for plotting, use existing
# dygraphsJsCreator.perl to generation from .plotswf.input
plotjsfile = stationDir + "daily_project_" + stationID + "_" + today + ".dygraphs.js"
dygraphsJscmd = properties('dygraphsJs')
cmd = dygraphsJscmd + " " + plotswffile + " > " + plotjsfile
os.system(cmd)
# 6. delete old evaluation result directory
evaldirs = eval_path + "daily_project_" + stationID + "_" + "*/"
# todaydir = string.replace(stationDir, "//", "/")
for dir in glob.glob(evaldirs):
if not dir == stationDir:
cmd = "rm -r " + dir
#print cmd
os.system(cmd)
#sys.exit(0)
# 6.7 delete old eval zip files
oldzips = eval_path + "daily_project_" + stationID + "_" + "*.zip"
cmd = "rm " + oldzips
#print cmd
os.system(cmd)
# 7. zip file of the evaluation directory
evalzip = eval_path + "daily_project_" + stationID + "_" + today + ".zip"
evalname = "daily_project_" + stationID + "_" + today + "/"
myzip = zipfile.ZipFile(evalzip, 'w')
for filename in os.listdir(stationDir):
myzip.write(filename, evalname+filename, zipfile.ZIP_DEFLATED)
myzip.close()
# zip file for the whole dataset, i.e. eval zips for all stations
os.chdir(eval_path)
# delete old total zip files first
oldzips = properties('eval_path') + "/" + dataset + "_" + "*.zip"
cmd = "rm " + oldzips
#print cmd
os.system(cmd)
totalzip = properties('eval_path') + "/" + dataset + "_" + today + ".zip"
myzip = zipfile.ZipFile(totalzip, 'w', allowZip64=True)
for filename in os.listdir(eval_path):
if filename[-14:] == today + ".zip":
myzip.write(filename, dataset+"_" +today+"/"+filename, zipfile.ZIP_DEFLATED)
myzip.close()