-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreatePAT_extended_exec.R
executable file
·295 lines (239 loc) · 9.06 KB
/
createPAT_extended_exec.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/usr/local/bin/Rscript
# This Rscript will ingest pat_<id>_latest7.rda files and use them to create pat
# files with extended time ranges: 45-day and monthly.
#
# See test/Makefile for testing options
#
# ----- . AirSensor 1.1.x . first pass
# Script version; written to the log and printed by the --version flag
VERSION <- "0.3.0"
# The following packages are attached here so they show up in the sessionInfo
suppressPackageStartupMessages({
library(MazamaCoreUtils)
library(AirSensor)
})
# ----- Get command line arguments ---------------------------------------------

# Populate `opt` with archiveBaseDir, logDir and version. Rscript invocations
# parse them from the command line; interactive sessions use values built from
# the current working directory.
if ( !interactive() ) {

  # Command line flags understood by this script
  cli_options <- list(
    optparse::make_option(
      c("-o", "--archiveBaseDir"),
      default = getwd(),
      help = "Output base directory for generated .RData files [default = \"%default\"]"
    ),
    optparse::make_option(
      c("-l", "--logDir"),
      default = getwd(),
      help = "Output directory for generated .log file [default = \"%default\"]"
    ),
    optparse::make_option(
      c("-V", "--version"),
      action = "store_true",
      default = FALSE,
      help = "Print out version number [default = \"%default\"]"
    )
  )

  # Parse arguments
  cli_parser <- optparse::OptionParser(option_list = cli_options)
  opt <- optparse::parse_args(cli_parser)

} else {

  # RStudio session
  # NOTE: Remember to set the working directory for logging with setwd()
  opt <- list(
    archiveBaseDir = file.path(getwd(), "data"),
    logDir = file.path(getwd(), "logs"),
    version = FALSE
  )

}

# Print out version and quit
if ( opt$version ) {
  cat("createPAT_extended_exec.R ", VERSION, "\n", sep = "")
  quit()
}
# ----- Validate parameters ----------------------------------------------------

# Both directories must already exist; a missing one stops the script.

# The archive directory is checked first and, when present, registered with
# setArchiveBaseDir() before the log directory is examined.
if ( !dir.exists(opt$archiveBaseDir) ) {
  stop("archiveBaseDir not found: ", opt$archiveBaseDir)
}
setArchiveBaseDir(opt$archiveBaseDir)

if ( !dir.exists(opt$logDir) ) {
  stop("logDir not found: ", opt$logDir)
}
# ----- Set up logging ---------------------------------------------------------

# Path of the ERROR log; kept in a variable for use at the very end
errorLog <- file.path(opt$logDir, "createPAT_extended_ERROR.log")

# One log file per level, all located in opt$logDir
logger.setup(
  traceLog = file.path(opt$logDir, "createPAT_extended_TRACE.log"),
  debugLog = file.path(opt$logDir, "createPAT_extended_DEBUG.log"),
  infoLog = file.path(opt$logDir, "createPAT_extended_INFO.log"),
  errorLog = errorLog
)

# Interactive sessions log at the TRACE level
if ( interactive() ) {
  logger.setLevel(TRACE)
}

# Silence other warning messages
options(warn = -1) # -1=ignore, 0=save/print, 1=print, 2=error

# Start logging
logger.info("Running createPAT_extended_exec.R version %s", VERSION)

# Record the script options and the R session at the DEBUG level
opt_lines <- capture.output(str(opt))
logger.debug("Script options: \n\n%s\n", paste(opt_lines, collapse = "\n"))

session_lines <- capture.output(sessionInfo())
logger.debug("R session:\n\n%s\n", paste(session_lines, collapse = "\n"))
# ------ Get timestamps --------------------------------------------------------

# Build the 45-day window plus the date ranges and stamps for the current and
# previous month. Any failure is logged as fatal and stops the script.
tryCatch(
  expr = {

    # All datestamps are UTC
    timezone <- "UTC"

    # NOTE: Always extend month boundaries by one UTC day on each end to make
    # NOTE: sure we have complete days in any local time.

    # Reference time and the start of the 45-day window
    now <- lubridate::now(tzone = timezone)
    now_m45 <- now - lubridate::ddays(45)

    # Current month: padded range, "YYYY", "YYYYMM" and "MM" stamps
    cur_monthStart <- lubridate::floor_date(now, "month") - lubridate::ddays(1)
    cur_monthEnd <- lubridate::ceiling_date(now, "month") + lubridate::ddays(1)
    cur_yearStamp <- strftime(now, "%Y", tz = timezone)
    cur_monthStamp <- strftime(now, "%Y%m", tz = timezone)
    cur_mmStamp <- substr(cur_monthStamp, 5, 6)

    # Previous month: derived from a date 14 days before cur_monthStart
    prev_midMonth <- cur_monthStart - lubridate::ddays(14)
    prev_monthStart <- lubridate::floor_date(prev_midMonth, "month") - lubridate::ddays(1)
    prev_monthEnd <- lubridate::ceiling_date(prev_midMonth, "month") + lubridate::ddays(1)
    prev_yearStamp <- strftime(prev_midMonth, "%Y", tz = timezone)
    prev_monthStamp <- strftime(prev_midMonth, "%Y%m", tz = timezone)
    prev_mmStamp <- substr(prev_monthStamp, 5, 6)

  },
  error = function(err) {
    failure <- paste("Failed to create timestamps: ", err)
    logger.fatal(failure)
    stop(failure)
  }
)
# Set up data directories

# Derive the "latest" and monthly directories under opt$archiveBaseDir and
# create the two monthly directories when they are missing. Any failure is
# logged as fatal and stops the script.
tryCatch(
  expr = {

    logger.trace("Setting up data directories")

    latestDataDir <- paste0(opt$archiveBaseDir, "/pat/latest")
    cur_monthlyDir <- paste0(opt$archiveBaseDir, "/pat/", cur_yearStamp, "/", cur_mmStamp)
    prev_monthlyDir <- paste0(opt$archiveBaseDir, "/pat/", prev_yearStamp, "/", prev_mmStamp)

    logger.trace("latestDataDir = %s", latestDataDir)
    logger.trace("cur_monthlyDir = %s", cur_monthlyDir)
    logger.trace("prev_monthlyDir = %s", prev_monthlyDir)

    # Current month first, then previous month
    for ( monthlyDir in c(cur_monthlyDir, prev_monthlyDir) ) {
      if ( !dir.exists(monthlyDir) ) {
        dir.create(monthlyDir, showWarnings = FALSE, recursive = TRUE)
      }
    }

  },
  error = function(err) {
    failure <- paste("Failed to set up data directories: ", err)
    logger.fatal(failure)
    stop(failure)
  }
)
# ------ Load PAS object -------------------------------------------------------

# Load the PAS object with pas_load(). A failure is logged as fatal and stops
# the script.
tryCatch(
  expr = {
    # SCAQMD Database
    logger.info("Loading PAS data ...")
    pas <- pas_load()
  },
  error = function(err) {
    failure <- paste("Fatal PAS load Execution: ", err)
    logger.fatal(failure)
    stop(failure)
  }
)

# Get Unique IDs
deviceDeploymentIDs <- pas_getDeviceDeploymentIDs(pas)
# ------ Create 45-day PAT objects ---------------------------------------------

# For every device-deployment ID:
#   1. load pat_<id>_latest7.rda (IDs without this file are skipped),
#   2. join it onto the existing pat_<id>_latest45.rda, if there is one,
#   3. rewrite the latest45 file and the current-month file,
#   4. rewrite the previous-month file while day-of-month is below 7.
#
# A failure for one ID is logged as a warning and the loop continues with the
# next ID. The `finally` clause always logs the number of IDs processed and the
# process timings.
tryCatch(
  expr = {

    # init counts
    count <- 0
    successCount <- 0

    for ( deviceDeploymentID in deviceDeploymentIDs ) {

      # update count
      count <- count + 1

      tryCatch(
        expr = {

          # Construct file paths
          tryCatch(
            expr = {
              latest7Path <- file.path(latestDataDir, paste0("pat_", deviceDeploymentID, "_latest7.rda"))
              latest45Path <- file.path(latestDataDir, paste0("pat_", deviceDeploymentID, "_latest45.rda"))
              cur_monthPath <- file.path(cur_monthlyDir, paste0("pat_", deviceDeploymentID, "_", cur_monthStamp, ".rda"))
              prev_monthPath <- file.path(prev_monthlyDir, paste0("pat_", deviceDeploymentID, "_", prev_monthStamp, ".rda"))
            },
            error = function(e) {
              msg <- paste('Failed to construct file path: ', e)
              logger.fatal(msg)
              stop(msg)
            }
          )

          # Load latest7 from path
          if ( file.exists(latest7Path) ) {
            latest7 <- get(load(latest7Path))
          } else {
            logger.trace("Skipping %s, missing %s", deviceDeploymentID, latest7Path)
            next
          }

          logger.trace(
            "%4d/%d Updating %s",
            count,
            length(deviceDeploymentIDs),
            latest45Path
          )

          # Load latest45 from path
          if ( !file.exists(latest45Path) ) {

            pat_full <- latest7 # default when starting from scratch

          } else {

            latest45 <- get(load(latest45Path))

            # NOTE: PWFSL monitors may come and go so the pwfsl_closest~ data might
            # NOTE: be different in latest7 and latest45. We update the latest45
            # NOTE: record to always use the latest7$pwfsl_closest~ data so that
            # NOTE: pat_join() doesn't fail with:
            # NOTE: "`pat` objects must be of the same monitor"
            latest45$meta$pwfsl_closestMonitorID <- latest7$meta$pwfsl_closestMonitorID
            latest45$meta$pwfsl_closestDistance <- latest7$meta$pwfsl_closestDistance

            # Join
            pat_full <- pat_join(latest45, latest7)

          }

          # Update the latest45 file (trimmed to day boundaries)
          pat <-
            pat_full %>%
            pat_filterDate(now_m45, now, timezone = latest7$meta$timezone[1])
          save(list = "pat", file = latest45Path)

          # Update the current month file
          pat <-
            pat_full %>%
            pat_filterDate(cur_monthStart, cur_monthEnd, timezone = timezone)
          save(list = "pat", file = cur_monthPath)

          # Update the previous month file until 7-days into the current month
          if ( lubridate::day(now) < 7 ) {
            pat <-
              pat_full %>%
              pat_filterDate(prev_monthStart, prev_monthEnd, timezone = timezone)
            save(list = "pat", file = prev_monthPath)
          }

          successCount <- successCount + 1

        },
        error = function(e) {
          # NOTE: Log a plain string through an explicit "%s" format instead of
          # NOTE: handing the raw condition object to the logger. The entry
          # NOTE: now names the failing device, and the handler no longer
          # NOTE: depends on how the logger coerces a non-character message.
          logger.warn(
            "Failed to update extended PAT files for %s: %s",
            deviceDeploymentID,
            conditionMessage(e)
          )
        }
      )

    }

  },
  error = function(e) {
    msg <- paste("Error creating extended PAT files: ", e)
    logger.fatal(msg)
  },
  finally = {

    if ( successCount == 0 ) {
      logger.fatal("0 extended PAT files were generated.")
    } else {
      logger.info("%d extended PAT files were generated.", successCount)
      logger.info("Completed successfully!")
      # Guarantee that the errorLog exists
      if ( !file.exists(errorLog) )
        dummy <- file.create(errorLog)
    }

    # Process timings reported by proc.time()
    ptm <- proc.time()
    logger.info("User: %.0f, System: %.0f, Elapsed: %.0f seconds", ptm[1], ptm[2], ptm[3])

  }
)