From e7bb725bc77a41faec3877d86ad0ec45c0d3f288 Mon Sep 17 00:00:00 2001 From: Jonathan Callahan Date: Thu, 18 Jul 2019 14:56:37 -0700 Subject: [PATCH] clean up localExecutables --- localExecutables/README | 2 - localExecutables/airnow_combineMonths_exec.R | 146 ------------------ localExecutables/airsis_getYearlyData_exec.R | 123 --------------- .../epa_createAnnualDataframes_exec.R | 107 ------------- localExecutables/wrcc_createLibrary_exec.R | 20 --- localExecutables/wrcc_getYearlyData_exec.R | 125 --------------- {localExecutables => local_TODO}/openaq.R | 0 .../openaq_createLatestDataframes_exec.R | 0 .../openaq_createMonthlyDataframes_exec.R | 0 9 files changed, 523 deletions(-) delete mode 100644 localExecutables/README delete mode 100755 localExecutables/airnow_combineMonths_exec.R delete mode 100755 localExecutables/airsis_getYearlyData_exec.R delete mode 100755 localExecutables/epa_createAnnualDataframes_exec.R delete mode 100644 localExecutables/wrcc_createLibrary_exec.R delete mode 100755 localExecutables/wrcc_getYearlyData_exec.R rename {localExecutables => local_TODO}/openaq.R (100%) rename {localExecutables => local_TODO}/openaq_createLatestDataframes_exec.R (100%) rename {localExecutables => local_TODO}/openaq_createMonthlyDataframes_exec.R (100%) diff --git a/localExecutables/README b/localExecutables/README deleted file mode 100644 index 7cfef29b..00000000 --- a/localExecutables/README +++ /dev/null @@ -1,2 +0,0 @@ -This directory houses example executable Rscripts that can be set up in -cron jobs to regularly update data files. diff --git a/localExecutables/airnow_combineMonths_exec.R b/localExecutables/airnow_combineMonths_exec.R deleted file mode 100755 index d73d3103..00000000 --- a/localExecutables/airnow_combineMonths_exec.R +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env Rscript - -# This script will combine airnow monthly data into a yearly data file and save it as an RData file -# -# # Example -# airsis_getYearlyData_exec.R --startdate=20150101 --enddate=2016010223 --outputDir="~/Data/airsis" --fileName="airsis_2015" - -VERSION <- "0.0.1" - -suppressPackageStartupMessages({ - library(methods) - library(optparse) - library(PWFSLSmoke) -}) - -########################################## - -# For debugging -if ( FALSE ) { - - opt <- list(parameterName = "PM2.5", - year = "2017", - inputDir = "/Users/helen/Data/airnow/2017/PM25", - outputDir = "/Users/helen/Data/airnow/2017/PM25", - logDir = "/Users/helen/Data/airnow/2017/PM25") - -} - -combineAirnowMonths <- function(opt) { - - firstMonth <- TRUE - for (month in 1:12){ - - # Build the filepath - fileName <- paste0("airnow_", opt$parameterName, "_", opt$year, "_", stringr::str_pad(month, 2, pad = 0), ".RData") - filePath <- file.path(opt$inputDir, fileName) - logger.debug(paste0("loading ", filePath)) - print(paste0("loading ", filePath)) - if ( !file.exists(filePath) ) { - logger.debug(paste0("File ", filePath, " does not exist.")) - print(paste0("File ", filePath, " does not exist.")) - } else { - - # Load the correct ws_monitor object - airnowMonth <- load(filePath) - airnowMonth <- eval(parse(text = airnowMonth)) - - if ( firstMonth ) { - airnowYear <- airnowMonth - firstMonth <- FALSE - } else { - # NOTE: The way we grow the ws_monitor object is an example of what NOT to do in R but - # NOTE: we are limited by the fact that monitor_join() can only join 2 monitors. - print("combining months...") - airnowYear <- monitor_combine(list(airnowYear, airnowMonth)) - } - logger.debug(paste0("successfully combined month ", month)) - print(paste0("successfully combined month ", month)) - } - } - - # Set the name of airnowYear to fileName - if (is.null(opt$fileName)) { - fileName <- paste0('airnow_', opt$year) - } else { - fileName <- opt$fileName - } - - assign(fileName, airnowYear) - filePath <- file.path(opt$outputDir, fileName) - - # Save the RData file - save(list = fileName, file = filePath) - -} - -################################################################################ -# Main program - -# ----- Parse command line options --------------------------------------------- - -# Set up option parser -option_list <- list( - make_option(c("-n","--parameterName"),default='PM2.5', help="parameter name"), - make_option(c("-y","--year"), default=2016, help="Specify a single year to combine months for"), - make_option(c("-d","--inputDir"), default=getwd(), help="Directory containing airnow monthly .RData files [default=\"%default\"]"), - make_option(c("-o","--outputDir"), default=getwd(), help="Output directory for generated .RData files [default=\"%default\"]"), - make_option(c("-f","--fileName"), default="airnow_[year]", help="Name for the resultant .RData file"), - make_option(c("-l","--logDir"), default=getwd(), help="Output directory for generated .log file [default=\"%default\"]"), - # make_option(c("-s","--spatialDataDir"), default='~/Data/Spatial', - # help="Directory containing spatial datasets used by MazamaSpatialUtils [default=\"%default\"]"), - make_option(c("-V","--version"), action="store_true", default=FALSE, help="Print out version number [default=\"%default\"]") -) - -# Parse arguments -opt <- parse_args(OptionParser(option_list=option_list)) - -# Print out version and quit -if ( opt$version ) { - cat(paste0('createCSV_exec.R ',VERSION,'\n')) - quit() -} - -# Sanity checks -if ( !dir.exists(opt$inputDir) ) stop(paste0("inputDir not found: ",opt$downloadDir)) -if ( !dir.exists(opt$outputDir) ) stop(paste0("outputDir not found: ",opt$outputDir)) -if ( !dir.exists(opt$logDir) ) stop(paste0("logDir not found: ",opt$logDir)) -# -# # Add year subdirectories -# opt$downloadDir <- file.path(opt$downloadDir,opt$year) -# opt$outputDir <- file.path(opt$outputDir,opt$year) -# opt$logDir <- file.path(opt$logDir,opt$year) -# -# # Make sure the year subdirectories exist -# dir.create(opt$downloadDir, showWarnings=FALSE) -# dir.create(opt$outputDir, showWarnings=FALSE) -# dir.create(opt$logDir, showWarnings=FALSE) - -# Assign log file names -debugLog <- file.path(opt$logDir, paste0('airnow_combineMonths', '_DEBUG.log')) -infoLog <- file.path(opt$logDir, paste0('airnow_combineMonths', '_INFO.log')) -errorLog <- file.path(opt$logDir, paste0('airnow_combineMonths', '_ERROR.log')) - -# Set up logging -logger.setup(debugLog=debugLog, infoLog=infoLog, errorLog=errorLog) - -# Silence other warning messages -options(warn=-1) # -1=ignore, 0=save/print, 1=print, 2=error - - -# ----- Save airnow ws_monitor object as a RData file ------ - -result <- try( combineAirnowMonths(opt) ) - -if ( "try-error" %in% class(result) ) { - msg <- paste("Error combining airnow months: ", geterrmessage()) - logger.fatal(msg) -} else { - # Guarantee that the errorLog exists - if ( !file.exists(errorLog) ) dummy <- file.create(errorLog) - logger.info("Completed successfully!") -} - - - - diff --git a/localExecutables/airsis_getYearlyData_exec.R b/localExecutables/airsis_getYearlyData_exec.R deleted file mode 100755 index d1b8f70c..00000000 --- a/localExecutables/airsis_getYearlyData_exec.R +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env Rscript - -# Updated Jan 3, 2018 -# -# This script will download data for all airsis monitors for the specified time frame -# -# # Example -# airsis_getYearlyData_exec.R --startdate=20150101 --enddate=2016010223 --outputDir="~/Data/airsis" --fileName="airsis_2015" - -VERSION <- "0.0.1" - -suppressPackageStartupMessages({ - library(methods) - library(optparse) - library(PWFSLSmoke) - library(MazamaSpatialUtils) -}) - -########################################## - -saveAirsisData <- function(opt) { - - # Make a list of ws_monitor objects for all units - monitors <- list() - - for ( provider in c("USFS", "APCD", "ARB2", "EPA")) { - - unitids <- airsis_availableUnits(opt$startdate, opt$enddate, provider = provider) - logger.debug("----- retrieving monitro data from ", provider, " -----") - for ( unitid in unitids ) { - logger.debug(paste0("----- trying ", provider, " ", unitid, " -----")) - result <- try (monitor <- airsis_createMonitorObject(startdate = opt$startdate, enddate = opt$enddate, provider = provider, unitID = unitid)) - if ("try-error" %in% class(result)) { - print(paste0("error loading ", provider, " ", unitid, ": ", geterrmessage())) - } else { - monitors[[paste0(unitid, "_", provider)]] <- monitor - logger.debug("successfully loaded monitor data") - } - } - - } - - all_monitors <- monitor_combine(monitors) - - # Set the name of all_monitors to fileName - if (is.null(opt$fileName)) { - fileName <- paste0('airsis_', opt$startdate, "_", opt$enddate) - } else { - fileName <- opt$fileName - } - - assign(fileName, all_monitors) - filePath <- paste0(opt$outputDir, '/', fileName, '.RData') - - # Save the RData file - save(list = fileName, file = filePath) - -} - -################################################################################ -# Main program - -# ----- Parse command line options --------------------------------------------- - -# Set up option parser -option_list <- list( - make_option(c("--startdate"),default=NULL, help="starting date of the data to be downloaded"), - make_option(c("--enddate"), default=NULL, help="ending date of the data to be downloaded"), - make_option(c("--outputDir"), default=getwd(), help="Output directory for generated RData files [default=\"%default\"]"), - make_option(c("--fileName"), default=NULL, help="name for the RData file"), - make_option(c("--logDir"), default=getwd(), help="Output directory for generated .log file [default=\"%default\"]"), - make_option(c("--spatialDataDir"), default='~/Data/Spatial', help="Directory containing spatial datasets used by MazamaSpatialUtils [default=\"%default\"]"), - make_option(c("-V","--version"), action="store_true", default=FALSE, help="Print out version number [default\"%default\"]") -) - -# Parse arguments -opt <- parse_args(OptionParser(option_list=option_list)) - -# Print out version and quit -if ( opt$version ) { - cat(paste0('createCSV_exec.R ',VERSION,'\n')) - quit() -} - -# Sanity checks -if ( is.null(opt$startdate) ) stop(paste0("startdate is required")) -if ( is.null(opt$enddate) ) stop(paste0("enddate is required")) -if ( !file.exists(opt$outputDir) ) stop(paste0("outputDir not found: ",opt$outputDir)) -if ( !file.exists(opt$logDir) ) stop(paste0("logDir not found: ",opt$logDir)) - -# Assign log file names -debugLog <- file.path(opt$logDir, paste0('airsis_getYearlyData_', '_DEBUG.log')) -infoLog <- file.path(opt$logDir, paste0('airsis_getYearlyData_', '_INFO.log')) -errorLog <- file.path(opt$logDir, paste0('airsis_getYearlyData_', '_ERROR.log')) - -# Set up logging -logger.setup(debugLog=debugLog, infoLog=infoLog, errorLog=errorLog) - -# Silence other warning messages -options(warn=-1) # -1=ignore, 0=save/print, 1=print, 2=error - -# Set up MazamaSpatialUtils -setSpatialDataDir(opt$spatialDataDir) ##FOR bash - -loadSpatialData("NaturalEarthAdm1") - - -# ----- Save airnow ws_monitor object as a RData file ------ - -result <- try( saveAirsisData(opt) ) - -if ( "try-error" %in% class(result) ) { - msg <- paste("Error saving airsis data: ", geterrmessage()) - logger.fatal(msg) -} else { - # Guarantee that the errorLog exists - if ( !file.exists(errorLog) ) dummy <- file.create(errorLog) - logger.info("Completed successfully!") -} - - - - diff --git a/localExecutables/epa_createAnnualDataframes_exec.R b/localExecutables/epa_createAnnualDataframes_exec.R deleted file mode 100755 index 939674ab..00000000 --- a/localExecutables/epa_createAnnualDataframes_exec.R +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/local/bin/Rscript - -# This Rscript will go to fetch air-quality data for a specified year and specified parameter - -# This script is desgined to be run on demand as a cron job or 'at' job, see the example below - -# 1 2 3 4 5 /Users/jonathan/Projects/PWFSL/PWFSLSmoke/localExecutables/epa_createAnnualDataframes_exec.R --parameterName=PM2.5 --parameterCode=88101 --year=2010 --downloadDir=/Users/jonathan/Data/EPA/raw --outputDir=/Users/jonathan/Data/EPA/RData --logDir=/Users/jonathan/Data/EPA/RData - -VERSION = "1.0.1" - -library(methods) # always included for Rscripts -library(optparse) # to parse command line flags - -# The following packages are attached here so they show up in the sessionInfo -suppressPackageStartupMessages({ - library(PWFSLSmoke) -}) - -# Set up OptionParser -option_list <- list( - make_option(c("-n","--parameterName"),default='PM2.5', help="parameter name"), - make_option(c("-c","--parameterCode"), default='88101', help="a character string of parameter code"), - make_option(c("-y","--year"), default=2016, help="Specify a single year to download data for"), - make_option(c("-d","--downloadDir"), default=getwd(), help="Output directory for downloaded EPA .zip files [default=\"%default\"]"), - make_option(c("-o","--outputDir"), default=getwd(), help="Output directory for generated .RData files [default=\"%default\"]"), - make_option(c("-l","--logDir"), default=getwd(), help="Output directory for generated .log file [default=\"%default\"]"), - make_option(c("-s","--spatialDataDir"), default='~/Data/Spatial', help="Directory containing spatial datasets used by MazamaSpatialUtils [default=\"%default\"]"), - make_option(c("-V","--version"), action="store_true", default=FALSE, help="Print out version number [default=\"%default\"]") -) - -# Parse arguments -opt <- parse_args(OptionParser(option_list=option_list)) - -# For debugging -if ( FALSE ) { - - opt <- list(parameterName = "PM2.5", - parameterCode = "88101", - year = "2010", - downloadDir = "/Users/jonathan/Data/EPA/raw", - outputDir = "/Users/jonathan/Data/EPA/RData", - logDir = "/Users/jonathan/Data/EPA/RData", - spatialDataDir = "~/Data/Spatial") - -} - -# Print out version and quit -if ( opt$version ) { - cat(paste0('createGeoJSON_exec.R ',VERSION,'\n')) - quit() -} - -# Sanity checks -if ( !dir.exists(opt$downloadDir) ) stop(paste0("downloadDir not found: ",opt$downloadDir)) -if ( !dir.exists(opt$outputDir) ) stop(paste0("outputDir not found: ",opt$outputDir)) -if ( !dir.exists(opt$logDir) ) stop(paste0("logDir not found: ",opt$logDir)) - -# Add year subdirectories -opt$downloadDir <- file.path(opt$downloadDir,opt$year) -opt$outputDir <- file.path(opt$outputDir,opt$year) -opt$logDir <- file.path(opt$logDir,opt$year) - -# Make sure the year subdirectories exist -dir.create(opt$downloadDir, showWarnings=FALSE) -dir.create(opt$outputDir, showWarnings=FALSE) -dir.create(opt$logDir, showWarnings=FALSE) - -# Assign log file names -debugLog <- file.path(opt$logDir, paste0('epa_createAnnualDataframes_',opt$year,'_DEBUG.log')) -infoLog <- file.path(opt$logDir, paste0('epa_createAnnualDataframes_',opt$year,'_INFO.log')) -errorLog <- file.path(opt$logDir, paste0('epa_createAnnualDataframes_',opt$year,'_ERROR.log')) - -# Set up logging -logger.setup(debugLog=debugLog, infoLog=infoLog, errorLog=errorLog) - -# Silence other warning messages -options(warn=-1) # -1=ignore, 0=save/print, 1=print, 2=error - -# Set up MazamaSpatialUtils -setSpatialDataDir(opt$spatialDataDir) -loadSpatialData("NaturalEarthAdm1") - -logger.info('Running epa_createAnnualDataframes_exec.R version %s',VERSION) -sessionString <- paste(capture.output(sessionInfo()), collapse='\n') -logger.debug('R session:\n\n%s\n', sessionString) - - -# ------Downloading and processing data -------------------------- - -filename <- paste0('hourly','_',opt$parameterCode,'_',opt$year,'.zip') -zipFile <- file.path(opt$downloadDir, filename) -if ( !file.exists(zipFile) ) { - logger.info("Downloading EPA data...") - zipFile <- epa_downloadData(opt$year, opt$parameterCode, - downloadDir=opt$downloadDir) -} - -ws_monitor <- epa_createMonitorObject(zipFile, zeroMinimum=TRUE, addGoogleMeta=FALSE) - -# Create appropriate data object and file name and write the data to disk -basename <- paste('epa', opt$parameterName, opt$parameterCode, opt$year, sep='_') -assign(basename, ws_monitor) -filename <- paste0(basename, '.RData') -filepath <- file.path(opt$outputDir, filename) -save(list=basename, file=filepath) - -logger.info('Completed successfully!') diff --git a/localExecutables/wrcc_createLibrary_exec.R b/localExecutables/wrcc_createLibrary_exec.R deleted file mode 100644 index c9bf990f..00000000 --- a/localExecutables/wrcc_createLibrary_exec.R +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -defaultDataDir <- '~/Data/WRCC' -defaultOutputDir <- '~/Data/WRCC' -defaultLogLevel <- futile.logger::INFO -defaultTranscript <- 'WRCC_TRANSCRIPT.txt' - -# Set up OptionParser -option_list <- list( - optparse::make_option(c("--data"), default=defaultOutputDir, help="Directory where raw data files are located [default \"%default\"]"), - optparse::make_option(c("--output"), default=defaultOutputDir, help="Directory where data files and transcript will be written [default \"%default\"]"), - optparse::make_option(c("--log"), default=defaultLogLevel, help="The level to log at [default \"%default\"]"), - optparse::make_option(c("--logpath"), default=defaultTranscript, help="File name for the transcript log [default \"%default\"]") -) - -# Parse arguments -opt <- optparse::parse_args(optparse::OptionParser(option_list=option_list)) - -silence <- PWFSLSmoke::wrcc_buildLibrary(dataDir=opt$data, outputDir=opt$output, logLevel=opt$log, - transcript=opt$logpath) \ No newline at end of file diff --git a/localExecutables/wrcc_getYearlyData_exec.R b/localExecutables/wrcc_getYearlyData_exec.R deleted file mode 100755 index 8d6b1814..00000000 --- a/localExecutables/wrcc_getYearlyData_exec.R +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env Rscript - -# Updated Jan 3, 2018 -# -# This script will download data for all airsis monitors for the specified time frame -# -# # Example -# airsis_getYearlyData_exec.R --startdate=20150101 --enddate=2016010223 --outputDir="~/Data/airsis" --fileName="airsis_2015" - -VERSION <- "0.0.1" - -suppressPackageStartupMessages({ - library(methods) - library(optparse) - library(PWFSLSmoke) - library(MazamaSpatialUtils) -}) - -########################################## - -saveWrccData <- function(opt, unitids = c("sm11", "sm13", "sm15", "sm16", "sm17", "sm19", "sm20", "sm21", "sm22", - "sm23", "sm24", "sm65", "sm66", "sm67", "sm68", "sm96", "sm84", "sm215", - "sm216", "sm217", "e231", "e418", "e591", "e592", "e840", "e866", "e882", - "e925", "e969", "s139", "s152", "s153", "s1306", "s1307", "s269", "s278", - "s2264", "s2265", "s2922", "s2923", "s2924", "s328", "s386", - "s539", "s549", "s833", "s835", "s855", "s856", "s915", "s916", - "917", "s960", "s315", "s316", "s317", "s318", "sm25", "sm86", "sm52", - "sm65", "smf1", "smn1", "smn2", "smn3", "smy1", "smrs", "sml1", "sml2")) { - - # unitids = all possible unit ids, from https://wrcc.dri.edu/cgi-bin/smoke.pl - - monitors <- list() - - for ( unitid in unitids ) { - logger.debug(paste0("----- trying ", unitid, " -----")) - result <- try (monitor <- wrcc_createMonitorObject(startdate = opt$startdate, enddate = opt$enddate, unitID = unitid)) - if ("try-error" %in% class(result)) { - print(paste0("error loading ", unitid, ": ", geterrmessage())) - } else { - monitors[[unitid]] <- monitor - logger.debug("successfully loaded monitor data") - } - } - - all_monitors <- monitor_combine(monitors) - - # Set the name of all_monitors to fileName - if (is.null(opt$fileName)) { - fileName <- paste0('wrcc_', opt$startdate, "_", opt$enddate) - } else { - fileName <- opt$fileName - } - - assign(fileName, all_monitors) - filePath <- paste0(opt$outputDir, '/', fileName, '.RData') - - # Save the RData file - save(list = fileName, file = filePath) - -} - -################################################################################ -# Main program - -# ----- Parse command line options --------------------------------------------- - -# Set up option parser -option_list <- list( - make_option(c("--startdate"),default=NULL, help="starting date of the data to be downloaded"), - make_option(c("--enddate"), default=NULL, help="ending date of the data to be downloaded"), - make_option(c("--outputDir"), default=getwd(), help="Output directory for generated RData files [default=\"%default\"]"), - make_option(c("--fileName"), default=NULL, help="name for the RData file"), - make_option(c("--logDir"), default=getwd(), help="Output directory for generated .log file [default=\"%default\"]"), - make_option(c("--spatialDataDir"), default='~/Data/Spatial', help="Directory containing spatial datasets used by MazamaSpatialUtils [default=\"%default\"]"), - make_option(c("-V","--version"), action="store_true", default=FALSE, help="Print out version number [default\"%default\"]") -) - -# Parse arguments -opt <- parse_args(OptionParser(option_list=option_list)) - -# Print out version and quit -if ( opt$version ) { - cat(paste0('wrcc_getYearlyData_exec.R ',VERSION,'\n')) - quit() -} - -# Sanity checks -if ( is.null(opt$startdate) ) stop(paste0("startdate is required")) -if ( is.null(opt$enddate) ) stop(paste0("enddate is required")) -if ( !file.exists(opt$outputDir) ) stop(paste0("outputDir not found: ",opt$outputDir)) -if ( !file.exists(opt$logDir) ) stop(paste0("logDir not found: ",opt$logDir)) - -# Assign log file names -debugLog <- file.path(opt$logDir, paste0('wrcc_getYearlyData_', '_DEBUG.log')) -infoLog <- file.path(opt$logDir, paste0('wrcc_getYearlyData_', '_INFO.log')) -errorLog <- file.path(opt$logDir, paste0('wrcc_getYearlyData_', '_ERROR.log')) - -# Set up logging -logger.setup(debugLog=debugLog, infoLog=infoLog, errorLog=errorLog) - -# Silence other warning messages -options(warn=-1) # -1=ignore, 0=save/print, 1=print, 2=error - -# Set up MazamaSpatialUtils -setSpatialDataDir(opt$spatialDataDir) - -loadSpatialData("NaturalEarthAdm1") - - -# ----- Save airnow ws_monitor object as a RData file ------ - -result <- try( saveWrccData(opt) ) - -if ( "try-error" %in% class(result) ) { - msg <- paste("Error saving wrcc data: ", geterrmessage()) - logger.fatal(msg) -} else { - # Guarantee that the errorLog exists - if ( !file.exists(errorLog) ) dummy <- file.create(errorLog) - logger.info("Completed successfully!") -} - - - - diff --git a/localExecutables/openaq.R b/local_TODO/openaq.R similarity index 100% rename from localExecutables/openaq.R rename to local_TODO/openaq.R diff --git a/localExecutables/openaq_createLatestDataframes_exec.R b/local_TODO/openaq_createLatestDataframes_exec.R similarity index 100% rename from localExecutables/openaq_createLatestDataframes_exec.R rename to local_TODO/openaq_createLatestDataframes_exec.R diff --git a/localExecutables/openaq_createMonthlyDataframes_exec.R b/local_TODO/openaq_createMonthlyDataframes_exec.R similarity index 100% rename from localExecutables/openaq_createMonthlyDataframes_exec.R rename to local_TODO/openaq_createMonthlyDataframes_exec.R