diff --git a/R/airsis_EBAMQualityControl.R b/R/airsis_EBAMQualityControl.R index 41dfbce8..001980d5 100644 --- a/R/airsis_EBAMQualityControl.R +++ b/R/airsis_EBAMQualityControl.R @@ -52,15 +52,15 @@ airsis_EBAMQualityControl <- function(df, # Latitude and longitude must be in range if (remove_Lon_zero) { - goodLonMask <- !is.na(df$Longitude) & df$Longitude >= valid_Longitude[1] & df$Longitude <= valid_Longitude[2] & df$Longitude != 0 + goodLonMask <- !is.na(df$Longitude) & (df$Longitude >= valid_Longitude[1]) & (df$Longitude <= valid_Longitude[2]) & (df$Longitude != 0) } else { - goodLonMask <- !is.na(df$Longitude) & df$Longitude >= valid_Longitude[1] & df$Longitude <= valid_Longitude[2] + goodLonMask <- !is.na(df$Longitude) & (df$Longitude >= valid_Longitude[1]) & (df$Longitude <= valid_Longitude[2]) } if (remove_Lat_zero) { - goodLatMask <- !is.na(df$Latitude) & df$Latitude >= valid_Latitude[1] & df$Latitude <= valid_Latitude[2] & df$Latitude != 0 + goodLatMask <- !is.na(df$Latitude) & (df$Latitude >= valid_Latitude[1]) & (df$Latitude <= valid_Latitude[2]) & (df$Latitude != 0) } else { - goodLatMask <- !is.na(df$Latitude) & df$Latitude >= valid_Latitude[1] & df$Latitude <= valid_Latitude[2] + goodLatMask <- !is.na(df$Latitude) & (df$Latitude >= valid_Latitude[1]) & (df$Latitude <= valid_Latitude[2]) } badRows <- !(goodLonMask & goodLatMask) @@ -88,7 +88,7 @@ airsis_EBAMQualityControl <- function(df, # ----- Type ---------------------------------------------------------------- - goodTypeMask <- !is.na(df$Type) & df$Type == "PM 2.5" + goodTypeMask <- !is.na(df$Type) & (df$Type == "PM 2.5") badRows <- !goodTypeMask badRowCount <- sum(badRows) diff --git a/R/airsis_ESAMQualityControl.R b/R/airsis_ESAMQualityControl.R index 98f1fdb2..a0290350 100644 --- a/R/airsis_ESAMQualityControl.R +++ b/R/airsis_ESAMQualityControl.R @@ -56,20 +56,20 @@ airsis_ESAMQualityControl <- function(df, # Latitude and longitude must be in range if (remove_Lon_zero) { - goodLonMask <- !is.na(df$Longitude) & df$Longitude >= valid_Longitude[1] & df$Longitude <= valid_Longitude[2] & df$Longitude != 0 + goodLonMask <- !is.na(df$Longitude) & (df$Longitude >= valid_Longitude[1]) & (df$Longitude <= valid_Longitude[2]) & (df$Longitude != 0) } else { - goodLonMask <- !is.na(df$Longitude) & df$Longitude >= valid_Longitude[1] & df$Longitude <= valid_Longitude[2] + goodLonMask <- !is.na(df$Longitude) & (df$Longitude >= valid_Longitude[1]) & (df$Longitude <= valid_Longitude[2]) } if (remove_Lat_zero) { - goodLatMask <- !is.na(df$Latitude) & df$Latitude >= valid_Latitude[1] & df$Latitude <= valid_Latitude[2] & df$Latitude != 0 + goodLatMask <- !is.na(df$Latitude) & (df$Latitude >= valid_Latitude[1]) & (df$Latitude <= valid_Latitude[2]) & (df$Latitude != 0) } else { - goodLatMask <- !is.na(df$Latitude) & df$Latitude >= valid_Latitude[1] & df$Latitude <= valid_Latitude[2] + goodLatMask <- !is.na(df$Latitude) & (df$Latitude >= valid_Latitude[1]) & (df$Latitude <= valid_Latitude[2]) } badRows <- !(goodLonMask & goodLatMask) badRowCount <- sum(badRows) - if (badRowCount > 0) { + if ( badRowCount > 0 ) { logger.info("Discarding %s rows with invalid location information", badRowCount) badLocations <- paste('(',df$Longitude[badRows],',',df$Latitude[badRows],')',sep='') logger.debug("Bad locations: %s", paste0(badLocations, collapse=", ")) diff --git a/R/monitor_subsetData.R b/R/monitor_subsetData.R index 6f60fd14..9dc32885 100644 --- a/R/monitor_subsetData.R +++ b/R/monitor_subsetData.R @@ -58,38 +58,44 @@ monitor_subsetData <- function(data, tlim=NULL, vlim=NULL, monitorIDs=NULL, } # If specified, remove any data columns that have no valid data after time range subsetting - if ( dropMonitors & !is.null(dim(data[,-1])) ) { + if ( dropMonitors ) { - anyMask <- c(TRUE, apply(data[,-1],2,function(x) { any(!is.na(x),na.rm=TRUE) })) - # Sanity check - if ( sum(anyMask) == 1 ) { - # All data missing, only 'datetime' has valid values - warning("All data are missing values.") - return(NULL) - } - data <- data[,anyMask] - - if ( !is.null(vlim) ) { - # NOTE: The apply() function converts the first argument to a matrix. - # NOTE: If we pass in a dataframe whose first column is POSIXct then things fail (Is it converting all subsequent columns to POSIXct?) - # NOTE: Therefore, we strip off the first column when we use apply() and then add it back - vlimMask <- apply(data[,-1], 2, function(x) { any((x > vlim[1]) & (x <= vlim[2]), na.rm=TRUE) } ) - data <- data[,c(TRUE,vlimMask)] - } - - } else if ( dropMonitors & is.null(dim(data[,-1])) ) { - - anyLogical <- c(TRUE, any(!is.na(data[,-1]),na.rm=TRUE)) - data <- data[,anyLogical] - - if ( !is.null(vlim) ) { + if ( ncol(data) > 2 ) { + # Multiple monitors, we can use apply() without worrying our subsetting will return a vector + + anyMask <- c(TRUE, apply(data[,-1],2,function(x) { any(!is.na(x),na.rm=TRUE) })) + # Sanity check + if ( sum(anyMask) == 1 ) { + # All data missing, only 'datetime' has valid values + warning("All data are missing values.") + return(NULL) + } + data <- data[,anyMask] + + if ( !is.null(vlim) ) { + # NOTE: The apply() function converts the first argument to a matrix. + # NOTE: If we pass in a dataframe whose first column is POSIXct then things fail (Is it converting all subsequent columns to POSIXct?) + # NOTE: Therefore, we strip off the first column when we use apply() and then add it back + vlimMask <- apply(data[,-1], 2, function(x) { any((x > vlim[1]) & (x <= vlim[2]), na.rm=TRUE) } ) + data <- data[,c(TRUE,vlimMask)] + } + + } else { + # Need to be careful with a single monitor + + anyMask <- c(TRUE, any(!is.na(data[,-1]),na.rm=TRUE)) + data <- data[,anyMask] - dataParam <- data[,-1] - - dataParam[dataParam > vlim[2] & !is.na(dataParam)] <- NA - dataParam[dataParam <= vlim[1] & !is.na(dataParam)] <- NA - - data[,-1] <- dataParam + if ( !is.null(vlim) ) { + + dataParam <- data[,-1] + + dataParam[dataParam > vlim[2] & !is.na(dataParam)] <- NA + dataParam[dataParam <= vlim[1] & !is.na(dataParam)] <- NA + + data[,-1] <- dataParam + + } } @@ -106,10 +112,7 @@ monitor_subsetData <- function(data, tlim=NULL, vlim=NULL, monitorIDs=NULL, # TODO: 2017-01-06 Are we still using rownames in the 'data' dataframe? # Add back YYYYmmddHHMM rownames discarded by dplyr::filter - rowNames <- sapply(data$datetime, function(x){ stringr::str_replace_all(x, "-", "") } ) - rowNames <- sapply(rowNames, function(x){ stringr::str_replace(x, " ", "") } ) - rowNames <- sapply(rowNames, function(x){ stringr::str_split_fixed(x, ":", 3)[1] } ) - rownames(data) <- rowNames + rownames(data) <- strftime(data$datetime, "%Y%m%d%H", tz="UTC") return(data) diff --git a/R/wrcc_EBAMQualityControl.R b/R/wrcc_EBAMQualityControl.R index 4ad9400b..d60458f9 100644 --- a/R/wrcc_EBAMQualityControl.R +++ b/R/wrcc_EBAMQualityControl.R @@ -63,20 +63,20 @@ wrcc_EBAMQualityControl <- function(df, # Latitude and longitude must be in range if (remove_Lon_zero) { - goodLonMask <- !is.na(df$GPSLon) & df$GPSLon >= valid_Longitude[1] & df$GPSLon <= valid_Longitude[2] & df$GPSLon != 0 + goodLonMask <- !is.na(df$GPSLon) & (df$GPSLon >= valid_Longitude[1]) & (df$GPSLon <= valid_Longitude[2]) & (df$GPSLon != 0) } else { - goodLonMask <- !is.na(df$GPSLon) & df$GPSLon >= valid_Longitude[1] & df$GPSLon <= valid_Longitude[2] + goodLonMask <- !is.na(df$GPSLon) & (df$GPSLon >= valid_Longitude[1]) & (df$GPSLon <= valid_Longitude[2]) } if (remove_Lat_zero) { - goodLatMask <- !is.na(df$GPSLat) & df$GPSLat >= valid_Latitude[1] & df$GPSLat <= valid_Latitude[2] & df$GPSLat != 0 + goodLatMask <- !is.na(df$GPSLat) & (df$GPSLat >= valid_Latitude[1]) & (df$GPSLat <= valid_Latitude[2]) & (df$GPSLat != 0) } else { - goodLatMask <- !is.na(df$GPSLat) & df$GPSLat >= valid_Latitude[1] & df$GPSLat <= valid_Latitude[2] + goodLatMask <- !is.na(df$GPSLat) & (df$GPSLat >= valid_Latitude[1]) & (df$GPSLat <= valid_Latitude[2]) } badRows <- !(goodLonMask & goodLatMask) badRowCount <- sum(badRows) - if (badRowCount > 0) { + if ( badRowCount > 0 ) { logger.info("Discarding %s rows with invalid location information", badRowCount) badLocations <- paste('(',df$GPSLon[badRows],',',df$GPSLat[badRows],')',sep='') logger.debug("Bad locations: %s", paste0(badLocations, collapse=", ")) @@ -94,7 +94,7 @@ wrcc_EBAMQualityControl <- function(df, # ----- Type ---------------------------------------------------------------- # Type: 0=E-BAM PM2.5, 1=E-BAM PM10, 9=E-Sampler. We only want PM2.5 measurements - goodTypeMask <- df$Type == 0 + goodTypeMask <- !is.na(df$Type) & (df$Type == 0) badRows <- !goodTypeMask badRowCount <- sum(badRows) if (badRowCount > 0) { diff --git a/R/wrcc_ESAMQualityControl.R b/R/wrcc_ESAMQualityControl.R index 407a7060..8e6c68d0 100644 --- a/R/wrcc_ESAMQualityControl.R +++ b/R/wrcc_ESAMQualityControl.R @@ -63,20 +63,20 @@ wrcc_ESAMQualityControl <- function(df, # Latitude and longitude must be in range if (remove_Lon_zero) { - goodLonMask <- !is.na(df$GPSLon) & df$GPSLon >= valid_Longitude[1] & df$GPSLon <= valid_Longitude[2] & df$GPSLon != 0 + goodLonMask <- !is.na(df$GPSLon) & df$GPSLon >= valid_Longitude[1] & df$GPSLon <= valid_Longitude[2] & (df$GPSLon != 0) } else { goodLonMask <- !is.na(df$GPSLon) & df$GPSLon >= valid_Longitude[1] & df$GPSLon <= valid_Longitude[2] } if (remove_Lat_zero) { - goodLatMask <- !is.na(df$GPSLat) & df$GPSLat >= valid_Latitude[1] & df$GPSLat <= valid_Latitude[2] & df$GPSLat != 0 + goodLatMask <- !is.na(df$GPSLat) & df$GPSLat >= valid_Latitude[1] & df$GPSLat <= valid_Latitude[2] & (df$GPSLat != 0) } else { goodLatMask <- !is.na(df$GPSLat) & df$GPSLat >= valid_Latitude[1] & df$GPSLat <= valid_Latitude[2] } badRows <- !(goodLonMask & goodLatMask) badRowCount <- sum(badRows) - if (badRowCount > 0) { + if ( badRowCount > 0 ) { logger.info("Discarding %s rows with invalid location information", badRowCount) badLocations <- paste('(',df$GPSLon[badRows],',',df$GPSLat[badRows],')',sep='') logger.debug("Bad locations: %s", paste0(badLocations, collapse=", ")) @@ -94,10 +94,10 @@ wrcc_ESAMQualityControl <- function(df, # ----- Type ---------------------------------------------------------------- # Type: 0=E-BAM PM2.5, 1=E-BAM PM10, 9=E-Sampler. We only want PM2.5 measurements - goodTypeMask <- df$Type == 9 + goodTypeMask <- !is.na(df$Type) & (df$Type == 9) badRows <- !goodTypeMask badRowCount <- sum(badRows) - if (badRowCount > 0) { + if ( badRowCount > 0 ) { logger.info("Discarding %s rows with invalid Type information", badRowCount) logger.debug("Bad Types: %s", paste0(sort(df$Type[badRows]), collapse=", ")) } diff --git a/R/wrcc_downloadData.R b/R/wrcc_downloadData.R index a1f47bd8..d4d40b54 100644 --- a/R/wrcc_downloadData.R +++ b/R/wrcc_downloadData.R @@ -27,8 +27,9 @@ # USFSRegionalMonitors <- c() # MiscellaneousMonitors <- c() -wrcc_downloadData <- function(stationID=NULL, startdate=20100101, +wrcc_downloadData <- function(startdate=20100101, enddate=strftime(lubridate::now(),"%Y%m%d",tz="GMT"), + stationID=NULL, baseUrl="http://www.wrcc.dri.edu/cgi-bin/wea_list2.pl") { # Sanity check @@ -73,8 +74,13 @@ wrcc_downloadData <- function(stationID=NULL, startdate=20100101, if ( class(rawBytes) == "character" ) { logger.debug(rawBytes) - logger.error("WRCC FTP request returns an error") - stop(rawBytes) + if ( stringr::str_detect(rawBytes, "WRCC data access information") ) { + logger.warn("No data available") + stop("No data available") + } else { + logger.error("WRCC FTP request returns an error") + stop(rawBytes) + } } # Convert raw bytes into a string diff --git a/R/wrcc_parseData.R b/R/wrcc_parseData.R index 51bf49d1..f7911f4a 100644 --- a/R/wrcc_parseData.R +++ b/R/wrcc_parseData.R @@ -77,9 +77,9 @@ wrcc_parseData <- function(fileString) { # Sanity check if ( length(monitorTypeCode) > 1 ) { - logger.warn("More than one monitor type detected: %s", paste(monitorTypeCode,sep=", ")) + logger.warn("More than one monitor type detected: %s", paste(monitorTypeCode,collapse=", ")) # Pick the most common Type - typeTable <- table(monitorTypeCode) + typeTable <- table(df$Type) monitorTypeCode <- names(typeTable)[which(typeTable == max(typeTable))] } diff --git a/man/wrcc_downloadData.Rd b/man/wrcc_downloadData.Rd index abc2cc55..c4ed2d42 100644 --- a/man/wrcc_downloadData.Rd +++ b/man/wrcc_downloadData.Rd @@ -4,17 +4,17 @@ \alias{wrcc_downloadData} \title{Download Data from WRCC} \usage{ -wrcc_downloadData(stationID = NULL, startdate = 20100101, - enddate = strftime(lubridate::now(), "\%Y\%m\%d", tz = "GMT"), +wrcc_downloadData(startdate = 20100101, enddate = strftime(lubridate::now(), + "\%Y\%m\%d", tz = "GMT"), stationID = NULL, baseUrl = "http://www.wrcc.dri.edu/cgi-bin/wea_list2.pl") } \arguments{ -\item{stationID}{station identifier (will be upcased)} - \item{startdate}{desired start date (integer or character representing YYYYMMDD[HH])} \item{enddate}{desired end date (integer or character representing YYYYMMDD[HH])} +\item{stationID}{station identifier (will be upcased)} + \item{baseUrl}{base URL for data queries} } \value{