diff --git a/R/dbhydro_clean.R b/R/dbhydro_clean.R index 4a79d15..de2408d 100644 --- a/R/dbhydro_clean.R +++ b/R/dbhydro_clean.R @@ -1,11 +1,16 @@ #'@name cleanwq #'@title Clean raw water quality DBHYDRO data retrievals -#'@description Remove extra columns associated with QA flags, LIMS, and District recieving. Remove QA "blanks". Convert results from long to wide format. -#'@details Current DBHYDRO practice is to return values below the MDL as 0 minus the uncertainty estimate. +#'@description Removes extra columns associated with QA flags and QA blanks which are used to check on potential sources of contamination. Converts \code{\link{getwq}} results from long (each piece of data on its own row) to \code{wide} format (each site x variable combination in its own column). #'@export #'@import reshape2 -#'@param dt data.frame output of getwq -#'@param mdl_handling character specify values to return for measurements below the minimum detection limit choice of "raw", "half", or "full". +#'@param dt data.frame output of \code{\link{getwq}} +#'@param mdl_handling character string specifying the handling of measurement values below the minimum detection limit (MDL). Example choices for this argument include: +#'\itemize{ +#'\item \code{raw}: Returns values exactly as they are stored in the database. Current practice is to return values below the MDL as 0 minus the uncertainty estimate. +#'\item \code{half}: Returns values below the MDL as half the MDL +#'\item \code{full}: Returns values below the MDL as the MDL +#'} +#' #'@examples \dontrun{ #'#check handling of values below MDL #' dt <- getwq("FLAB01", "2014-09-14", "2014-09-18", "NITRATE+NITRITE-N", raw = TRUE) @@ -56,70 +61,16 @@ cleanwq <- function(dt, mdl_handling = "raw"){ #'@name cleanhydro #'@title Clean raw hydrologic DBHYDRO data retrievals -#'@description Cleans output of gethydro query to be consistent with water quality formatting. Connects metadata header to actual measurements. +#'@description Converts output of \code{\link{gethydro}} from long (each piece of data on its own row) to wide format (each site x variable combination in its own column). Metadata (station-name, variable, measurement units) is parsed so that it is wholly contained in column names. #'@export #'@import reshape2 -#'@importFrom utils read.csv -#'@param res output of \code{\link[dbhydroR]{gethydro}} -#'@param raw logical default is FALSE, set to TRUE to return data in "long" format with all comments, qa information, and database codes included. +#'@param dt data.frame output of \code{\link[dbhydroR]{gethydro}} #'@examples #'\dontrun{ -#'servfull <- "http://my.sfwmd.gov/dbhydroplsql/web_io.report_process" -#' -#'qy <- structure(list(v_period = "uspec", v_start_date = "20130101", v_end_date = "20130202", -#' v_report_type = "format6", v_target_code = "file_csv", v_run_mode = "onLine", v_js_flag = "Y", -#' v_dbkey = "15081"), .Names = c("v_period", "v_start_date", "v_end_date", "v_report_type", -#' "v_target_code", "v_run_mode", "v_js_flag", "v_dbkey")) -#' -#'res <- httr::GET(servfull, query = qy) -#' -#'cleanhydro(res, raw = FALSE) -#'cleanhydro(res, raw = TRUE) +#'cleanhydro(gethydro(dbkey = "15081", date_min = "2013-01-01", date_max = "2013-02-02", raw = TRUE)) #'} -cleanhydro <- function(res, raw = FALSE){ - - i <- 1 - while(any(!is.na(suppressMessages(read.csv(text = httr::content(res, - "text"), skip = i, stringsAsFactors = FALSE, header = FALSE))[i, 10:16]))){ - i <- i + 1 - } - - metadata <- suppressMessages(read.csv(text = httr::content(res, "text"), - skip = 1, stringsAsFactors = FALSE))[1:(i - 1),] - - try({dt <- suppressMessages(read.csv(text = httr::content(res, "text"), - skip = i + 1, stringsAsFactors = FALSE))}, silent = TRUE) - if(class(dt) != "data.frame"){ - stop("No data found") - } - - if(!any(names(dt) == "DBKEY")){ - message("Column headings missing. Guessing...") - - names(dt) <- c("Station", "DBKEY", "Daily.Date", "Data.Value", - "Qualifer", "Revision.Date") - - if(all(is.na(as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))))){ - message("Returning instantaneous data...") - - names(dt) <- c("Inst.Date", "DCVP", "DBKEY", "Data.Value", - "Code", "Quality.Flag") - - dt <- merge(metadata, dt) - dt$date <- as.POSIXct(strptime(dt$Inst.Date, format = "%d-%b-%Y %H:%M")) - } - }else{ - dt <- merge(metadata, dt) - dt$date <- as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y")) - } - - names(dt) <- tolower(names(dt)) - - if(raw == TRUE){ - dt - }else{ +cleanhydro <- function(dt){ reshape2::dcast(dt, date ~ station + type + units, value.var = "data.value", add.missing = TRUE, fun.aggregate = mean) - } } diff --git a/R/dbhydro_get.R b/R/dbhydro_get.R index a9783dc..46c1973 100644 --- a/R/dbhydro_get.R +++ b/R/dbhydro_get.R @@ -129,7 +129,7 @@ getwq <- function(station_id = NA, date_min = NA, date_max = NA, #'@name gethydro -#'@title Retrieve DBHYDO hydrologic data +#'@title Retrieve hydrologic data from the DBHYDRO Environmental Database #'@description Retrieve hydrologic data from the DBHYDRO Environmental Database #'@param dbkey character string specifying a unique data series. See \code{\link[dbhydroR]{getdbkey}} #'@param date_min character date must be in YYYY-MM-DD format @@ -152,7 +152,7 @@ getwq <- function(station_id = NA, date_min = NA, date_max = NA, #'\item The second way to run \code{gethydro} is to specify additional arguments to \code{...} which are passed to \code{\link{getdbkey}} on-the-fly. #' #'} -#'By default, \code{gethydro} returns a cleaned output where metadata is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}. +#'By default, \code{gethydro} returns a cleaned output where metadata (station-name, variable, measurement units) is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}. #'@examples #'\dontrun{ #'#One variable/station time series @@ -207,14 +207,62 @@ gethydro <- function(dbkey = NA, date_min = NA, date_max = NA, raw = FALSE, ...) v_run_mode = "onLine", v_js_flag = "Y", v_dbkey = dbkey) res <- httr::GET(servfull, query = qy) - try({res <- cleanhydro(res, raw, ...)}, silent = TRUE) + try({res <- parse_hydro_response(res, raw, ...)}, silent = TRUE) + if(class(res) == "response"){ stop("No data found") } + if(raw == FALSE){ + res <- cleanhydro(res) + } + res } +# connect metadata header to results +parse_hydro_response <- function(res, raw = FALSE){ + + i <- 1 + while(any(!is.na(suppressMessages(read.csv(text = httr::content(res, + "text"), skip = i, stringsAsFactors = FALSE, header = FALSE))[i, 10:16]))){ + i <- i + 1 + } + + metadata <- suppressMessages(read.csv(text = httr::content(res, "text"), + skip = 1, stringsAsFactors = FALSE))[1:(i - 1),] + + try({dt <- suppressMessages(read.csv(text = httr::content(res, "text"), + skip = i + 1, stringsAsFactors = FALSE))}, silent = TRUE) + if(class(dt) != "data.frame"){ + stop("No data found") + } + + if(!any(names(dt) == "DBKEY")){ + message("Column headings missing. Guessing...") + + names(dt) <- c("Station", "DBKEY", "Daily.Date", "Data.Value", + "Qualifer", "Revision.Date") + + if(all(is.na(as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))))){ + message("Returning instantaneous data...") + + names(dt) <- c("Inst.Date", "DCVP", "DBKEY", "Data.Value", + "Code", "Quality.Flag") + + dt <- merge(metadata, dt) + dt$date <- as.POSIXct(strptime(dt$Inst.Date, format = "%d-%b-%Y %H:%M")) + } + }else{ + dt <- merge(metadata, dt) + dt$date <- as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y")) + } + + names(dt) <- tolower(names(dt)) + + dt +} + #'@name getdbkey #'@title Query dbkey information #'@description Retrieve a data.frame summary including dbkeys or a vector of dbkeys corresponding to specified parameters @@ -332,5 +380,4 @@ getdbkey <- function(category, stationid = NA, param = NA, freq = NA, }else{ res[,1] } - } diff --git a/man/cleanhydro.Rd b/man/cleanhydro.Rd index f346230..24167c3 100644 --- a/man/cleanhydro.Rd +++ b/man/cleanhydro.Rd @@ -4,29 +4,17 @@ \alias{cleanhydro} \title{Clean raw hydrologic DBHYDRO data retrievals} \usage{ -cleanhydro(res, raw = FALSE) +cleanhydro(dt) } \arguments{ -\item{res}{output of \code{\link[dbhydroR]{gethydro}}} - -\item{raw}{logical default is FALSE, set to TRUE to return data in "long" format with all comments, qa information, and database codes included.} +\item{dt}{data.frame output of \code{\link[dbhydroR]{gethydro}}} } \description{ -Cleans output of gethydro query to be consistent with water quality formatting. Connects metadata header to actual measurements. +Converts output of \code{\link{gethydro}} from long (each piece of data on its own row) to wide format (each site x variable combination in its own column). Metadata (station-name, variable, measurement units) is parsed so that it is wholly contained in column names. } \examples{ \dontrun{ -servfull <- "http://my.sfwmd.gov/dbhydroplsql/web_io.report_process" - -qy <- structure(list(v_period = "uspec", v_start_date = "20130101", v_end_date = "20130202", -v_report_type = "format6", v_target_code = "file_csv", v_run_mode = "onLine", v_js_flag = "Y", - v_dbkey = "15081"), .Names = c("v_period", "v_start_date", "v_end_date", "v_report_type", - "v_target_code", "v_run_mode", "v_js_flag", "v_dbkey")) - -res <- httr::GET(servfull, query = qy) - -cleanhydro(res, raw = FALSE) -cleanhydro(res, raw = TRUE) +cleanhydro(gethydro(dbkey = "15081", date_min = "2013-01-01", date_max = "2013-02-02", raw = TRUE)) } } diff --git a/man/cleanwq.Rd b/man/cleanwq.Rd index 6db1f42..61a07b7 100644 --- a/man/cleanwq.Rd +++ b/man/cleanwq.Rd @@ -7,15 +7,17 @@ cleanwq(dt, mdl_handling = "raw") } \arguments{ -\item{dt}{data.frame output of getwq} +\item{dt}{data.frame output of \code{\link{getwq}}} -\item{mdl_handling}{character specify values to return for measurements below the minimum detection limit choice of "raw", "half", or "full".} +\item{mdl_handling}{character string specifying the handling of measurement values below the minimum detection limit (MDL). Example choices for this argument include: +\itemize{ +\item \code{raw}: Returns values exactly as they are stored in the database. Current practice is to return values below the MDL as 0 minus the uncertainty estimate. +\item \code{half}: Returns values below the MDL as half the MDL +\item \code{full}: Returns values below the MDL as the MDL +}} } \description{ -Remove extra columns associated with QA flags, LIMS, and District recieving. Remove QA "blanks". Convert results from long to wide format. -} -\details{ -Current DBHYDRO practice is to return values below the MDL as 0 minus the uncertainty estimate. +Removes extra columns associated with QA flags and QA blanks which are used to check on potential sources of contamination. Converts \code{\link{getwq}} results from long (each piece of data on its own row) to \code{wide} format (each site x variable combination in its own column). } \examples{ \dontrun{ diff --git a/man/gethydro.Rd b/man/gethydro.Rd index da71669..876a7e9 100644 --- a/man/gethydro.Rd +++ b/man/gethydro.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dbhydro_get.R \name{gethydro} \alias{gethydro} -\title{Retrieve DBHYDO hydrologic data} +\title{Retrieve hydrologic data from the DBHYDRO Environmental Database} \usage{ gethydro(dbkey = NA, date_min = NA, date_max = NA, raw = FALSE, ...) } @@ -34,7 +34,7 @@ Retrieve hydrologic data from the DBHYDRO Environmental Database \item The second way to run \code{gethydro} is to specify additional arguments to \code{...} which are passed to \code{\link{getdbkey}} on-the-fly. } -By default, \code{gethydro} returns a cleaned output where metadata is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}. +By default, \code{gethydro} returns a cleaned output where metadata (station-name, variable, measurement units) is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}. } \examples{ \dontrun{