Skip to content
This repository has been archived by the owner on Jan 30, 2024. It is now read-only.

Commit

Permalink
refactor cleanhydro, add details to cleanwq and cleanhydro docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jsta committed Aug 28, 2016
1 parent e66be8f commit 86436e3
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 90 deletions.
75 changes: 13 additions & 62 deletions R/dbhydro_clean.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#'@name cleanwq
#'@title Clean raw water quality DBHYDRO data retrievals
#'@description Remove extra columns associated with QA flags, LIMS, and District recieving. Remove QA "blanks". Convert results from long to wide format.
#'@details Current DBHYDRO practice is to return values below the MDL as 0 minus the uncertainty estimate.
#'@description Removes extra columns associated with QA flags and QA blanks which are used to check on potential sources of contamination. Converts \code{\link{getwq}} results from long (each piece of data on its own row) to \code{wide} format (each site x variable combination in its own column).
#'@export
#'@import reshape2
#'@param dt data.frame output of getwq
#'@param mdl_handling character specify values to return for measurements below the minimum detection limit choice of "raw", "half", or "full".
#'@param dt data.frame output of \code{\link{getwq}}
#'@param mdl_handling character string specifying the handling of measurement values below the minimum detection limit (MDL). Example choices for this argument include:
#'\itemize{
#'\item \code{raw}: Returns values exactly as they are stored in the database. Current practice is to return values below the MDL as 0 minus the uncertainty estimate.
#'\item \code{half}: Returns values below the MDL as half the MDL
#'\item \code{full}: Returns values below the MDL as the MDL
#'}
#'
#'@examples \dontrun{
#'#check handling of values below MDL
#' dt <- getwq("FLAB01", "2014-09-14", "2014-09-18", "NITRATE+NITRITE-N", raw = TRUE)
Expand Down Expand Up @@ -56,70 +61,16 @@ cleanwq <- function(dt, mdl_handling = "raw"){

#'@name cleanhydro
#'@title Clean raw hydrologic DBHYDRO data retrievals
#'@description Cleans output of gethydro query to be consistent with water quality formatting. Connects metadata header to actual measurements.
#'@description Converts output of \code{\link{gethydro}} from long (each piece of data on its own row) to wide format (each site x variable combination in its own column). Metadata (station-name, variable, measurement units) is parsed so that it is wholly contained in column names.
#'@export
#'@import reshape2
#'@importFrom utils read.csv
#'@param res output of \code{\link[dbhydroR]{gethydro}}
#'@param raw logical default is FALSE, set to TRUE to return data in "long" format with all comments, qa information, and database codes included.
#'@param dt data.frame output of \code{\link[dbhydroR]{gethydro}}
#'@examples
#'\dontrun{
#'servfull <- "http://my.sfwmd.gov/dbhydroplsql/web_io.report_process"
#'
#'qy <- structure(list(v_period = "uspec", v_start_date = "20130101", v_end_date = "20130202",
#' v_report_type = "format6", v_target_code = "file_csv", v_run_mode = "onLine", v_js_flag = "Y",
#' v_dbkey = "15081"), .Names = c("v_period", "v_start_date", "v_end_date", "v_report_type",
#' "v_target_code", "v_run_mode", "v_js_flag", "v_dbkey"))
#'
#'res <- httr::GET(servfull, query = qy)
#'
#'cleanhydro(res, raw = FALSE)
#'cleanhydro(res, raw = TRUE)
#'cleanhydro(gethydro(dbkey = "15081", date_min = "2013-01-01", date_max = "2013-02-02", raw = TRUE))
#'}

cleanhydro <- function(res, raw = FALSE){

i <- 1
while(any(!is.na(suppressMessages(read.csv(text = httr::content(res,
"text"), skip = i, stringsAsFactors = FALSE, header = FALSE))[i, 10:16]))){
i <- i + 1
}

metadata <- suppressMessages(read.csv(text = httr::content(res, "text"),
skip = 1, stringsAsFactors = FALSE))[1:(i - 1),]

try({dt <- suppressMessages(read.csv(text = httr::content(res, "text"),
skip = i + 1, stringsAsFactors = FALSE))}, silent = TRUE)
if(class(dt) != "data.frame"){
stop("No data found")
}

if(!any(names(dt) == "DBKEY")){
message("Column headings missing. Guessing...")

names(dt) <- c("Station", "DBKEY", "Daily.Date", "Data.Value",
"Qualifer", "Revision.Date")

if(all(is.na(as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))))){
message("Returning instantaneous data...")

names(dt) <- c("Inst.Date", "DCVP", "DBKEY", "Data.Value",
"Code", "Quality.Flag")

dt <- merge(metadata, dt)
dt$date <- as.POSIXct(strptime(dt$Inst.Date, format = "%d-%b-%Y %H:%M"))
}
}else{
dt <- merge(metadata, dt)
dt$date <- as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))
}

names(dt) <- tolower(names(dt))

if(raw == TRUE){
dt
}else{
cleanhydro <- function(dt){
reshape2::dcast(dt, date ~ station + type + units, value.var = "data.value",
add.missing = TRUE, fun.aggregate = mean)
}
}
55 changes: 51 additions & 4 deletions R/dbhydro_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ getwq <- function(station_id = NA, date_min = NA, date_max = NA,


#'@name gethydro
#'@title Retrieve DBHYDO hydrologic data
#'@title Retrieve hydrologic data from the DBHYDRO Environmental Database
#'@description Retrieve hydrologic data from the DBHYDRO Environmental Database
#'@param dbkey character string specifying a unique data series. See \code{\link[dbhydroR]{getdbkey}}
#'@param date_min character date must be in YYYY-MM-DD format
Expand All @@ -152,7 +152,7 @@ getwq <- function(station_id = NA, date_min = NA, date_max = NA,
#'\item The second way to run \code{gethydro} is to specify additional arguments to \code{...} which are passed to \code{\link{getdbkey}} on-the-fly.
#'
#'}
#'By default, \code{gethydro} returns a cleaned output where metadata is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}.
#'By default, \code{gethydro} returns a cleaned output where metadata (station-name, variable, measurement units) is wholly contained in the column name. This is accomplished internally by the \code{\link{cleanhydro}} function. If additional metadata such as lattitude and longitude are desired set the \code{raw} argument to \code{TRUE}.
#'@examples
#'\dontrun{
#'#One variable/station time series
Expand Down Expand Up @@ -207,14 +207,62 @@ gethydro <- function(dbkey = NA, date_min = NA, date_max = NA, raw = FALSE, ...)
v_run_mode = "onLine", v_js_flag = "Y", v_dbkey = dbkey)

res <- httr::GET(servfull, query = qy)
try({res <- cleanhydro(res, raw, ...)}, silent = TRUE)
try({res <- parse_hydro_response(res, raw, ...)}, silent = TRUE)

if(class(res) == "response"){
stop("No data found")
}

if(raw == FALSE){
res <- cleanhydro(res)
}

res
}

# connect metadata header to results
parse_hydro_response <- function(res, raw = FALSE){

i <- 1
while(any(!is.na(suppressMessages(read.csv(text = httr::content(res,
"text"), skip = i, stringsAsFactors = FALSE, header = FALSE))[i, 10:16]))){
i <- i + 1
}

metadata <- suppressMessages(read.csv(text = httr::content(res, "text"),
skip = 1, stringsAsFactors = FALSE))[1:(i - 1),]

try({dt <- suppressMessages(read.csv(text = httr::content(res, "text"),
skip = i + 1, stringsAsFactors = FALSE))}, silent = TRUE)
if(class(dt) != "data.frame"){
stop("No data found")
}

if(!any(names(dt) == "DBKEY")){
message("Column headings missing. Guessing...")

names(dt) <- c("Station", "DBKEY", "Daily.Date", "Data.Value",
"Qualifer", "Revision.Date")

if(all(is.na(as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))))){
message("Returning instantaneous data...")

names(dt) <- c("Inst.Date", "DCVP", "DBKEY", "Data.Value",
"Code", "Quality.Flag")

dt <- merge(metadata, dt)
dt$date <- as.POSIXct(strptime(dt$Inst.Date, format = "%d-%b-%Y %H:%M"))
}
}else{
dt <- merge(metadata, dt)
dt$date <- as.POSIXct(strptime(dt$Daily.Date, format = "%d-%b-%Y"))
}

names(dt) <- tolower(names(dt))

dt
}

#'@name getdbkey
#'@title Query dbkey information
#'@description Retrieve a data.frame summary including dbkeys or a vector of dbkeys corresponding to specified parameters
Expand Down Expand Up @@ -332,5 +380,4 @@ getdbkey <- function(category, stationid = NA, param = NA, freq = NA,
}else{
res[,1]
}

}
20 changes: 4 additions & 16 deletions man/cleanhydro.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions man/cleanwq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/gethydro.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 86436e3

Please sign in to comment.