diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..8f5506c
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,17 @@
+Package: geysertimes
+Title: Geyser Data from GeyserTimes.org
+Version: 0.0.0.9000
+Imports: lubridate, rappdirs, readr
+Suggests: knitr, rmarkdown
+Authors@R:
+    person(given = "Stephen",
+           family = "Kaluzny",
+           role = c("aut", "cre"),
+           email = "spkaluzny@gmail.com")
+Description: Gets geyser eruption and observation data from the GeyserTimes
+    database and optionally stores it locally.
+License: MIT + file LICENSE
+VignetteBuilder: knitr
+Encoding: UTF-8
+LazyData: true
+NeedsCompilation: no
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..f04a1f2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,6 @@
+export(
+  gt_get_data,
+  gt_load_data,
+  gt_path,
+  gt_version
+)
diff --git a/R/gt_get_data.R b/R/gt_get_data.R
new file mode 100644
index 0000000..d123a07
--- /dev/null
+++ b/R/gt_get_data.R
@@ -0,0 +1,54 @@
+# Download the GeyserTimes eruptions archive and cache it as an R object.
+#
+# Arguments:
+#   dest_folder  directory under which <version>/eruptions_data.rds is saved.
+#   overwrite    if FALSE and that file already exists, nothing is downloaded:
+#                a warning is issued and the existing path is returned.
+#   quiet        if TRUE, the hint about using gt_path() is not shown.
+#   version      version of the archive to fetch, a date (yyyy-mm-dd).
+#
+# Value: the path of the saved .rds file, returned invisibly.
+# An error is raised when the archive file cannot be downloaded.
+gt_get_data <- function(dest_folder = file.path(tempdir(), "GeyserTimes"),
+  overwrite=FALSE, quiet=FALSE, version=lubridate::today()) {
+  if(dest_folder != gt_path() && !quiet) {
+    # The package name is lower case and namespace lookups are case sensitive.
+    message("Set dest_folder to geysertimes::gt_path() so that data persists between R sessions.\n")
+  }
+  # Use the text form (yyyy-mm-dd) of a Date for the folder and file names.
+  version <- as.character(version)
+  outpath <- file.path(dest_folder, version, "eruptions_data.rds")
+  if(file.exists(outpath) && !overwrite) {
+    warning("GeyserTimes data for this version already exists on the local machine. Use the 'overwrite' argument to re-download if necessary.")
+    return(invisible(outpath))
+  }
+  outdir <- dirname(outpath)
+  if(!dir.exists(outdir)) {
+    dir.create(outdir, recursive=TRUE)
+  }
+  base_url <- "https://geysertimes.org/archive/complete/"
+  raw_data_file <- paste0("geysertimes_eruptions_complete_", version, ".tsv.gz")
+  download_data_file_path <- file.path(tempdir(), raw_data_file)
+  # The raw download is only an intermediate file; remove it when done.
+  on.exit(unlink(download_data_file_path), add=TRUE)
+  data_url <- paste0(base_url, raw_data_file)
+  # Warnings are silenced for the rest of the call and restored on exit.
+  oldOpt <- options(warn=-1)
+  on.exit(options(oldOpt), add=TRUE)
+  # download.file() either signals an error or returns a non-zero status
+  # when it fails; treat both as a failed download.
+  download_status <- tryCatch(
+    download.file(data_url, destfile=download_data_file_path, quiet=TRUE),
+    error=function(e) e)
+  if(inherits(download_status, "error") || download_status != 0) {
+    stop("Unable to download GeyserTimes data from ", data_url, call.=FALSE)
+  }
+  gt_tib <- readr::read_tsv(gzfile(download_data_file_path),
+    col_types=c("dcddddddddddddccccdddc"), quote="", progress=FALSE)
+  # as_datetime() reads numeric input as seconds since the epoch.
+  for(time_col in c("eruption_time_epoch", "time_updated", "time_entered")) {
+    gt_tib[[time_col]] <- lubridate::as_datetime(gt_tib[[time_col]])
+  }
+  saveRDS(gt_tib, file=outpath)
+  invisible(outpath)
+}
diff --git a/R/gt_load_data.R b/R/gt_load_data.R
new file mode 100644
index 0000000..78bea6d
--- /dev/null
+++ b/R/gt_load_data.R
@@ -0,0 +1,35 @@
+# Load GeyserTimes data previously saved by gt_get_data().
+#
+# Arguments:
+#   path     directory holding the <version>/eruptions_data.rds files.
+#   quiet    if TRUE, no messages are displayed.
+#   version  version of the data to load; NULL selects the newest version
+#            that gt_version() finds under path.
+#
+# When version is NULL and nothing is found under path, the temporary
+# location gt_path(temp=TRUE) is searched instead.
+# Value: the object read from the .rds file, or NULL if no data is found.
+gt_load_data <- function(path=gt_path(), quiet=FALSE, version=NULL) {
+  if(is.null(version)) {
+    version <- gt_version(path, quiet=TRUE)
+  }
+  if(is.null(version)) {
+    if(!quiet) {
+      message("Cannot find any GeyserTimes data under ", path)
+    }
+    # Fall back to the temporary location, gt_get_data()'s default.
+    path <- gt_path(temp=TRUE)
+    version <- gt_version(path, quiet=TRUE)
+    if(is.null(version)) {
+      return(NULL)
+    }
+    if(!quiet) {
+      message("Loading data from ", path)
+    }
+  }
+  full_path <- file.path(path, as.character(version), "eruptions_data.rds")
+  if(!file.exists(full_path)) {
+    stop("Cannot find GeyserTimes data file ", full_path, call.=FALSE)
+  }
+  readRDS(full_path)
+}
diff --git a/R/gt_path.R b/R/gt_path.R
new file mode 100644
index 0000000..2849df8
--- /dev/null
+++ b/R/gt_path.R
@@ -0,0 +1,11 @@
+# Path where local GeyserTimes data is stored.
+#
+# temp  if TRUE, return the folder under tempdir(); otherwise return the
+#       per-user data directory given by rappdirs::user_data_dir().
+gt_path <- function(temp=FALSE) {
+  if(temp) {
+    file.path(tempdir(), "GeyserTimes")
+  } else {
+    rappdirs::user_data_dir(appname = "GeyserTimes", appauthor = "GeyserTimes")
+  }
+}
diff --git a/R/gt_version.R b/R/gt_version.R
new file mode 100644
index 0000000..6eb4ccd
--- /dev/null
+++ b/R/gt_version.R
@@ -0,0 +1,28 @@
+# Version(s) of the GeyserTimes data stored under path.
+#
+# Arguments:
+#   path   directory searched recursively for eruptions_data.rds files.
+#   quiet  if TRUE, no message is shown when nothing is found.
+#   all    if TRUE, return every version found, newest first; otherwise
+#          return only the newest.
+#
+# Value: a Date vector, or NULL if no version is found.
+gt_version <- function(path=gt_path(), quiet=FALSE, all=FALSE) {
+  gt_files <- list.files(path, pattern="eruptions_data\\.rds$", recursive=TRUE)
+  version_dirs <- dirname(gt_files)
+  versions <- as.Date(version_dirs, format="%Y-%m-%d")
+  # only directories of form yyyy-mm-dd are allowed:
+  is_version <- !is.na(versions) & as.character(versions) == version_dirs
+  versions <- sort(versions[is_version], decreasing=TRUE)
+  if(length(versions) < 1) {
+    if(!quiet) {
+      message("Cannot find any GeyserTimes data under ", path)
+    }
+    return(NULL)
+  }
+  if(all) {
+    versions
+  } else {
+    versions[1]
+  }
+}
diff --git a/man/gt_get_data.Rd b/man/gt_get_data.Rd
new file mode 100644
index 0000000..4981d85
--- /dev/null
+++ b/man/gt_get_data.Rd
@@ -0,0 +1,59 @@
+\name{gt_get_data}
+\alias{gt_get_data}
+\title{
+Download Geyser Times Data
+}
+\description{
+Downloads the data from geysertimes.org.
+Reads the data and creates a tibble object in \code{dest_folder}.
+}
+\usage{
+gt_get_data(dest_folder = file.path(tempdir(), "GeyserTimes"),
+  overwrite = FALSE, quiet = FALSE, version = lubridate::today())
+}
+\arguments{
+  \item{dest_folder}{
+the location where the binary tibble object should be written.
+The default is under the current R session's temp directory
+which will disappear when the session ends.
+}
+  \item{overwrite}{
+a logical value,
+if \code{FALSE}, the data will not be downloaded again if a copy of the
+data, with \code{version}, already exists in \code{dest_folder}.
+}
+  \item{quiet}{
+a logical value, if \code{TRUE}, no messages are displayed.
+}
+  \item{version}{
+a character string giving the version of the data to download.
+This should be a date in the form \code{yyyy-mm-dd}.
+Typically, only the version with today's date is available.
+}
+}
+\details{
+The data is downloaded from the GeyserTimes archive web site
+\url{https://geysertimes.org/archive/} to the \code{tempdir()} directory.
+The data is then read with \code{readr::read_tsv} with appropriate
+column types.
+The resulting \code{tibble} object is then saved as a binary (\code{.rds})
+file under \code{dest_folder}.
+}
+\value{
+a character string giving the full path to the GeyserTimes data object.
+}
+\author{
+Stephen Kaluzny \email{spkaluzny@gmail.com}.
+}
+\note{
+Users are encouraged to set \code{dest_folder} to \code{gt_path()} to save
+a persistent copy of the data.
+}
+\seealso{
+\code{\link{gt_load_data}}.
+}
+\examples{
+dpath0 <- gt_get_data() # data saved under tempdir()
+dpath1 <- gt_get_data(dest_folder=gt_path()) # data saved under gt_path()
+}
+\keyword{geysertimes}
diff --git a/man/gt_load_data.Rd b/man/gt_load_data.Rd
new file mode 100644
index 0000000..1d3506a
--- /dev/null
+++ b/man/gt_load_data.Rd
@@ -0,0 +1,49 @@
+\name{gt_load_data}
+\alias{gt_load_data}
+\title{
+Load the Geyser Times Data
+}
+\description{
+Loads the Geyser Times data that was previously downloaded by a call
+to \code{gt_get_data}.
+}
+\usage{
+gt_load_data(path = gt_path(), quiet = FALSE, version = NULL)
+}
+\arguments{
+  \item{path}{
+a character string, the local location where the Geyser Times data
+has been written.
+The default is the local permanent location given by \code{gt_path()}.
+If no appropriate data is found at that location,
+the function will look in the temporary location given
+by \code{gt_path(temp=TRUE)}.
+}
+  \item{quiet}{
+a logical value, if \code{TRUE}, no messages are displayed.
+}
+  \item{version}{
+a character string giving the version of the Geyser Times data to load.
+Calling \code{gt_version(path, all=TRUE)} will list all versions
+available under \code{path}.
+}
+}
+\details{
+Typically, a user would download the data once, with a call to
+\code{gt_get_data(dest_folder=gt_path())}.
+Subsequent R sessions can then load this downloaded data with a
+call to \code{gt_load_data()}.
+}
+\value{
+a tibble containing the Geyser Times data with names:
+(need to decide on appropriate names)
+}
+\author{
+Stephen Kaluzny
+}
+\seealso{
+\code{gt_get_data}.
+}
+\examples{
+}
+\keyword{geysertimes}
diff --git a/man/gt_path.Rd b/man/gt_path.Rd
new file mode 100644
index 0000000..52ca31f
--- /dev/null
+++ b/man/gt_path.Rd
@@ -0,0 +1,37 @@
+\name{gt_path}
+\alias{gt_path}
+\title{
+Path to GeyserTimes Local Data
+}
+\description{
+Returns the path where local GeyserTimes data is stored.
+}
+\usage{
+gt_path(temp = FALSE)
+}
+\arguments{
+  \item{temp}{
+a logical value, if \code{TRUE}, the temporary path is returned.
+This location will disappear when the R session ends.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+a character string giving the full path where local GeyserTimes data is stored.
+}
+\author{
+Stephen Kaluzny
+}
+\note{
+%% ~~further notes~~
+}
+\seealso{
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+# The default location for the GeyserTimes data:
+gt_path()
+}
+\keyword{geysertimes}
diff --git a/man/gt_version.Rd b/man/gt_version.Rd
new file mode 100644
index 0000000..2efec59
--- /dev/null
+++ b/man/gt_version.Rd
@@ -0,0 +1,42 @@
+\name{gt_version}
+\alias{gt_version}
+\title{Version of GeyserTimes Data}
+\description{
+Returns the version of the current GeyserTimes data.
+This is a date in yyyy-mm-dd format.
+}
+\usage{
+gt_version(path = gt_path(), quiet = FALSE, all = FALSE)
+}
+\arguments{
+  \item{path}{
+the path to the GeyserTimes data.
+The default is \code{gt_path()}, the location suggested for \code{gt_get_data}.
+}
+  \item{quiet}{
+a logical value, if \code{TRUE}, no messages are printed.
+}
+  \item{all}{
+a logical value, if \code{TRUE}, list all versions of the GeyserTimes data found,
+not just the newest.
+}
+}
+\details{
+%% ~~ If necessary, more details than the description above ~~
+}
+\value{
+a \code{Date} vector giving the version(s) of GeyserTimes data
+stored under \code{path}, or \code{NULL} if no data is found.
+}
+\author{
+Stephen Kaluzny
+}
+\note{
+}
+\seealso{
+\code{gt_path}
+}
+\examples{
+gt_version()
+}
+\keyword{geysertimes}
diff --git a/vignettes/geysertimes.Rmd b/vignettes/geysertimes.Rmd
new file mode 100644
index 0000000..f6d7a0e
--- /dev/null
+++ b/vignettes/geysertimes.Rmd
@@ -0,0 +1,85 @@
+---
+title: "Introduction to geysertimes"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Introduction to geysertimes}
+  %\VignetteEngine{knitr::rmarkdown}
+---
+
+```{r, echo = FALSE, message = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+options(tibble.print_min = 4L, tibble.print_max = 4L)
+```
+
+# Basic Use
+
+Load the package
+```{r library}
+library("geysertimes")
+```
+
+## Get the Data
+
+The `gt_get_data` function downloads the compressed eruptions data
+from `https://geysertimes.org/archive/`,
+reads the compressed data into R
+and saves a version of the R object
+in the location specified
+in the `dest_folder` argument to the function.
+The default location for `dest_folder` is
+`file.path(tempdir(), "GeyserTimes")`.
+This default location is used to meet the CRAN requirement of
+not writing files by default to any location other than under `tempdir()`.
+
+```{r default_get}
+defpath <- gt_get_data()
+defpath
+```
+
+Users are encouraged to set `dest_folder` to the value given by
+`gt_path()` which is a permanent location appropriate for the
+user on the particular platform.
+
+```{r gt_path}
+gt_path()
+```
+
+If a permanent location is used, the user only needs to get the
+data once.
+Using the suggested value for `dest_folder`:
+```{r recommend_path}
+recpath <- gt_get_data(dest_folder=gt_path())
+recpath
+```
+
+## Load the Data
+
+The `gt_load_data` function is used to load the saved R object.
+
+```{r load01}
+gtdata <- gt_load_data()
+```
+
+A quick look at the data:
+```{r look}
+dim(gtdata)
+names(gtdata)
+```
+
+### Data Version
+The data that is downloaded is versioned.
+The version id is the date when the data was downloaded.
+
+The `gt_version()` function lists the latest version of the data that
+has been downloaded.
+Setting `all=TRUE` will list all versions of the data that have been
+downloaded.
+
+```{r version}
+gt_version()
+```
+
+```{r version_all}
+gt_version(all=TRUE)
+```
+