From b1d3510cefdc46c9d6559b8fe1fa64b372f2397f Mon Sep 17 00:00:00 2001 From: spkaluzny Date: Fri, 3 May 2019 10:41:26 -0700 Subject: [PATCH 1/3] Initial package for getting and loading the data. --- geysertimes/DESCRIPTION | 16 +++ geysertimes/NAMESPACE | 6 + geysertimes/R/gt_get_data.R | 33 ++++++ geysertimes/R/gt_load_data.R | 22 ++++ geysertimes/R/gt_path.R | 7 ++ geysertimes/R/gt_version.R | 19 +++ geysertimes/man/gt_get_data.Rd | 59 +++++++++ geysertimes/man/gt_load_data.Rd | 49 ++++++++ geysertimes/man/gt_path.Rd | 37 ++++++ geysertimes/man/gt_version.Rd | 42 +++++++ geysertimes/vignettes/geysertimes.Rmd | 85 +++++++++++++ geysertimes/vignettes/geysertimes.html | 158 +++++++++++++++++++++++++ 12 files changed, 533 insertions(+) create mode 100644 geysertimes/DESCRIPTION create mode 100644 geysertimes/NAMESPACE create mode 100644 geysertimes/R/gt_get_data.R create mode 100644 geysertimes/R/gt_load_data.R create mode 100644 geysertimes/R/gt_path.R create mode 100644 geysertimes/R/gt_version.R create mode 100644 geysertimes/man/gt_get_data.Rd create mode 100644 geysertimes/man/gt_load_data.Rd create mode 100644 geysertimes/man/gt_path.Rd create mode 100644 geysertimes/man/gt_version.Rd create mode 100644 geysertimes/vignettes/geysertimes.Rmd create mode 100644 geysertimes/vignettes/geysertimes.html diff --git a/geysertimes/DESCRIPTION b/geysertimes/DESCRIPTION new file mode 100644 index 0000000..8f5506c --- /dev/null +++ b/geysertimes/DESCRIPTION @@ -0,0 +1,16 @@ +Package: geysertimes +Title: Geyser Data from GeyserTimes.org +Version: 0.0.0.9000 +Imports: lubridate, rappdirs, readr +Authors@R: + person(given = "Stephen", + family = "Kaluzny", + role = c("aut", "cre"), + email = "spkaluzny@gmail.com") +Description: Gets geyser eruption and observation data from the GeyserTimes + database and optionally stores it locally. 
+License: MIT + file LICENSE +VignetteBuilder: knitr +Encoding: UTF-8 +LazyData: true +NeedsCompilation: no diff --git a/geysertimes/NAMESPACE b/geysertimes/NAMESPACE new file mode 100644 index 0000000..f04a1f2 --- /dev/null +++ b/geysertimes/NAMESPACE @@ -0,0 +1,6 @@ +export( + gt_get_data, + gt_load_data, + gt_path, + gt_version +) diff --git a/geysertimes/R/gt_get_data.R b/geysertimes/R/gt_get_data.R new file mode 100644 index 0000000..d123a07 --- /dev/null +++ b/geysertimes/R/gt_get_data.R @@ -0,0 +1,33 @@ +gt_get_data <- function(dest_folder = file.path(tempdir(), "GeyserTimes"), + overwrite=FALSE, quiet=FALSE, version=lubridate::today()) { + if(dest_folder != gt_path()) { + if(!quiet) { + message("Set dest_folder to GeyserTimes::gt_path() so that data persists between R sessions.\n") + } + } + outpath <- file.path(dest_folder, version, "eruptions_data.rds") + if(file.exists(outpath) && !overwrite) { + warning("GeyserTimes data for this version already exists on the local machine. Use the 'overwrite' argument to re-download if neccessary.") + return(invisible(outpath)) + } + outdir <- dirname(outpath) + if(!dir.exists(outdir)) { + dir.create(outdir, recursive=TRUE) + } + base_url <- "https://geysertimes.org/archive/complete/" + raw_data_file <- paste0("geysertimes_eruptions_complete_", version, ".tsv.gz") + download_data_file_path <- file.path(tempdir(), raw_data_file) + data_url <- paste0(base_url, raw_data_file) + oldOpt <- options(warn=-1) + on.exit(options(oldOpt)) + trydownload <- try( + download.file(data_url, destfile=download_data_file_path, quiet=TRUE), + silent=TRUE) + gt_tib <- readr::read_tsv(gzfile(download_data_file_path), + col_types=c("dcddddddddddddccccdddc"), quote="", progress=FALSE) + gt_tib[["eruption_time_epoch"]] <- lubridate::as_datetime(gt_tib[["eruption_time_epoch"]]) + gt_tib[["time_updated"]] <- lubridate::as_datetime(gt_tib[["time_updated"]]) + gt_tib[["time_entered"]] <- lubridate::as_datetime(gt_tib[["time_entered"]]) + 
saveRDS(gt_tib, file=outpath) + invisible(outpath) +} diff --git a/geysertimes/R/gt_load_data.R b/geysertimes/R/gt_load_data.R new file mode 100644 index 0000000..78bea6d --- /dev/null +++ b/geysertimes/R/gt_load_data.R @@ -0,0 +1,22 @@ +"gt_load_data" <- function(path=gt_path(), quiet=FALSE, version=NULL) { + if(is.null(version)) { + version <- gt_version(path, quiet=TRUE) + } + if(is.null(version)) { + if(!quiet) { + message("Cannot find any GeyserTimes data under ", path) + } + # Look in Rtmp + path <- file.path(tempdir(), "GeyserTimes") + version <- gt_version(path, quiet=TRUE) + if(is.null(version)) { + return(NULL) + } else { + if(!quiet) { + message("Loading data from ", path) + } + } + } + full_path <- file.path(path, version, "eruptions_data.rds") + readRDS(full_path) +} diff --git a/geysertimes/R/gt_path.R b/geysertimes/R/gt_path.R new file mode 100644 index 0000000..2849df8 --- /dev/null +++ b/geysertimes/R/gt_path.R @@ -0,0 +1,7 @@ +"gt_path" <- function(temp=FALSE) { + if(temp) { + file.path(tempdir(), "GeyserTimes") + } else { + rappdirs::user_data_dir(appname = "GeyserTimes", appauthor = "GeyserTimes") + } +} diff --git a/geysertimes/R/gt_version.R b/geysertimes/R/gt_version.R new file mode 100644 index 0000000..6eb4ccd --- /dev/null +++ b/geysertimes/R/gt_version.R @@ -0,0 +1,19 @@ +"gt_version" <- function(path=gt_path(), quiet=FALSE, all=FALSE) { + gt_files <- list.files(path, pattern="eruptions_data\\.rds$", recursive=TRUE) + versions <- as.Date(dirname(gt_files), format="%Y-%m-%d") + # only directories of form yyyy-mm-dd are allowed: + versions <- sort(versions[as.character(versions) == dirname(gt_files)], + decreasing=TRUE) + if(length(gt_files) < 1 || all(is.na(versions))) { + if(!quiet) { + message("Cannot find any GeyserTimes data under", path) + } + return(NULL) + } + version <- if(all) { + versions[!is.na(versions)] + } else { + versions[1] + } + version +} diff --git a/geysertimes/man/gt_get_data.Rd b/geysertimes/man/gt_get_data.Rd new 
file mode 100644 index 0000000..4981d85 --- /dev/null +++ b/geysertimes/man/gt_get_data.Rd @@ -0,0 +1,59 @@ +\name{gt_get_data} +\alias{gt_get_data} +\title{ +Download Geyser Times Data +} +\description{ +Downloads the data from geysertimes.org. +Reads the data and creates a tibble object in `dest_dir`. +} +\usage{ +gt_get_data(dest_dir = file.path(tempdir(), "GeyserTimes"), + overwrite = FALSE, quiet = FALSE, version = lubridate::today()) +} +\arguments{ + \item{dest_dir}{ +the location where the binary tibble object should be written. +The default is under the current R session's temp directory +which will disappear when the session ends. +} + \item{overwrite}{ +a logical value, +if\code{FALSE}, the data will not be downloaded again if copy of the +data, with \code{version}, already exists in \code{dest_dir}. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are displayed. +} + \item{version}{ +a character string giving the version of the data to download. +This should a date in the form \code{yyyy-mm-dd}. +Typically, only the version with today's date is available. +} +} +\details{ +The data is downloaded from the GeyserTimes archive web site +\url{https://geysertimes.org/archive/} to the \code{tempdir()} directory. +The data is then read with \code{readr::read_tsv} wtih appropriate +column types. +The resulting \code{tibble} object is then saved as an binary (\code{.rds}) +in \code{dest_dir}. +} +\value{ +a character string giving the full path to GeyserTimes data object. +} +\author{ +Stephen Kaluzny . +} +\note{ +Users are encouraged to set \code{dest_dir} to \code{gt_path()} to save +a persistent copy of the data. +} +\seealso{ +gt_load_data. 
+} +\examples{ +dpath0 <- gt_get_data() # data saved under tempdir() +dpath1 <- gt_get_data(dest_dir=gt_path()) # data saved under gt_path() +} +\keyword{geysertimes} diff --git a/geysertimes/man/gt_load_data.Rd b/geysertimes/man/gt_load_data.Rd new file mode 100644 index 0000000..1d3506a --- /dev/null +++ b/geysertimes/man/gt_load_data.Rd @@ -0,0 +1,49 @@ +\name{gt_load_data} +\alias{gt_load_data} +\title{ +Load the Geyser Times Data +} +\description{ +Loads the Geyser Times data that was previously downloaded by a call +to \code{gt_get_data}. +} +\usage{ +gt_load_data(path = gt_path(), quiet = FALSE, version = NULL) +} +\arguments{ + \item{path}{ +a character string, the local location where the Geyser Times data +has been written. +The default is the local permanent location given by \code{gt_path()}. +If no appropriate data is found at that location, +the function will look in the temporary location given +\code{by gt_path(temp=TRUE)}. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are displayed. +} + \item{version}{ +a character string giving the version of the Geyser Times data to load. +Calling \code{gt_version(path, all=TRUE)} will list all versions +available under \code{path}. +} +} +\details{ +Typically, a user would download the data once, with a call to +`gt_get_data(destdir=gt_path())`. +Subsequent R sessions can the load this downloaded data with a +call to `gt_load_data()`. +} +\value{ +a tibble containing the Geyser Times data with names: +(need to decide on appropriate names) +} +\author{ +Stephen Kaluzny +} +\seealso{ +\code{gt_get_data}. +} +\examples{ +} +\keyword{geysertimes} diff --git a/geysertimes/man/gt_path.Rd b/geysertimes/man/gt_path.Rd new file mode 100644 index 0000000..52ca31f --- /dev/null +++ b/geysertimes/man/gt_path.Rd @@ -0,0 +1,37 @@ +\name{gt_path} +\alias{gt_path} +\title{ +Path to GeyserTimes Local Data +} +\description{ +Returns the path where local GeyserTimes data is stored. 
+} +\usage{ +gt_path(temp = FALSE) +} +\arguments{ + \item{temp}{ +a logical value, if \code{TRUE}, the temporary path is returned. +This location will disappear when the R session ends. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +a character string giving the full path where local GeyserTimes data is stored. +} +\author{ +Stephen Kaluzny +} +\note{ +%% ~~further notes~~ +} +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +# The default location for the GeyserTimes data: +gt_path() +} +\keyword{geysertimes} diff --git a/geysertimes/man/gt_version.Rd b/geysertimes/man/gt_version.Rd new file mode 100644 index 0000000..2efec59 --- /dev/null +++ b/geysertimes/man/gt_version.Rd @@ -0,0 +1,42 @@ +\name{gt_version} +\alias{gt_version} +\title{Version of GeyserTimes Data} +\description{ +Returns the version of the current GeyserTimes data. +This is a character string date in year-mm-dy format. +} +\usage{ +gt_version(path = gt_path(), quiet = FALSE, all = FALSE) +} +\arguments{ + \item{path}{ +the path to the GeyserTimes data. +The default is the suggested location used by `gt_get_data`. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are printed. +} + \item{all}{ +list all versions of the GeyserTimes data found, +not just the newest. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +a character string listing the version(s) of GeyserTimes data +stored under `path`. 
+} +\author{ +Stephen Kaluzny +} +\note{ +} +\seealso{ +\code{gt_path} +} +\examples{ +gt_version() +} +\keyword{geysertimes} diff --git a/geysertimes/vignettes/geysertimes.Rmd b/geysertimes/vignettes/geysertimes.Rmd new file mode 100644 index 0000000..f6d7a0e --- /dev/null +++ b/geysertimes/vignettes/geysertimes.Rmd @@ -0,0 +1,85 @@ +--- +title: "Introduction to geysertimes" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Introduction to geysertimes} + %\VignetteEngine{knitr::rmarkdown} +--- + +```{r, echo = FALSE, message = FALSE} +knitr::opts_chunk$set(collapse = T, comment = "#>") +options(tibble.print_min = 4L, tibble.print_max = 4L) +``` + +# Basic Use + +Load the package +```{r library} +library("geysertimes") +``` + +## Get the Data + +The `gt_get_data` function downloads the compressed eruptions data +from `https://geysertimes.org/archive/`, +reads the data compressed data into R +and saves version of the R object +in the location specified +in the `dest_folder` argument to the function. +The default location for `dest_folder` is +`file.path(tempdir(), "GeyserTimes"))`. +This default location is used to meet the CRAN requirement of +not writing files by default to any location other than under `tempdir()`. + +```{r default_get} +defpath <- gt_get_data() +defpath +``` + +Users are encouraged to set `dest_folder` to the value given by +`gt_path()` which is a permanent location appropriate for the +user on the particular platform. + +```{r gt_path} +gt_path() +``` + +If a permanent location is used, the user only needs to get the +data once. +Using the suggested value for `dest_folder`: +```{r recommend_path} +recpath <- gt_get_data(dest_folder=gt_path()) +recpath +``` + +## Load the Data + +The `gt_load_data` is used to load the saved R object. + +```{r load01} +gtdata <- gt_load_data() +``` + +A quick look at the data: +```{r look} +dim(gtdata) +names(gtdata) +``` + +### Data Version +The data that is downloaded is versioned. 
+The version id is the date when the data was downloaded. + +The `gt_version()` lists the latest version of the data that +has been downloaded. +Setting `all=TRUE` will list all versions of the data that have been +downloaded. + +```{r version} +gt_version() +``` + +```{r version_all} +gt_version(all=TRUE) +``` + diff --git a/geysertimes/vignettes/geysertimes.html b/geysertimes/vignettes/geysertimes.html new file mode 100644 index 0000000..26c0039 --- /dev/null +++ b/geysertimes/vignettes/geysertimes.html @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + +Introduction to geysertimes + + + + + + + + + + + + + + + + + + + + + +

Introduction to geysertimes

+ + + +
+

Basic Use

+

Load the package

+
library("geysertimes")
+
+

Get the Data

+

The gt_get_data function downloads the compressed eruptions data from https://geysertimes.org/archive/, reads the data compressed data into R and saves version of the R object in the location specified in the dest_folder argument to the function. The default location for dest_folder is file.path(tempdir(), "GeyserTimes")). This default location is used to meet the CRAN requirement of not writing files by default to any location other than under tempdir().

+
defpath <- gt_get_data()
+#> Set dest_folder to GeyserTimes::gt_path() so that data persists between R sessions.
+defpath
+#> [1] "/tmp/Rtmpj41IjU/GeyserTimes/2019-05-03/eruptions_data.rds"
+

Users are encouraged to set dest_folder to the value given by gt_path() which is a permanent location appropriate for the user on the particular platform.

+
gt_path()
+#> [1] "/home/spk/.local/share/GeyserTimes"
+

If a permanent location is used, the user only needs to get the data once. Using the suggested value for dest_folder:

+
recpath <- gt_get_data(dest_folder=gt_path())
+recpath
+#> [1] "/home/spk/.local/share/GeyserTimes/2019-05-03/eruptions_data.rds"
+
+
+

Load the Data

+

The gt_load_data is used to load the saved R object.

+
gtdata <- gt_load_data()
+

A quick look at the data:

+
dim(gtdata)
+#> [1] 1064341      22
+names(gtdata)
+#>  [1] "eruptionID"           "geyser"               "eruption_time_epoch" 
+#>  [4] "has_seconds"          "exact"                "ns"                  
+#>  [7] "ie"                   "E"                    "A"                   
+#> [10] "wc"                   "ini"                  "maj"                 
+#> [13] "min"                  "q"                    "duration"            
+#> [16] "entrant"              "observer"             "eruption_comment"    
+#> [19] "time_updated"         "time_entered"         "associated_primaryID"
+#> [22] "other_comments"
+
+

Data Version

+

The data that is downloaded is versioned. The version id is the date when the data was downloaded.

+

The gt_version() lists the latest version of the data that has been downloaded. Setting all=TRUE will list all versions of the data that have been downloaded.

+
gt_version()
+#> [1] "2019-05-03"
+
gt_version(all=TRUE)
+#> [1] "2019-05-03" "2019-05-02"
+
+
+
+ + + + + + + + From 4e05c912ee5aab95483004c98f1bd7f3ce591933 Mon Sep 17 00:00:00 2001 From: spkaluzny Date: Fri, 10 May 2019 16:58:37 -0700 Subject: [PATCH 2/3] Move all files to geysertimes-r-package --- DESCRIPTION | 16 ++++++++ NAMESPACE | 6 +++ R/gt_get_data.R | 33 +++++++++++++++ R/gt_load_data.R | 22 ++++++++++ R/gt_path.R | 7 ++++ R/gt_version.R | 19 +++++++++ man/gt_get_data.Rd | 59 +++++++++++++++++++++++++++ man/gt_load_data.Rd | 49 ++++++++++++++++++++++ man/gt_path.Rd | 37 +++++++++++++++++ man/gt_version.Rd | 42 +++++++++++++++++++ vignettes/geysertimes.Rmd | 85 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 375 insertions(+) create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 R/gt_get_data.R create mode 100644 R/gt_load_data.R create mode 100644 R/gt_path.R create mode 100644 R/gt_version.R create mode 100644 man/gt_get_data.Rd create mode 100644 man/gt_load_data.Rd create mode 100644 man/gt_path.Rd create mode 100644 man/gt_version.Rd create mode 100644 vignettes/geysertimes.Rmd diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..8f5506c --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,16 @@ +Package: geysertimes +Title: Geyser Data from GeyserTimes.org +Version: 0.0.0.9000 +Imports: lubridate, rappdirs, readr +Authors@R: + person(given = "Stephen", + family = "Kaluzny", + role = c("aut", "cre"), + email = "spkaluzny@gmail.com") +Description: Gets geyser eruption and observation data from the GeyserTimes + database and optionally stores it locally. 
+License: MIT + file LICENSE
+VignetteBuilder: knitr
+Encoding: UTF-8
+LazyData: true
+NeedsCompilation: no
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..f04a1f2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,6 @@
+export(
+  gt_get_data,
+  gt_load_data,
+  gt_path,
+  gt_version
+)
diff --git a/R/gt_get_data.R b/R/gt_get_data.R
new file mode 100644
index 0000000..d123a07
--- /dev/null
+++ b/R/gt_get_data.R
@@ -0,0 +1,57 @@
+# Download one dated snapshot of the GeyserTimes eruptions archive, convert
+# its epoch columns to date-times and cache the resulting tibble as
+# <dest_folder>/<version>/eruptions_data.rds.
+#
+# Arguments:
+#   dest_folder  directory under which the versioned .rds file is written.
+#   overwrite    if FALSE and the .rds file already exists, warn and return
+#                its path without downloading again.
+#   quiet        if TRUE, suppress the hint about using gt_path().
+#   version      archive date (yyyy-mm-dd); used in the remote file name and
+#                as the name of the local sub-directory.
+#
+# Returns the full path of the .rds file, invisibly.
+# Stops with an error when the archive file cannot be downloaded.
+gt_get_data <- function(dest_folder = file.path(tempdir(), "GeyserTimes"),
+  overwrite=FALSE, quiet=FALSE, version=lubridate::today()) {
+  if(dest_folder != gt_path()) {
+    if(!quiet) {
+      message("Set dest_folder to geysertimes::gt_path() so that data persists between R sessions.\n")
+    }
+  }
+  outpath <- file.path(dest_folder, version, "eruptions_data.rds")
+  if(file.exists(outpath) && !overwrite) {
+    warning("GeyserTimes data for this version already exists on the local machine. Use the 'overwrite' argument to re-download if necessary.")
+    return(invisible(outpath))
+  }
+  outdir <- dirname(outpath)
+  if(!dir.exists(outdir)) {
+    dir.create(outdir, recursive=TRUE)
+  }
+  base_url <- "https://geysertimes.org/archive/complete/"
+  raw_data_file <- paste0("geysertimes_eruptions_complete_", version, ".tsv.gz")
+  download_data_file_path <- file.path(tempdir(), raw_data_file)
+  data_url <- paste0(base_url, raw_data_file)
+  # Warnings are silenced until the function exits; add=TRUE keeps any other
+  # exit handlers intact.
+  oldOpt <- options(warn=-1)
+  on.exit(options(oldOpt), add=TRUE)
+  trydownload <- try(
+    download.file(data_url, destfile=download_data_file_path, quiet=TRUE),
+    silent=TRUE)
+  # download.file() returns 0 on success. A caught error or any other status
+  # means there is no usable file, so fail here with the URL instead of
+  # letting read_tsv() fail on a missing or partial file.
+  if(inherits(trydownload, "try-error") || !identical(as.integer(trydownload), 0L)) {
+    stop("Could not download GeyserTimes data for version ", version,
+      " from ", data_url, call.=FALSE)
+  }
+  gt_tib <- readr::read_tsv(gzfile(download_data_file_path),
+    col_types=c("dcddddddddddddccccdddc"), quote="", progress=FALSE)
+  # The three time columns are read as doubles and converted to date-times.
+  gt_tib[["eruption_time_epoch"]] <- lubridate::as_datetime(gt_tib[["eruption_time_epoch"]])
+  gt_tib[["time_updated"]] <- lubridate::as_datetime(gt_tib[["time_updated"]])
+  gt_tib[["time_entered"]] <- lubridate::as_datetime(gt_tib[["time_entered"]])
+  saveRDS(gt_tib, file=outpath)
+  invisible(outpath)
+}
diff --git
a/R/gt_load_data.R b/R/gt_load_data.R
new file mode 100644
index 0000000..78bea6d
--- /dev/null
+++ b/R/gt_load_data.R
@@ -0,0 +1,38 @@
+# Load a previously downloaded GeyserTimes data object from disk.
+#
+# Arguments:
+#   path     directory holding the versioned data; defaults to gt_path().
+#   quiet    if TRUE, no messages are printed.
+#   version  version (yyyy-mm-dd) to load; NULL selects the newest version
+#            that gt_version() finds under path.
+#
+# When version is NULL and nothing is found under path, the session's
+# temporary location, gt_path(temp=TRUE), is searched before giving up.
+#
+# Returns the object read from eruptions_data.rds, or NULL when no data is
+# found in either location. Stops when the selected data file does not exist.
+"gt_load_data" <- function(path=gt_path(), quiet=FALSE, version=NULL) {
+  if(is.null(version)) {
+    version <- gt_version(path, quiet=TRUE)
+  }
+  if(is.null(version)) {
+    if(!quiet) {
+      message("Cannot find any GeyserTimes data under ", path)
+    }
+    # Look in Rtmp
+    path <- gt_path(temp=TRUE)
+    version <- gt_version(path, quiet=TRUE)
+    if(is.null(version)) {
+      return(NULL)
+    } else {
+      if(!quiet) {
+        message("Loading data from ", path)
+      }
+    }
+  }
+  full_path <- file.path(path, version, "eruptions_data.rds")
+  if(!file.exists(full_path)) {
+    stop("Cannot find GeyserTimes data file ", full_path, call.=FALSE)
+  }
+  readRDS(full_path)
+}
diff --git a/R/gt_path.R b/R/gt_path.R
new file mode 100644
index 0000000..2849df8
--- /dev/null
+++ b/R/gt_path.R
@@ -0,0 +1,11 @@
+# Location of the locally stored GeyserTimes data.
+#
+# temp=TRUE gives the "GeyserTimes" directory under this session's tempdir();
+# otherwise the per-user data directory reported by rappdirs is returned.
+"gt_path" <- function(temp=FALSE) {
+  if(temp) {
+    file.path(tempdir(), "GeyserTimes")
+  } else {
+    rappdirs::user_data_dir(appname = "GeyserTimes", appauthor = "GeyserTimes")
+  }
+}
diff --git a/R/gt_version.R b/R/gt_version.R
new file mode 100644
index 0000000..6eb4ccd
--- /dev/null
+++ b/R/gt_version.R
@@ -0,0 +1,31 @@
+# Find the version(s) of GeyserTimes data stored under path.
+#
+# A version is a sub-directory of path named yyyy-mm-dd that contains an
+# eruptions_data.rds file.
+#
+# Arguments:
+#   path   directory to search; defaults to gt_path().
+#   quiet  if TRUE, no message is printed when nothing is found.
+#   all    if TRUE, return every version found instead of only the newest.
+#
+# Returns a Date vector sorted newest first (length one unless all=TRUE), or
+# NULL when no version is found.
+"gt_version" <- function(path=gt_path(), quiet=FALSE, all=FALSE) {
+  gt_files <- list.files(path, pattern="eruptions_data\\.rds$", recursive=TRUE)
+  versions <- as.Date(dirname(gt_files), format="%Y-%m-%d")
+  # only directories of form yyyy-mm-dd are allowed:
+  versions <- sort(versions[as.character(versions) == dirname(gt_files)],
+    decreasing=TRUE)
+  if(length(gt_files) < 1 || all(is.na(versions))) {
+    if(!quiet) {
+      message("Cannot find any GeyserTimes data under ", path)
+    }
+    return(NULL)
+  }
+  version <- if(all) {
+    versions[!is.na(versions)]
+  } else {
+    versions[1]
+  }
+  version
+}
diff --git a/man/gt_get_data.Rd b/man/gt_get_data.Rd
new file mode 100644
index 0000000..4981d85
--- /dev/null
+++ b/man/gt_get_data.Rd
@@ -0,0 +1,59 @@
+\name{gt_get_data}
+\alias{gt_get_data}
+\title{
+Download Geyser Times Data
+}
+\description{
+Downloads the data from geysertimes.org. +Reads the data and creates a tibble object in \code{dest_folder}. +} +\usage{ +gt_get_data(dest_folder = file.path(tempdir(), "GeyserTimes"), + overwrite = FALSE, quiet = FALSE, version = lubridate::today()) +} +\arguments{ + \item{dest_folder}{ +the location where the binary tibble object should be written. +The default is under the current R session's temp directory +which will disappear when the session ends. +} + \item{overwrite}{ +a logical value, +if \code{FALSE}, the data will not be downloaded again if a copy of the +data, with \code{version}, already exists in \code{dest_folder}. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are displayed. +} + \item{version}{ +a character string giving the version of the data to download. +This should be a date in the form \code{yyyy-mm-dd}. +Typically, only the version with today's date is available. +} +} +\details{ +The data is downloaded from the GeyserTimes archive web site +\url{https://geysertimes.org/archive/} to the \code{tempdir()} directory. +The data is then read with \code{readr::read_tsv} with appropriate +column types. +The resulting \code{tibble} object is then saved as a binary file (\code{.rds}) +in \code{dest_folder}. +} +\value{ +a character string giving the full path to the GeyserTimes data object. +} +\author{ +Stephen Kaluzny . +} +\note{ +Users are encouraged to set \code{dest_folder} to \code{gt_path()} to save +a persistent copy of the data. +} +\seealso{ +\code{\link{gt_load_data}}.
+} +\examples{ +dpath0 <- gt_get_data() # data saved under tempdir() +dpath1 <- gt_get_data(dest_folder=gt_path()) # data saved under gt_path() +} +\keyword{geysertimes} diff --git a/man/gt_load_data.Rd b/man/gt_load_data.Rd new file mode 100644 index 0000000..1d3506a --- /dev/null +++ b/man/gt_load_data.Rd @@ -0,0 +1,49 @@ +\name{gt_load_data} +\alias{gt_load_data} +\title{ +Load the Geyser Times Data +} +\description{ +Loads the Geyser Times data that was previously downloaded by a call +to \code{gt_get_data}. +} +\usage{ +gt_load_data(path = gt_path(), quiet = FALSE, version = NULL) +} +\arguments{ + \item{path}{ +a character string, the local location where the Geyser Times data +has been written. +The default is the local permanent location given by \code{gt_path()}. +If no appropriate data is found at that location, +the function will look in the temporary location given +by \code{gt_path(temp=TRUE)}. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are displayed. +} + \item{version}{ +a character string giving the version of the Geyser Times data to load. +Calling \code{gt_version(path, all=TRUE)} will list all versions +available under \code{path}. +} +} +\details{ +Typically, a user would download the data once, with a call to +\code{gt_get_data(dest_folder=gt_path())}. +Subsequent R sessions can then load this downloaded data with a +call to \code{gt_load_data()}. +} +\value{ +a tibble containing the Geyser Times data with names: +(need to decide on appropriate names) +} +\author{ +Stephen Kaluzny +} +\seealso{ +\code{gt_get_data}. +} +\examples{ +} +\keyword{geysertimes} diff --git a/man/gt_path.Rd b/man/gt_path.Rd new file mode 100644 index 0000000..52ca31f --- /dev/null +++ b/man/gt_path.Rd @@ -0,0 +1,37 @@ +\name{gt_path} +\alias{gt_path} +\title{ +Path to GeyserTimes Local Data +} +\description{ +Returns the path where local GeyserTimes data is stored.
+} +\usage{ +gt_path(temp = FALSE) +} +\arguments{ + \item{temp}{ +a logical value, if \code{TRUE}, the temporary path is returned. +This location will disappear when the R session ends. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +a character string giving the full path where local GeyserTimes data is stored. +} +\author{ +Stephen Kaluzny +} +\note{ +%% ~~further notes~~ +} +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +# The default location for the GeyserTimes data: +gt_path() +} +\keyword{geysertimes} diff --git a/man/gt_version.Rd b/man/gt_version.Rd new file mode 100644 index 0000000..2efec59 --- /dev/null +++ b/man/gt_version.Rd @@ -0,0 +1,42 @@ +\name{gt_version} +\alias{gt_version} +\title{Version of GeyserTimes Data} +\description{ +Returns the version of the current GeyserTimes data. +This is a character string date in yyyy-mm-dd format. +} +\usage{ +gt_version(path = gt_path(), quiet = FALSE, all = FALSE) +} +\arguments{ + \item{path}{ +the path to the GeyserTimes data. +The default is the suggested location used by \code{gt_get_data}. +} + \item{quiet}{ +a logical value, if \code{TRUE}, no messages are printed. +} + \item{all}{ +list all versions of the GeyserTimes data found, +not just the newest. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +a character string listing the version(s) of GeyserTimes data +stored under \code{path}.
+} +\author{ +Stephen Kaluzny +} +\note{ +} +\seealso{ +\code{gt_path} +} +\examples{ +gt_version() +} +\keyword{geysertimes} diff --git a/vignettes/geysertimes.Rmd b/vignettes/geysertimes.Rmd new file mode 100644 index 0000000..f6d7a0e --- /dev/null +++ b/vignettes/geysertimes.Rmd @@ -0,0 +1,85 @@ +--- +title: "Introduction to geysertimes" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Introduction to geysertimes} + %\VignetteEngine{knitr::rmarkdown} +--- + +```{r, echo = FALSE, message = FALSE} +knitr::opts_chunk$set(collapse = T, comment = "#>") +options(tibble.print_min = 4L, tibble.print_max = 4L) +``` + +# Basic Use + +Load the package +```{r library} +library("geysertimes") +``` + +## Get the Data + +The `gt_get_data` function downloads the compressed eruptions data +from `https://geysertimes.org/archive/`, +reads the compressed data into R +and saves a version of the R object +in the location specified +in the `dest_folder` argument to the function. +The default location for `dest_folder` is +`file.path(tempdir(), "GeyserTimes")`. +This default location is used to meet the CRAN requirement of +not writing files by default to any location other than under `tempdir()`. + +```{r default_get} +defpath <- gt_get_data() +defpath +``` + +Users are encouraged to set `dest_folder` to the value given by +`gt_path()` which is a permanent location appropriate for the +user on the particular platform. + +```{r gt_path} +gt_path() +``` + +If a permanent location is used, the user only needs to get the +data once. +Using the suggested value for `dest_folder`: +```{r recommend_path} +recpath <- gt_get_data(dest_folder=gt_path()) +recpath +``` + +## Load the Data + +The `gt_load_data` function is used to load the saved R object. + +```{r load01} +gtdata <- gt_load_data() +``` + +A quick look at the data: +```{r look} +dim(gtdata) +names(gtdata) +``` + +### Data Version +The data that is downloaded is versioned.
+The version id is the date when the data was downloaded. + +The `gt_version()` lists the latest version of the data that +has been downloaded. +Setting `all=TRUE` will list all versions of the data that have been +downloaded. + +```{r version} +gt_version() +``` + +```{r version_all} +gt_version(all=TRUE) +``` + From 5627acd5620c193a81179146ee4d528c92c78a7e Mon Sep 17 00:00:00 2001 From: spkaluzny Date: Fri, 10 May 2019 17:02:17 -0700 Subject: [PATCH 3/3] Removed geysertimes subdirectory --- geysertimes/DESCRIPTION | 16 --- geysertimes/NAMESPACE | 6 - geysertimes/R/gt_get_data.R | 33 ------ geysertimes/R/gt_load_data.R | 22 ---- geysertimes/R/gt_path.R | 7 -- geysertimes/R/gt_version.R | 19 --- geysertimes/man/gt_get_data.Rd | 59 --------- geysertimes/man/gt_load_data.Rd | 49 -------- geysertimes/man/gt_path.Rd | 37 ------ geysertimes/man/gt_version.Rd | 42 ------- geysertimes/vignettes/geysertimes.Rmd | 85 ------------- geysertimes/vignettes/geysertimes.html | 158 ------------------------- 12 files changed, 533 deletions(-) delete mode 100644 geysertimes/DESCRIPTION delete mode 100644 geysertimes/NAMESPACE delete mode 100644 geysertimes/R/gt_get_data.R delete mode 100644 geysertimes/R/gt_load_data.R delete mode 100644 geysertimes/R/gt_path.R delete mode 100644 geysertimes/R/gt_version.R delete mode 100644 geysertimes/man/gt_get_data.Rd delete mode 100644 geysertimes/man/gt_load_data.Rd delete mode 100644 geysertimes/man/gt_path.Rd delete mode 100644 geysertimes/man/gt_version.Rd delete mode 100644 geysertimes/vignettes/geysertimes.Rmd delete mode 100644 geysertimes/vignettes/geysertimes.html diff --git a/geysertimes/DESCRIPTION b/geysertimes/DESCRIPTION deleted file mode 100644 index 8f5506c..0000000 --- a/geysertimes/DESCRIPTION +++ /dev/null @@ -1,16 +0,0 @@ -Package: geysertimes -Title: Geyser Data from GeyserTimes.org -Version: 0.0.0.9000 -Imports: lubridate, rappdirs, readr -Authors@R: - person(given = "Stephen", - family = "Kaluzny", - role = c("aut", 
"cre"), - email = "spkaluzny@gmail.com") -Description: Gets geyser eruption and observation data from the GeyserTimes - database and optionally stores it locally. -License: MIT + file LICENSE -VignetteBuilder: knitr -Encoding: UTF-8 -LazyData: true -NeedsCompilation: no diff --git a/geysertimes/NAMESPACE b/geysertimes/NAMESPACE deleted file mode 100644 index f04a1f2..0000000 --- a/geysertimes/NAMESPACE +++ /dev/null @@ -1,6 +0,0 @@ -export( - gt_get_data, - gt_load_data, - gt_path, - gt_version -) diff --git a/geysertimes/R/gt_get_data.R b/geysertimes/R/gt_get_data.R deleted file mode 100644 index d123a07..0000000 --- a/geysertimes/R/gt_get_data.R +++ /dev/null @@ -1,33 +0,0 @@ -gt_get_data <- function(dest_folder = file.path(tempdir(), "GeyserTimes"), - overwrite=FALSE, quiet=FALSE, version=lubridate::today()) { - if(dest_folder != gt_path()) { - if(!quiet) { - message("Set dest_folder to GeyserTimes::gt_path() so that data persists between R sessions.\n") - } - } - outpath <- file.path(dest_folder, version, "eruptions_data.rds") - if(file.exists(outpath) && !overwrite) { - warning("GeyserTimes data for this version already exists on the local machine. 
Use the 'overwrite' argument to re-download if neccessary.") - return(invisible(outpath)) - } - outdir <- dirname(outpath) - if(!dir.exists(outdir)) { - dir.create(outdir, recursive=TRUE) - } - base_url <- "https://geysertimes.org/archive/complete/" - raw_data_file <- paste0("geysertimes_eruptions_complete_", version, ".tsv.gz") - download_data_file_path <- file.path(tempdir(), raw_data_file) - data_url <- paste0(base_url, raw_data_file) - oldOpt <- options(warn=-1) - on.exit(options(oldOpt)) - trydownload <- try( - download.file(data_url, destfile=download_data_file_path, quiet=TRUE), - silent=TRUE) - gt_tib <- readr::read_tsv(gzfile(download_data_file_path), - col_types=c("dcddddddddddddccccdddc"), quote="", progress=FALSE) - gt_tib[["eruption_time_epoch"]] <- lubridate::as_datetime(gt_tib[["eruption_time_epoch"]]) - gt_tib[["time_updated"]] <- lubridate::as_datetime(gt_tib[["time_updated"]]) - gt_tib[["time_entered"]] <- lubridate::as_datetime(gt_tib[["time_entered"]]) - saveRDS(gt_tib, file=outpath) - invisible(outpath) -} diff --git a/geysertimes/R/gt_load_data.R b/geysertimes/R/gt_load_data.R deleted file mode 100644 index 78bea6d..0000000 --- a/geysertimes/R/gt_load_data.R +++ /dev/null @@ -1,22 +0,0 @@ -"gt_load_data" <- function(path=gt_path(), quiet=FALSE, version=NULL) { - if(is.null(version)) { - version <- gt_version(path, quiet=TRUE) - } - if(is.null(version)) { - if(!quiet) { - message("Cannot find any GeyserTimes data under ", path) - } - # Look in Rtmp - path <- file.path(tempdir(), "GeyserTimes") - version <- gt_version(path, quiet=TRUE) - if(is.null(version)) { - return(NULL) - } else { - if(!quiet) { - message("Loading data from ", path) - } - } - } - full_path <- file.path(path, version, "eruptions_data.rds") - readRDS(full_path) -} diff --git a/geysertimes/R/gt_path.R b/geysertimes/R/gt_path.R deleted file mode 100644 index 2849df8..0000000 --- a/geysertimes/R/gt_path.R +++ /dev/null @@ -1,7 +0,0 @@ -"gt_path" <- function(temp=FALSE) { - 
if(temp) { - file.path(tempdir(), "GeyserTimes") - } else { - rappdirs::user_data_dir(appname = "GeyserTimes", appauthor = "GeyserTimes") - } -} diff --git a/geysertimes/R/gt_version.R b/geysertimes/R/gt_version.R deleted file mode 100644 index 6eb4ccd..0000000 --- a/geysertimes/R/gt_version.R +++ /dev/null @@ -1,19 +0,0 @@ -"gt_version" <- function(path=gt_path(), quiet=FALSE, all=FALSE) { - gt_files <- list.files(path, pattern="eruptions_data\\.rds$", recursive=TRUE) - versions <- as.Date(dirname(gt_files), format="%Y-%m-%d") - # only directories of form yyyy-mm-dd are allowed: - versions <- sort(versions[as.character(versions) == dirname(gt_files)], - decreasing=TRUE) - if(length(gt_files) < 1 || all(is.na(versions))) { - if(!quiet) { - message("Cannot find any GeyserTimes data under", path) - } - return(NULL) - } - version <- if(all) { - versions[!is.na(versions)] - } else { - versions[1] - } - version -} diff --git a/geysertimes/man/gt_get_data.Rd b/geysertimes/man/gt_get_data.Rd deleted file mode 100644 index 4981d85..0000000 --- a/geysertimes/man/gt_get_data.Rd +++ /dev/null @@ -1,59 +0,0 @@ -\name{gt_get_data} -\alias{gt_get_data} -\title{ -Download Geyser Times Data -} -\description{ -Downloads the data from geysertimes.org. -Reads the data and creates a tibble object in `dest_dir`. -} -\usage{ -gt_get_data(dest_dir = file.path(tempdir(), "GeyserTimes"), - overwrite = FALSE, quiet = FALSE, version = lubridate::today()) -} -\arguments{ - \item{dest_dir}{ -the location where the binary tibble object should be written. -The default is under the current R session's temp directory -which will disappear when the session ends. -} - \item{overwrite}{ -a logical value, -if\code{FALSE}, the data will not be downloaded again if copy of the -data, with \code{version}, already exists in \code{dest_dir}. -} - \item{quiet}{ -a logical value, if \code{TRUE}, no messages are displayed. -} - \item{version}{ -a character string giving the version of the data to download. 
-This should a date in the form \code{yyyy-mm-dd}. -Typically, only the version with today's date is available. -} -} -\details{ -The data is downloaded from the GeyserTimes archive web site -\url{https://geysertimes.org/archive/} to the \code{tempdir()} directory. -The data is then read with \code{readr::read_tsv} wtih appropriate -column types. -The resulting \code{tibble} object is then saved as an binary (\code{.rds}) -in \code{dest_dir}. -} -\value{ -a character string giving the full path to GeyserTimes data object. -} -\author{ -Stephen Kaluzny . -} -\note{ -Users are encouraged to set \code{dest_dir} to \code{gt_path()} to save -a persistent copy of the data. -} -\seealso{ -gt_load_data. -} -\examples{ -dpath0 <- gt_get_data() # data saved under tempdir() -dpath1 <- gt_get_data(dest_dir=gt_path()) # data saved under gt_path() -} -\keyword{geysertimes} diff --git a/geysertimes/man/gt_load_data.Rd b/geysertimes/man/gt_load_data.Rd deleted file mode 100644 index 1d3506a..0000000 --- a/geysertimes/man/gt_load_data.Rd +++ /dev/null @@ -1,49 +0,0 @@ -\name{gt_load_data} -\alias{gt_load_data} -\title{ -Load the Geyser Times Data -} -\description{ -Loads the Geyser Times data that was previously downloaded by a call -to \code{gt_get_data}. -} -\usage{ -gt_load_data(path = gt_path(), quiet = FALSE, version = NULL) -} -\arguments{ - \item{path}{ -a character string, the local location where the Geyser Times data -has been written. -The default is the local permanent location given by \code{gt_path()}. -If no appropriate data is found at that location, -the function will look in the temporary location given -\code{by gt_path(temp=TRUE)}. -} - \item{quiet}{ -a logical value, if \code{TRUE}, no messages are displayed. -} - \item{version}{ -a character string giving the version of the Geyser Times data to load. -Calling \code{gt_version(path, all=TRUE)} will list all versions -available under \code{path}. 
-} -} -\details{ -Typically, a user would download the data once, with a call to -`gt_get_data(destdir=gt_path())`. -Subsequent R sessions can the load this downloaded data with a -call to `gt_load_data()`. -} -\value{ -a tibble containing the Geyser Times data with names: -(need to decide on appropriate names) -} -\author{ -Stephen Kaluzny -} -\seealso{ -\code{gt_get_data}. -} -\examples{ -} -\keyword{geysertimes} diff --git a/geysertimes/man/gt_path.Rd b/geysertimes/man/gt_path.Rd deleted file mode 100644 index 52ca31f..0000000 --- a/geysertimes/man/gt_path.Rd +++ /dev/null @@ -1,37 +0,0 @@ -\name{gt_path} -\alias{gt_path} -\title{ -Path to GeyserTimes Local Data -} -\description{ -Returns the path where local GeyserTimes data is stored. -} -\usage{ -gt_path(temp = FALSE) -} -\arguments{ - \item{temp}{ -a logical value, if \code{TRUE}, the temporary path is returned. -This location will disappear when the R session ends. -} -} -\details{ -%% ~~ If necessary, more details than the description above ~~ -} -\value{ -a character string giving the full path where local GeyserTimes data is stored. -} -\author{ -Stephen Kaluzny -} -\note{ -%% ~~further notes~~ -} -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ -# The default location for the GeyserTimes data: -gt_path() -} -\keyword{geysertimes} diff --git a/geysertimes/man/gt_version.Rd b/geysertimes/man/gt_version.Rd deleted file mode 100644 index 2efec59..0000000 --- a/geysertimes/man/gt_version.Rd +++ /dev/null @@ -1,42 +0,0 @@ -\name{gt_version} -\alias{gt_version} -\title{Version of GeyserTimes Data} -\description{ -Returns the version of the current GeyserTimes data. -This is a character string date in year-mm-dy format. -} -\usage{ -gt_version(path = gt_path(), quiet = FALSE, all = FALSE) -} -\arguments{ - \item{path}{ -the path to the GeyserTimes data. -The default is the suggested location used by `gt_get_data`. 
-} - \item{quiet}{ -a logical value, if \code{TRUE}, no messages are printed. -} - \item{all}{ -list all versions of the GeyserTimes data found, -not just the newest. -} -} -\details{ -%% ~~ If necessary, more details than the description above ~~ -} -\value{ -a character string listing the version(s) of GeyserTimes data -stored under `path`. -} -\author{ -Stephen Kaluzny -} -\note{ -} -\seealso{ -\code{gt_path} -} -\examples{ -gt_version() -} -\keyword{geysertimes} diff --git a/geysertimes/vignettes/geysertimes.Rmd b/geysertimes/vignettes/geysertimes.Rmd deleted file mode 100644 index f6d7a0e..0000000 --- a/geysertimes/vignettes/geysertimes.Rmd +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: "Introduction to geysertimes" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Introduction to geysertimes} - %\VignetteEngine{knitr::rmarkdown} ---- - -```{r, echo = FALSE, message = FALSE} -knitr::opts_chunk$set(collapse = T, comment = "#>") -options(tibble.print_min = 4L, tibble.print_max = 4L) -``` - -# Basic Use - -Load the package -```{r library} -library("geysertimes") -``` - -## Get the Data - -The `gt_get_data` function downloads the compressed eruptions data -from `https://geysertimes.org/archive/`, -reads the data compressed data into R -and saves version of the R object -in the location specified -in the `dest_folder` argument to the function. -The default location for `dest_folder` is -`file.path(tempdir(), "GeyserTimes"))`. -This default location is used to meet the CRAN requirement of -not writing files by default to any location other than under `tempdir()`. - -```{r default_get} -defpath <- gt_get_data() -defpath -``` - -Users are encouraged to set `dest_folder` to the value given by -`gt_path()` which is a permanent location appropriate for the -user on the particular platform. - -```{r gt_path} -gt_path() -``` - -If a permanent location is used, the user only needs to get the -data once. 
-Using the suggested value for `dest_folder`: -```{r recommend_path} -recpath <- gt_get_data(dest_folder=gt_path()) -recpath -``` - -## Load the Data - -The `gt_load_data` is used to load the saved R object. - -```{r load01} -gtdata <- gt_load_data() -``` - -A quick look at the data: -```{r look} -dim(gtdata) -names(gtdata) -``` - -### Data Version -The data that is downloaded is versioned. -The version id is the date when the data was downloaded. - -The `gt_version()` lists the latest version of the data that -has been downloaded. -Setting `all=TRUE` will list all versions of the data that have been -downloaded. - -```{r version} -gt_version() -``` - -```{r version_all} -gt_version(all=TRUE) -``` - diff --git a/geysertimes/vignettes/geysertimes.html b/geysertimes/vignettes/geysertimes.html deleted file mode 100644 index 26c0039..0000000 --- a/geysertimes/vignettes/geysertimes.html +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - -Introduction to geysertimes - - - - - - - - - - - - - - - - - - - - - -

Introduction to geysertimes

- - - -
-

Basic Use

-

Load the package

-
library("geysertimes")
-
-

Get the Data

-

The gt_get_data function downloads the compressed eruptions data from https://geysertimes.org/archive/, reads the data compressed data into R and saves version of the R object in the location specified in the dest_folder argument to the function. The default location for dest_folder is file.path(tempdir(), "GeyserTimes")). This default location is used to meet the CRAN requirement of not writing files by default to any location other than under tempdir().

-
defpath <- gt_get_data()
-#> Set dest_folder to GeyserTimes::gt_path() so that data persists between R sessions.
-defpath
-#> [1] "/tmp/Rtmpj41IjU/GeyserTimes/2019-05-03/eruptions_data.rds"
-

Users are encouraged to set dest_folder to the value given by gt_path() which is a permanent location appropriate for the user on the particular platform.

-
gt_path()
-#> [1] "/home/spk/.local/share/GeyserTimes"
-

If a permanent location is used, the user only needs to get the data once. Using the suggested value for dest_folder:

-
recpath <- gt_get_data(dest_folder=gt_path())
-recpath
-#> [1] "/home/spk/.local/share/GeyserTimes/2019-05-03/eruptions_data.rds"
-
-
-

Load the Data

-

The gt_load_data is used to load the saved R object.

-
gtdata <- gt_load_data()
-

A quick look at the data:

-
dim(gtdata)
-#> [1] 1064341      22
-names(gtdata)
-#>  [1] "eruptionID"           "geyser"               "eruption_time_epoch" 
-#>  [4] "has_seconds"          "exact"                "ns"                  
-#>  [7] "ie"                   "E"                    "A"                   
-#> [10] "wc"                   "ini"                  "maj"                 
-#> [13] "min"                  "q"                    "duration"            
-#> [16] "entrant"              "observer"             "eruption_comment"    
-#> [19] "time_updated"         "time_entered"         "associated_primaryID"
-#> [22] "other_comments"
-
-

Data Version

-

The data that is downloaded is versioned. The version id is the date when the data was downloaded.

-

The gt_version() lists the latest version of the data that has been downloaded. Setting all=TRUE will list all versions of the data that have been downloaded.

-
gt_version()
-#> [1] "2019-05-03"
-
gt_version(all=TRUE)
-#> [1] "2019-05-03" "2019-05-02"
-
-
-
- - - - - - - -