-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from geysertimes/data-load
Initial package for getting and loading the data.
- Loading branch information
Showing
11 changed files
with
375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Package: geysertimes | ||
Title: Geyser Data from GeyserTimes.org | ||
Version: 0.0.0.9000 | ||
Imports: lubridate, rappdirs, readr | ||
Authors@R: | ||
person(given = "Stephen", | ||
family = "Kaluzny", | ||
role = c("aut", "cre"), | ||
email = "[email protected]") | ||
Description: Gets geyser eruption and observation data from the GeyserTimes | ||
database and optionally stores it locally. | ||
License: MIT + file LICENSE | ||
VignetteBuilder: knitr | ||
Encoding: UTF-8 | ||
LazyData: true | ||
NeedsCompilation: no |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
export( | ||
gt_get_data, | ||
gt_load_data, | ||
gt_path, | ||
gt_version | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Download one version of the GeyserTimes eruptions archive, read it into a
# tibble and cache it locally as an .rds file.
#
# Arguments:
#   dest_folder  directory under which "<version>/eruptions_data.rds" is
#                written. Defaults to a folder under tempdir().
#   overwrite    if FALSE and the .rds file for `version` already exists,
#                nothing is downloaded and a warning is issued.
#   quiet        if TRUE, the hint about using gt_path() is not shown.
#   version      archive version; its character form is used both in the
#                name of the remote file and as the local directory name.
#
# Value: (invisibly) the path of the eruptions_data.rds file.
# Errors: stops with an explicit message when the archive file cannot be
#         downloaded (previously the failure was swallowed and surfaced
#         later as an obscure read error).
gt_get_data <- function(dest_folder = file.path(tempdir(), "GeyserTimes"),
  overwrite = FALSE, quiet = FALSE, version = lubridate::today()) {
  if (!quiet && dest_folder != gt_path()) {
    # The package is named "geysertimes"; package names are case-sensitive.
    message("Set dest_folder to geysertimes::gt_path() so that data persists between R sessions.\n")
  }

  # Use one explicit character form of the version for every path and URL.
  version <- as.character(version)
  outpath <- file.path(dest_folder, version, "eruptions_data.rds")
  if (file.exists(outpath) && !overwrite) {
    warning("GeyserTimes data for this version already exists on the local machine. Use the 'overwrite' argument to re-download if neccessary.")
    return(invisible(outpath))
  }

  base_url <- "https://geysertimes.org/archive/complete/"
  raw_data_file <- paste0("geysertimes_eruptions_complete_", version, ".tsv.gz")
  download_data_file_path <- file.path(tempdir(), raw_data_file)
  data_url <- paste0(base_url, raw_data_file)

  # Warnings are silenced while downloading and parsing; the previous
  # setting is restored when the function exits.
  old_opt <- options(warn = -1)
  on.exit(options(old_opt), add = TRUE)

  # mode = "wb": the archive is a gzip file and must be transferred as binary.
  download_status <- tryCatch(
    download.file(data_url, destfile = download_data_file_path,
      quiet = TRUE, mode = "wb"),
    error = function(e) e
  )
  failure <- NULL
  if (inherits(download_status, "error")) {
    failure <- conditionMessage(download_status)
  } else if (!identical(as.integer(download_status), 0L)) {
    failure <- paste("download.file() returned status", download_status)
  }
  if (!is.null(failure)) {
    # Do not leave a partial download behind.
    unlink(download_data_file_path)
    stop("Could not download GeyserTimes data version '", version,
      "' from ", data_url, ": ", failure, call. = FALSE)
  }

  gt_tib <- readr::read_tsv(gzfile(download_data_file_path),
    col_types = c("dcddddddddddddccccdddc"), quote = "", progress = FALSE)
  # These three columns are converted from numbers to date-times.
  for (time_col in c("eruption_time_epoch", "time_updated", "time_entered")) {
    gt_tib[[time_col]] <- lubridate::as_datetime(gt_tib[[time_col]])
  }

  # Create the version directory only once there is data to put in it, so a
  # failed download does not leave an empty directory.
  outdir <- dirname(outpath)
  if (!dir.exists(outdir)) {
    dir.create(outdir, recursive = TRUE)
  }
  saveRDS(gt_tib, file = outpath)
  invisible(outpath)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Load a previously downloaded GeyserTimes data object.
#
# Arguments:
#   path     directory that contains the "<version>/eruptions_data.rds"
#            files. Defaults to gt_path().
#   quiet    if TRUE, no messages are displayed.
#   version  version (directory name) to load. If NULL, the newest version
#            reported by gt_version(path) is used; if none is found there,
#            the temporary location gt_path(temp = TRUE) is searched.
#
# Value: the object stored in the .rds file, or NULL when `version` is NULL
#        and no data is found in either location.
# Errors: stops when the data file for the requested version does not exist.
gt_load_data <- function(path = gt_path(), quiet = FALSE, version = NULL) {
  if (is.null(version)) {
    version <- gt_version(path, quiet = TRUE)
    if (is.null(version)) {
      if (!quiet) {
        message("Cannot find any GeyserTimes data under ", path)
      }
      # Fall back to the session's temporary location.
      path <- gt_path(temp = TRUE)
      version <- gt_version(path, quiet = TRUE)
      if (is.null(version)) {
        return(NULL)
      }
      if (!quiet) {
        message("Loading data from ", path)
      }
    }
  }

  full_path <- file.path(path, as.character(version), "eruptions_data.rds")
  if (!file.exists(full_path)) {
    # Fail with a clear message instead of readRDS()'s connection error.
    stop("Cannot find GeyserTimes data version '", as.character(version),
      "' at ", full_path, call. = FALSE)
  }
  readRDS(full_path)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Path where local GeyserTimes data is stored.
#
# Arguments:
#   temp  if TRUE, return the "GeyserTimes" folder under tempdir();
#         otherwise return the per-user data directory reported by
#         rappdirs::user_data_dir().
#
# Value: a character string giving the path.
gt_path <- function(temp = FALSE) {
  if (temp) {
    return(file.path(tempdir(), "GeyserTimes"))
  }
  rappdirs::user_data_dir(appname = "GeyserTimes", appauthor = "GeyserTimes")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Report which versions of the GeyserTimes data are stored under `path`.
#
# A version is a directory directly under `path` whose name is a date of the
# exact form yyyy-mm-dd and which contains an eruptions_data.rds file.
#
# Arguments:
#   path   directory to search. Defaults to gt_path().
#   quiet  if TRUE, no message is shown when no data is found.
#   all    if TRUE, return every version found (newest first); otherwise
#          return only the newest.
#
# Value: a Date vector, or NULL when no valid version is found.
gt_version <- function(path = gt_path(), quiet = FALSE, all = FALSE) {
  gt_files <- list.files(path, pattern = "eruptions_data\\.rds$",
    recursive = TRUE)
  version_dirs <- dirname(gt_files)
  versions <- as.Date(version_dirs, format = "%Y-%m-%d")
  # Only directories of the exact form yyyy-mm-dd are allowed: the parsed
  # date must print back to the directory name.
  is_valid <- !is.na(versions) & as.character(versions) == version_dirs
  versions <- sort(versions[is_valid], decreasing = TRUE)

  if (length(versions) < 1) {
    if (!quiet) {
      message("Cannot find any GeyserTimes data under ", path)
    }
    return(NULL)
  }
  if (all) {
    versions
  } else {
    versions[1]
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
\name{gt_get_data} | ||
\alias{gt_get_data} | ||
\title{ | ||
Download Geyser Times Data | ||
} | ||
\description{ | ||
Downloads the data from geysertimes.org. | ||
Reads the data and creates a tibble object in \code{dest_folder}. | ||
} | ||
\usage{ | ||
gt_get_data(dest_folder = file.path(tempdir(), "GeyserTimes"), | ||
overwrite = FALSE, quiet = FALSE, version = lubridate::today()) | ||
} | ||
\arguments{ | ||
\item{dest_folder}{ | ||
the location where the binary tibble object should be written. | ||
The default is under the current R session's temp directory | ||
which will disappear when the session ends. | ||
} | ||
\item{overwrite}{ | ||
a logical value, | ||
if \code{FALSE}, the data will not be downloaded again if a copy of the | ||
data, with \code{version}, already exists in \code{dest_folder}. | ||
} | ||
\item{quiet}{ | ||
a logical value, if \code{TRUE}, no messages are displayed. | ||
} | ||
\item{version}{ | ||
a character string giving the version of the data to download. | ||
This should be a date in the form \code{yyyy-mm-dd}. | ||
Typically, only the version with today's date is available. | ||
} | ||
} | ||
\details{ | ||
The data is downloaded from the GeyserTimes archive web site | ||
\url{https://geysertimes.org/archive/} to the \code{tempdir()} directory. | ||
The data is then read with \code{readr::read_tsv} with appropriate | ||
column types. | ||
The resulting \code{tibble} object is then saved as a binary (\code{.rds}) | ||
file in \code{dest_folder}. | ||
} | ||
\value{ | ||
a character string giving the full path to GeyserTimes data object. | ||
} | ||
\author{ | ||
Stephen Kaluzny <spkaluzny@gmail.com>. | ||
} | ||
\note{ | ||
Users are encouraged to set \code{dest_folder} to \code{gt_path()} to save | ||
a persistent copy of the data. | ||
} | ||
\seealso{ | ||
\code{\link{gt_load_data}}. | ||
} | ||
\examples{ | ||
dpath0 <- gt_get_data() # data saved under tempdir() | ||
dpath1 <- gt_get_data(dest_folder=gt_path()) # data saved under gt_path() | ||
} | ||
\keyword{geysertimes} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
\name{gt_load_data} | ||
\alias{gt_load_data} | ||
\title{ | ||
Load the Geyser Times Data | ||
} | ||
\description{ | ||
Loads the Geyser Times data that was previously downloaded by a call | ||
to \code{gt_get_data}. | ||
} | ||
\usage{ | ||
gt_load_data(path = gt_path(), quiet = FALSE, version = NULL) | ||
} | ||
\arguments{ | ||
\item{path}{ | ||
a character string, the local location where the Geyser Times data | ||
has been written. | ||
The default is the local permanent location given by \code{gt_path()}. | ||
If no appropriate data is found at that location, | ||
the function will look in the temporary location given | ||
by \code{gt_path(temp=TRUE)}. | ||
} | ||
\item{quiet}{ | ||
a logical value, if \code{TRUE}, no messages are displayed. | ||
} | ||
\item{version}{ | ||
a character string giving the version of the Geyser Times data to load. | ||
Calling \code{gt_version(path, all=TRUE)} will list all versions | ||
available under \code{path}. | ||
} | ||
} | ||
\details{ | ||
Typically, a user would download the data once, with a call to | ||
\code{gt_get_data(dest_folder=gt_path())}. | ||
Subsequent R sessions can then load this downloaded data with a | ||
call to \code{gt_load_data()}. | ||
} | ||
\value{ | ||
a tibble containing the Geyser Times data with names: | ||
(need to decide on appropriate names) | ||
} | ||
\author{ | ||
Stephen Kaluzny <spkaluzny@gmail.com> | ||
} | ||
\seealso{ | ||
\code{gt_get_data}. | ||
} | ||
\examples{ | ||
} | ||
\keyword{geysertimes} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
\name{gt_path} | ||
\alias{gt_path} | ||
\title{ | ||
Path to GeyserTimes Local Data | ||
} | ||
\description{ | ||
Returns the path where local GeyserTimes data is stored. | ||
} | ||
\usage{ | ||
gt_path(temp = FALSE) | ||
} | ||
\arguments{ | ||
\item{temp}{ | ||
a logical value, if \code{TRUE}, the temporary path is returned. | ||
This location will disappear when the R session ends. | ||
} | ||
} | ||
\details{ | ||
%% ~~ If necessary, more details than the description above ~~ | ||
} | ||
\value{ | ||
a character string giving the full path where local GeyserTimes data is stored. | ||
} | ||
\author{ | ||
Stephen Kaluzny <spkaluzny@gmail.com> | ||
} | ||
\note{ | ||
%% ~~further notes~~ | ||
} | ||
\seealso{ | ||
%% ~~objects to See Also as \code{\link{help}}, ~~~ | ||
} | ||
\examples{ | ||
# The default location for the GeyserTimes data: | ||
gt_path() | ||
} | ||
\keyword{geysertimes} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
\name{gt_version} | ||
\alias{gt_version} | ||
\title{Version of GeyserTimes Data} | ||
\description{ | ||
Returns the version of the current GeyserTimes data. | ||
This is a \code{Date} value that prints in yyyy-mm-dd format. | ||
} | ||
\usage{ | ||
gt_version(path = gt_path(), quiet = FALSE, all = FALSE) | ||
} | ||
\arguments{ | ||
\item{path}{ | ||
the path to the GeyserTimes data. | ||
The default is the suggested location used by \code{gt_get_data}. | ||
} | ||
\item{quiet}{ | ||
a logical value, if \code{TRUE}, no messages are printed. | ||
} | ||
\item{all}{ | ||
list all versions of the GeyserTimes data found, | ||
not just the newest. | ||
} | ||
} | ||
\details{ | ||
%% ~~ If necessary, more details than the description above ~~ | ||
} | ||
\value{ | ||
a \code{Date} vector giving the version(s) of GeyserTimes data | ||
stored under \code{path}, or \code{NULL} if none are found. | ||
} | ||
\author{ | ||
Stephen Kaluzny <spkaluzny@gmail.com> | ||
} | ||
\note{ | ||
} | ||
\seealso{ | ||
\code{gt_path} | ||
} | ||
\examples{ | ||
gt_version() | ||
} | ||
\keyword{geysertimes} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
--- | ||
title: "Introduction to geysertimes" | ||
output: rmarkdown::html_vignette | ||
vignette: > | ||
%\VignetteIndexEntry{Introduction to geysertimes} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
--- | ||
|
||
```{r, echo = FALSE, message = FALSE} | ||
knitr::opts_chunk$set(collapse = T, comment = "#>") | ||
options(tibble.print_min = 4L, tibble.print_max = 4L) | ||
``` | ||
|
||
# Basic Use | ||
|
||
Load the package | ||
```{r library} | ||
library("geysertimes") | ||
``` | ||
|
||
## Get the Data | ||
|
||
The `gt_get_data` function downloads the compressed eruptions data | ||
from `https://geysertimes.org/archive/`, | ||
reads the compressed data into R | ||
and saves a version of the R object | ||
in the location specified | ||
in the `dest_folder` argument to the function. | ||
The default location for `dest_folder` is | ||
`file.path(tempdir(), "GeyserTimes")`. | ||
This default location is used to meet the CRAN requirement of | ||
not writing files by default to any location other than under `tempdir()`. | ||
|
||
```{r default_get} | ||
defpath <- gt_get_data() | ||
defpath | ||
``` | ||
|
||
Users are encouraged to set `dest_folder` to the value given by | ||
`gt_path()` which is a permanent location appropriate for the | ||
user on the particular platform. | ||
|
||
```{r gt_path} | ||
gt_path() | ||
``` | ||
|
||
If a permanent location is used, the user only needs to get the | ||
data once. | ||
Using the suggested value for `dest_folder`: | ||
```{r recommend_path} | ||
recpath <- gt_get_data(dest_folder=gt_path()) | ||
recpath | ||
``` | ||
|
||
## Load the Data | ||
|
||
The `gt_load_data` function is used to load the saved R object. | ||
|
||
```{r load01} | ||
gtdata <- gt_load_data() | ||
``` | ||
|
||
A quick look at the data: | ||
```{r look} | ||
dim(gtdata) | ||
names(gtdata) | ||
``` | ||
|
||
### Data Version | ||
The data that is downloaded is versioned. | ||
The version id is the date when the data was downloaded. | ||
|
||
The `gt_version()` function lists the latest version of the data that | ||
has been downloaded. | ||
Setting `all=TRUE` will list all versions of the data that have been | ||
downloaded. | ||
|
||
```{r version} | ||
gt_version() | ||
``` | ||
|
||
```{r version_all} | ||
gt_version(all=TRUE) | ||
``` | ||
|