Skip to content

Commit

Permalink
Merge pull request #25 from KWB-R/dev
Browse files Browse the repository at this point in the history
Release v0.1.1
  • Loading branch information
mrustl authored Jun 9, 2022
2 parents 0040c26 + 1fec8ba commit 1cb656e
Show file tree
Hide file tree
Showing 17 changed files with 248 additions and 36 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: wasserportal
Title: R Package with Functions for Scraping Data of
Wasserportal Berlin
Version: 0.1.0
Version: 0.1.1
Authors@R:
c(person(given = "Hauke",
family = "Sonnenberg",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export("%>%")
export(columns_to_labels)
export(get_overview_options)
export(get_station_variables)
export(get_stations)
export(get_wasserportal_master_data)
export(get_wasserportal_masters_data)
Expand All @@ -22,6 +23,7 @@ importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,select_if)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(kwb.datetime,textToEuropeBerlinPosix)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# [wasserportal 0.1.1](https://github.com/KWB-R/wasserportal/releases/tag/v0.1.1) <small>2022-06-09</small>

* Fix bug in `get_wasserportal_stations_table()` now correctly naming parameter
`temperature` (formerly incorrectly `level`)
* Fix [Surface Water](../articles/surface-water.html) article
* Adapt Zenodo DOI badge to cite always latest release

# [wasserportal 0.1.0](https://github.com/KWB-R/wasserportal/releases/tag/v0.1.0) <small>2022-06-01</small>

R package for scraping `groundwater` data (`groundwater level` and `quality`) from [Wasserportal Berlin](https://wasserportal.berlin.de). Please note that the
Expand Down
51 changes: 51 additions & 0 deletions R/.test-surface-water_download.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
if (FALSE) {
library(wasserportal)

stations <- wasserportal::get_stations()
stations_crosstable <- stations$crosstable

stations_crosstable_bb <- stations_crosstable %>%
dplyr::filter(stringr::str_detect(.data$Messstellennummer,
pattern = "^[A-Z]{2}_"))

stations_crosstable_berlin <- stations_crosstable %>%
dplyr::filter(stringr::str_detect(.data$Messstellennummer,
pattern = "^[A-Z]{2}_",
negate = TRUE))



station_crosstable_berlin <- stations_crosstable_berlin[1,]
stations_crosstable_berlin
from_date <- "1900-01-01"
sw_station_berlin_daily <- wasserportal::read_wasserportal_raw(
station = station_crosstable_berlin$Messstellennummer,
variable = get_station_variables(stations_crosstable_berlin)[1],
type = "daily",
from_date = from_date,
include_raw_time = TRUE,
stations_crosstable = stations_crosstable
)

str(sw_station_berlin_daily)



sw_stations_berlin_daily <- stats::setNames(lapply(stations_crosstable_berlin$Messstellennummer,
function(station) {
msg <- sprintf("Fetching data for station '%s'", station)
kwb.utils::catAndRun(msg, expr = {
wasserportal::read_wasserportal(
station = station,
type = "daily",
from_date = from_date,
include_raw_time = TRUE,
stations_crosstable = stations_crosstable
)})}
), nm = stations_crosstable$Messstellennummer)

str(sw_stations_daily)


}

18 changes: 15 additions & 3 deletions R/get_wasserportal_master_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ wasserportal_base_url <- function() {
#' @importFrom rlang .data
#' @export
#' @examples
#'## GW Station
#'\dontrun{
#' get_wasserportal_master_data(station_id = 149)
#'

#'}
#'## SW Station
#'get_wasserportal_master_data(station_id = 5865900)
get_wasserportal_master_data <- function (
station_id,
url_wasserportal = wasserportal_base_url()
Expand All @@ -35,11 +38,20 @@ get_wasserportal_master_data <- function (

master_table <- html_overview %>%
rvest::html_node(xpath = '//*[@summary="Pegel Berlin"]') %>%
rvest::html_table() %>%
rvest::html_table()

if(nrow(master_table) == 0) {
msg <- sprintf("No master table for station '%s' available at '%s'",
station_id,
master_url)
stop(msg)
} else {
master_table <- master_table %>%
dplyr::rename("key" = "X1", "value" = "X2") %>%
dplyr::mutate(key = stringr::str_remove_all(.data$key, "-")) %>%
dplyr::mutate(key = kwb.utils::substSpecialChars(.data$key)) %>%
tidyr::pivot_wider(names_from = "key", values_from = "value")
}

master_table

Expand Down
2 changes: 1 addition & 1 deletion R/get_wasserportal_stations_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ get_overview_options <- function () {

list(surface_water = list(water_level = "ws",
flow = "df",
level = "wt",
temperature = "wt",
conductivity = "lf",
ph = "ph",
oxygen_concentration = "og",
Expand Down
35 changes: 22 additions & 13 deletions R/read_wasserportal.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,38 @@
#' \code{UTCOffset}, 1 in winter, 2 in summer).
#'
#' @param station station number, as returned by
#' \code{\link{get_wasserportal_stations}}
#' \code{\link{get_stations}}
#' @param variables vector of variable identifiers, as returned by
#' \code{\link{get_wasserportal_variables}}
#' \code{\link{get_station_variables}}
#' @param from_date \code{Date} object (or string in format "yyyy-mm-dd" that
#' can be converted to a \code{Date} object representing the first day for
#' which to request data. Default: \code{as.character(Sys.Date() - 90L)}
#' @param type one of "single" (the default), "daily", "monthly"
#' @param include_raw_time if \code{TRUE} the original time column and the
#' column with the corrected winter time are included in the output. The
#' default is \code{FALSE}.
#' @param stations_crosstable sublist `crosstable` as retrieved from \code{\link{get_stations}}
#' i.e. `get_stations()$crosstable`
#' @return data frame read from the CSV file that the download provides.
#' IMPORTANT: It is not yet clear how to interpret the timestamp, see example
#' @importFrom httr POST content
#' @importFrom utils read.table
#' @export
#' @examples
#' # Get a list of available water quality stations and variables
#' stations <- wasserportal::get_wasserportal_stations()
#' variables <- wasserportal::get_wasserportal_variables()
#' stations <- wasserportal::get_stations()
#' stations_crosstable <- stations$crosstable
#' station_crosstable <- stations_crosstable[1,]
#'
#' # Set the start date
#' from_date <- "2021-03-01"
#'
#' # Read the timeseries (multiple variables for one station)
#' water_quality <- wasserportal::read_wasserportal(
#' station = stations$MPS_Charlottenburg,
#' variables = c(variables["Sauerstoffgehalt"], variables["Leitfaehigkeit"]),
#' from_date = from_date, include_raw_time = TRUE
#' station = station_crosstable$Messstellennummer,
#' from_date = from_date,
#' include_raw_time = TRUE,
#' stations_crosstable = stations_crosstable
#' )
#'
#' # Look at the first few records
Expand Down Expand Up @@ -75,17 +79,20 @@
#' sum(water_quality$timestamp_raw != water_quality$timestamp_corr)
#'
read_wasserportal <- function(
station, variables = get_wasserportal_variables(station),
station,
variables = NULL,
from_date = as.character(Sys.Date() - 90L), type = "single",
include_raw_time = FALSE
include_raw_time = FALSE,
stations_crosstable
)
{
#kwb.utils::assignPackageObjects("wasserportal")
#station=get_wasserportal_stations(type = "flow")$Tiefwerder
#variables = get_wasserportal_variables(station);from_date = "2019-01-01";include_raw_time = FALSE

variable_ids <- get_wasserportal_variables()
station_ids <- get_wasserportal_stations(type = NULL)
station_crosstable <- stations_crosstable[stations_crosstable$Messstellennummer == station,]
variable_ids <- get_station_variables(station_crosstable)
if(is.null(variables)) variables <- variable_ids
station_ids <- stations_crosstable[["Messstellennummer"]]

stopifnot(all(station %in% station_ids))
stopifnot(all(variables %in% variable_ids))
Expand All @@ -101,7 +108,9 @@ read_wasserportal <- function(
from_date = from_date,
type = type,
include_raw_time = include_raw_time,
handle = handle
handle = handle,
stations_crosstable = stations_crosstable

)

# Remove elements of class "response" that are returned in case of an error
Expand Down
36 changes: 31 additions & 5 deletions R/read_wasserportal_raw.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#' Helper function: get available station variables
#'
#' @param station_df station_df
#'
#' @return returns names of available variables for station
#' @export
#'
#' @importFrom dplyr select_if
#'
get_station_variables <- function(station_df) {
station_df <- station_df %>%
dplyr::select_if(function(x){!all(is.na(x))})
names(station_df)[!names(station_df) %in% c("Messstellennummer", "Messstellenname")]
}

# read_wasserportal_raw --------------------------------------------------------

#' Read Wasserportal Raw
Expand All @@ -8,31 +23,42 @@
#' @param type one of "single", "daily", "monthly" (default: "single")
#' @param include_raw_time TRUE or FALSE (default: FALSE)
#' @param handle handle (default: NULL)
#'
#' @param stations_crosstable sublist `crosstable` as retrieved from \code{\link{get_stations}}
#' i.e. `get_stations()$crosstable`
#' @return ????
#' @export
#' @import kwb.utils
#' @importFrom kwb.datetime textToEuropeBerlinPosix
read_wasserportal_raw <- function(
variable, station, from_date, type = "single", include_raw_time = FALSE,
handle = NULL
handle = NULL,
stations_crosstable
)
{
#variable <- variables[1]
from_date <- assert_date(from_date)


stopifnot(length(station) == 1)
station_ids <- get_wasserportal_stations(type = NULL)
station_ids <- stations_crosstable$Messstellennummer
stopifnot(station %in% station_ids)

stopifnot(length(variable) == 1)
variable_ids <- get_wasserportal_variables(station)

station_df <- stations_crosstable[stations_crosstable$Messstellennummer == station, ] %>%
dplyr::select_if(function(x){!all(is.na(x))})

variable_ids <- get_station_variables(station_df)
stopifnot(variable %in% variable_ids)

sreihe <- kwb.utils::selectElements(elements = type, list(
single = "w", single_all = "wa", daily = "m", monthly = "j"
))

variable <- kwb.utils::selectElements(elements = variable, list(
ws = "w", df = "d", wt = "t", lf = "l", ph = "p", og = "g", os = "s"
))

progress <- get_wasserportal_text(station, variable, station_ids, variable_ids)
url <- get_wasserportal_url(station, variable)

Expand Down Expand Up @@ -71,7 +97,7 @@ read_wasserportal_raw <- function(
data <- read(text, header = FALSE, skip = 1)

# Get the numbers of the data columns
if (type != "monthly") {
if (!type %in% c("daily", "monthly")) {
stopifnot(ncol(data) == 2L)
}

Expand Down
2 changes: 2 additions & 0 deletions R/read_wasserportal_raw_gw.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr select filter mutate
#' @examples
#' \dontrun{
#' read_wasserportal_raw_gw(station = 149, stype = "gwl")
#' read_wasserportal_raw_gw(station = 149, stype = "gwq")
#' }
read_wasserportal_raw_gw <- function(
station = 149,
stype = "gwl",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Project Status](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental)
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/wasserportal)]()
[![R-Universe_Status_Badge](https://kwb-r.r-universe.dev/badges/wasserportal)](https://kwb-r.r-universe.dev/)
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.6602574.svg)](https://doi.org/10.5281/zenodo.6602574)
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.6602573.svg)](https://doi.org/10.5281/zenodo.6602573)

# wasserportal

Expand Down
2 changes: 1 addition & 1 deletion index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Project Status](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental)
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/wasserportal)]()
[![R-Universe_Status_Badge](https://kwb-r.r-universe.dev/badges/wasserportal)](https://kwb-r.r-universe.dev/)
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.6602574.svg)](https://doi.org/10.5281/zenodo.6602574)
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.6602573.svg)](https://doi.org/10.5281/zenodo.6602573)

R Package with Functions for Scraping Data of
Wasserportal Berlin (https://wasserportal.berlin.de), which contains
Expand Down
17 changes: 17 additions & 0 deletions man/get_station_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/get_wasserportal_master_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1cb656e

Please sign in to comment.