Merge pull request #46 from statistikat/tibble_pkg
Tibble pkg
alexkowa authored Jul 18, 2024
2 parents 3bd6c7d + 144da68 commit a24efcc
Showing 70 changed files with 1,532 additions and 400 deletions.
12 changes: 8 additions & 4 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: STATcubeR
Title: R interface for the STATcube REST API and Open Government Data
Version: 0.5.0
Version: 0.5.2
Authors@R: c(
person("Gregor", "de Cillia", , "[email protected]", role = "aut"),
person("Bernhard", "Meindl", , "[email protected]", role = "ctb"),
@@ -15,14 +15,17 @@ Description: Import data from the STATcube REST API or from the open data
License: GPL (>= 2)
URL: https://statistikat.github.io/STATcubeR,
https://github.com/statistikat/STATcubeR
BugReports: https://github.com/statistikat/STATcubeR/issues
Imports:
cli (>= 3.4.1),
httr,
jsonlite,
magrittr
Suggests:
magrittr,
pillar (>= 1.5.0),
vctrs (>= 0.5.2)
Suggests:
spelling,
data.tree,
pillar,
rappdirs,
xml2,
reactable,
@@ -32,3 +35,4 @@ Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Language: en-US
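
Moving `pillar` and `vctrs` from Suggests to Imports makes the tibble-style printing available unconditionally, while packages that remain in Suggests still need a runtime guard. A minimal sketch of that guard pattern, with a hypothetical helper name (not part of the package):

```r
# Hypothetical helper: Suggests-only packages (e.g. xml2) must be guarded
# with requireNamespace(); Imports such as pillar can be used directly.
parse_od_xml <- function(path) {
  if (!requireNamespace("xml2", quietly = TRUE)) {
    stop("Package 'xml2' is required for this feature.", call. = FALSE)
  }
  xml2::read_xml(path)
}
```
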
18 changes: 18 additions & 0 deletions NAMESPACE
@@ -2,16 +2,29 @@

S3method(as.character,od_json)
S3method(as.character,sc_json)
S3method(as.character,sc_schema_uri)
S3method(as.data.frame,sc_data)
S3method(format,pillar_shaft_ogd_file)
S3method(format,sc_schema_uri)
S3method(format,sdmx_table)
S3method(pillar_shaft,ogd_file)
S3method(pillar_shaft,sc_dttm)
S3method(pillar_shaft,sc_schema_type)
S3method(pillar_shaft,sc_schema_uri)
S3method(print,od_cache_file)
S3method(print,od_json)
S3method(print,od_revisions)
S3method(print,od_table)
S3method(print,sc_rate_limit_table)
S3method(print,sc_schema)
S3method(print,sc_schema_flatten)
S3method(print,sc_table)
S3method(print,sc_tibble_meta)
S3method(print,sc_url)
S3method(print,sdmx_description)
S3method(tbl_format_footer,sc_meta)
S3method(tbl_sum,sc_meta)
S3method(tbl_sum,sc_tibble)
export("%>%")
export(od_cache_clear)
export(od_cache_dir)
@@ -53,6 +66,7 @@ export(sc_last_error_parsed)
export(sc_rate_limit_schema)
export(sc_rate_limit_table)
export(sc_rate_limits)
export(sc_recode)
export(sc_schema)
export(sc_schema_catalogue)
export(sc_schema_db)
@@ -62,6 +76,10 @@ export(sc_table_custom)
export(sc_table_saved)
export(sc_table_saved_list)
export(sc_tabulate)
export(sdmx_table)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(pillar,pillar_shaft)
importFrom(pillar,tbl_format_footer)
importFrom(pillar,tbl_sum)
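
The newly registered `pillar_shaft`, `tbl_sum`, and `tbl_format_footer` methods are the hooks through which `{pillar}` customizes tibble printing. A minimal sketch of how such hooks behave, assuming illustrative method bodies (the class names `sc_dttm` and `sc_tibble` come from this NAMESPACE; the implementations here are not the package's actual ones):

```r
library(pillar)
library(tibble)

# Column rendering for date-time columns carrying the "sc_dttm" class
pillar_shaft.sc_dttm <- function(x, ...) {
  out <- format(x, "%Y-%m-%d %H:%M")
  new_pillar_shaft_simple(out, align = "right")
}

# Header line for tibbles carrying the "sc_tibble" class
tbl_sum.sc_tibble <- function(x, ...) {
  c("STATcubeR" = sprintf("%d x %d", nrow(x), ncol(x)))
}

x <- tibble(updated = Sys.time())
class(x$updated) <- c("sc_dttm", class(x$updated))
class(x) <- c("sc_tibble", class(x))
x  # prints with the custom header and column format
```
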
17 changes: 15 additions & 2 deletions NEWS.md
@@ -1,3 +1,16 @@
# Upcoming (0.6.0)

* Update print methods with the `{tibble}` package (#32)

# STATcubeR 0.5.2

* Add filters and other recodes to `sc_table_custom()` (#33)
* Add global option `STATcubeR.language` to override the default language
* `od_table()`: Add descriptions to `x$header` and `x$field(i)`
* Depend on cli >= 3.4.1 (@matmo, #35)
* Allow json strings in `sc_table()` (@matmo, #36)
* add `sdmx_table()` to import sdmx archives (.zip)

# STATcubeR 0.5.0

* adapt `od_list()` to data.statistik.at update ([`2249b66`](https://github.com/statistikat/STATcubeR/commit/2249b6607cb822a4aac56c6258cbe967832171f1))
@@ -38,7 +51,7 @@
* Allow recodes of `sc_data` objects (#17)
* Better parsing of time variables (#15, #16)
* Use bootstrap 5 and `{pkgdown}` 2.0.0 for the website
* Allow export and import of open data using tar archves (#20)
* Allow export and import of open data using tar archives (#20)

# STATcubeR 0.2.4

@@ -85,7 +98,7 @@ This version finalizes #11
https://data.statistik.gv.at/

* new class `od_table` to get OGD data
* methods to tabulate reponses
* methods to tabulate responses
* caching
* four new pkgdown articles for `od_table()`, `od_list()`, `od_resource()` and `sc_data`

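A hedged usage sketch of the user-facing additions listed for 0.5.2 above; the archive path is a placeholder and requires an SDMX archive previously downloaded from STATcube:

```r
library(STATcubeR)

# global option to override the default language (added in 0.5.2)
options(STATcubeR.language = "en")

# import an SDMX archive (.zip) downloaded from STATcube (added in 0.5.2)
x <- sdmx_table("path/to/archive.zip")
x
```
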
4 changes: 2 additions & 2 deletions R/browse.R
@@ -14,7 +14,7 @@ sc_browse <- function(server = "ext") {
sc_url(sc_url_gui(server), "home")
}

#' @describeIn sc_browse opens the preference menu with the api key
#' @describeIn sc_browse opens the preference menu with the API key
#' @examples
#' sc_browse_preferences()
#' @export
@@ -75,7 +75,7 @@ in_stat <- function() {
}

sc_url_gui <- function(server = "ext") {
if (server == "ext" && !in_stat())
if (server == "ext" && (!in_stat() || Sys.getenv("NOT_CRAN") != ""))
return("https://portal.statistik.at/statistik.at/ext/statcube/")
if (server == "test")
return("http://sdbtest:8081/statistik.at/wdev/statcube/")
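The amended guard means the GUI URL resolves to the public portal whenever `NOT_CRAN` is set (as devtools/testthat do during checks), regardless of `in_stat()`. An illustrative sketch only, since `sc_url_gui()` is internal:

```r
# NOT_CRAN is set by devtools/testthat during checks; with it non-empty,
# the "ext" branch short-circuits to the public portal URL.
Sys.setenv(NOT_CRAN = "true")
STATcubeR:::sc_url_gui("ext")
#> [1] "https://portal.statistik.at/statistik.at/ext/statcube/"
```
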
8 changes: 4 additions & 4 deletions R/cache.R
@@ -10,7 +10,7 @@
#' Caching can be set up using environment variables. To set up a persistent cache
#' for both Open Data and the REST API, the following lines in `.Renviron` can
#' be used.
#' The paths in this example are only applicalble for UNIX-based operating systems.
#' The paths in this example are only applicable for UNIX-based operating systems.
#'
#' ```sh
#' STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE
@@ -23,7 +23,7 @@
#' Caching is not implemented for the
#' endpoints [sc_info()] and [sc_rate_limit_table()].
#' @rdname sc_cache
#' @param verbose print instuctions on how to set up caching persistently
#' @param verbose print instructions on how to set up caching persistently
#' via environment variables?
#' @name sc_cache
NULL
@@ -49,14 +49,14 @@ sc_cache_disable <- function() {
Sys.unsetenv("STATCUBE_CACHE")
}

#' @describeIn sc_cache informs wether the cache is currently enabled
#' @describeIn sc_cache informs whether the cache is currently enabled
#' @export
sc_cache_enabled <- function() {
Sys.getenv("STATCUBE_CACHE") != ""
}

#' @export
#' @param dir a chace directory
#' @param dir a cache directory
#' @describeIn sc_cache get/set the directory used for caching
sc_cache_dir <- function(dir = NULL) {
if (is.null(dir))
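Session-scoped usage of the cache helpers documented above (persistent setup goes through `.Renviron`, as shown in the roxygen docs); all four functions appear in this file:

```r
library(STATcubeR)
sc_cache_enable()    # sets STATCUBE_CACHE for the current session
sc_cache_enabled()   # TRUE while the variable is set
sc_cache_dir()       # directory used for cached API responses
sc_cache_disable()   # unsets STATCUBE_CACHE again
```
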
6 changes: 3 additions & 3 deletions R/error.R
@@ -1,15 +1,15 @@
#' Error handling for the STATcube REST API
#'
#' @description
#' In case API requests are unsuccessfull, `STATcubeR` will throw errors
#' In case API requests are unsuccessful, `STATcubeR` will throw errors
#' to summarize the httr error type and its meaning.
#' Requests are considered unsuccessfull if one of the following applies
#' Requests are considered unsuccessful if one of the following applies
#' * The response returns `TRUE` for `httr::http_error()`.
#' * The response is not of type `"application/json"`
#'
#' In some cases it is useful to get direct access to a faulty response object.
#' For that purpose, it is possible to use [sc_last_error()] which will provide
#' the httr response object for the last unsuccessfull request.
#' the httr response object for the last unsuccessful request.
#' @return The return value from `httr::GET()` or `httr::POST()`.
#' @examplesIf sc_key_exists()
#' try(sc_table_saved("invalid_id"))
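The documented inspection workflow, assuming a configured API key; the table id is deliberately invalid so the request fails:

```r
library(STATcubeR)
try(sc_table_saved("invalid_id"))  # request fails with an informative error
resp <- sc_last_error()            # httr response of the last failed call
httr::status_code(resp)
sc_last_error_parsed()             # parsed variant, exported per NAMESPACE
```
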
15 changes: 8 additions & 7 deletions R/od_cache.R
@@ -15,7 +15,7 @@
#' od_downloads()
#' @details
#' [od_cache_summary()] provides an overview of all contents of the cache through
#' a data.frame. It hasone row for each dataset and the following columns.
#' a data.frame. It has one row for each dataset and the following columns.
#' All file sizes are given in bytes
#' - **`id`** the dataset id
#' - **`updated`** the last modified time for `${id}.json`
Expand All @@ -41,7 +41,7 @@ od_cache_summary <- function(server = "ext") {
field <- substr(files[is_field], 1 + pos_underscore[is_field], nchar(files[is_field]) - 4)
id <- substr(files[is_field], 1, pos_underscore[is_field] - 1)
sizes_fields <- file.size(file.path(od_cache_dir(), files[is_field])) %>% split(id) %>% sapply(sum)
fields <- data.frame(id, field, stringsAsFactors = FALSE)
fields <- list(id = id, field = field)

files <- files[!is_field]
pos_underscore <- as.integer(gregexpr("_HEADER", files))
@@ -50,17 +50,18 @@
files <- files[!is_header]
id_data <- substr(files, 1, nchar(files) - 4)
all_ids <- unique(c(id_data, id_header, fields$id))
data.frame(
id = all_ids,
res <- data_frame(
id = all_ids %>% `class<-`(c("ogd_id", "character")),
updated = file.mtime(paste0(cache_dir, all_ids, ".json")),
json = file.size(paste0(cache_dir, all_ids, ".json")),
data = file.size(paste0(cache_dir, all_ids, ".csv")),
header = file.size(paste0(cache_dir, all_ids, "_HEADER.csv")),
fields = sizes_fields[match(unique(fields$id), all_ids)],
n_fields = match(fields$id, all_ids) %>% factor(seq_along(all_ids)) %>%
table() %>% as.integer(),
row.names = NULL, stringsAsFactors = FALSE
) %>% `class<-`(c("tbl", "data.frame"))
table() %>% as.integer()
)
class(res$updated) <- c("sc_dttm", class(res$updated))
res
}


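What the refactor above changes for users: `od_cache_summary()` still returns one row per cached dataset with file sizes in bytes, but the `updated` column now carries the `sc_dttm` class, so it prints through the new pillar method. A sketch, using a dataset id taken from the package documentation (any published OGD id works):

```r
library(STATcubeR)
od_table("OGD_krebs_ext_KREBS_1")  # download once to populate the cache
od_cache_summary()                 # one row per cached dataset
```
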
32 changes: 20 additions & 12 deletions R/od_list.R
@@ -3,7 +3,7 @@
#' [od_list()] returns a `data.frame ` containing all datasets published at
#' [data.statistik.gv.at](https://data.statistik.gv.at)
#'
#' @param unique some datasets are pulbished under multiple groups.
#' @param unique some datasets are published under multiple groups.
#' They will only be listed once with the first group they appear in unless
#' this parameter is set to `FALSE`.
#' @param server the open data server to use. Either `ext` for the external
@@ -43,11 +43,10 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
xml2::xml_find_all(".//a")

# ids
df <- data.frame(
category = "NA",
df <- data_frame(
category = rep("NA", length(el)),
id = el %>% xml2::xml_attr("aria-label"),
label = el %>% xml2::xml_text(),
stringsAsFactors = FALSE
label = el %>% xml2::xml_text()
)

ignored_labels <- c("[Alle \u00f6ffnen]", "[Alle schlie\u00dfen]",
@@ -67,7 +66,9 @@
df <- df[!(df$id %in% od_resource_blacklist), ]
rownames(df) <- NULL
attr(df, "od") <- r$times[["total"]]
df %>% `class<-`(c("tbl", "data.frame"))
class(df$id) <- c("ogd_id", "character")
class(df) <- c("tbl_df", class(df))
df
}

#' Get a catalogue for OGD datasets
@@ -95,7 +96,7 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
#' |json |`list<od_json>`| Full json metadata
#'
#' The type `datetime` refers to the `POSIXct` format as returned by [Sys.time()].
#' The last column `"json"` containes the full json metadata as returned by
#' The last column `"json"` contains the full json metadata as returned by
#' [od_json()].
#'
#' @inheritParams od_table
@@ -120,7 +121,15 @@ od_catalogue <- function(server = "ext", local = TRUE) {
ids <- od_revisions(server = server)
}
timestamp <- switch(as.character(local), "TRUE" = NULL, "FALSE" = Sys.time())
jsons <- lapply(ids, od_json, timestamp, server)
jsons <- lapply(
cli::cli_progress_along(
ids, type = "tasks", "downloading json metadata files"),
function(i) {
od_json(ids[i], timestamp, server)
}
)
if (!local)
cli::cli_text("\rDownloaded {.field {length(ids)}} metadata files with {.fn od_json}")
as_df_jsons(jsons)
}

@@ -130,7 +139,7 @@ as_df_jsons <- function(jsons) {
}

descs <- sapply(jsons, function(x) x$extras$attribute_description) %>% paste0(";", .)
out <- data.frame(
out <- data_frame(
title = sapply(jsons, function(x) x$title),
measures = gregexpr(";F-", descs) %>% sapply(length),
fields = gregexpr(";C-", descs) %>% sapply(length),
@@ -145,12 +154,11 @@
update_frequency = sapply(jsons, function(x) x$extras$update_frequency),
tags = I(lapply(jsons, function(x) unlist(x$tags))),
categorization = sapply(jsons, function(x) unlist(x$extras$categorization[1])),
json = I(jsons),
stringsAsFactors = FALSE
json = I(jsons)
)
out$modified <- parse_time(out$modified)
out$created <- parse_time(out$created)
class(out) <- c("tbl", class(out))
class(out$id) <- c("ogd_id", "character")
out
}
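A minimal sketch of the progress pattern introduced in `od_catalogue()` above: `cli::cli_progress_along()` yields indices like `seq_along()` and renders a progress bar as they are consumed. The ids below are placeholders, and the worker function stands in for `od_json()`:

```r
ids <- paste0("OGD_", letters[1:5])  # placeholder ids
jsons <- lapply(
  cli::cli_progress_along(ids, "downloading json metadata files"),
  function(i) ids[i]  # stand-in for od_json(ids[i], timestamp, server)
)
```
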

19 changes: 11 additions & 8 deletions R/od_resource.R
@@ -20,8 +20,9 @@ od_resource_blacklist <- c(
)

od_resource_check_id <- function(id) {
if (substr(id, 1, 4) != "OGD_")
stop("Dataset ids must begin with \"OGD_\": ", shQuote(id), call. = FALSE)
if (!grepl("^OGD_", id) && !grepl("^STAT_", id))
stop("Dataset ids must begin with \"OGD_\" or \"STAT_\": ",
shQuote(id), call. = FALSE)
if (id %in% od_resource_blacklist)
stop("Dataset ", shQuote(id), " was blacklisted in STATcubeR ",
"because of inconsistent formats", call. = FALSE)
@@ -141,15 +142,14 @@ od_resource_parse_all <- function(resources, server = "ext") {
})
od <- lapply(parsed, attr, "od")

data.frame(
data_frame(
name = sapply(resources, function(x) x$name),
last_modified = lapply(od, function(x) x$last_modified) %>% do.call(c, .),
cached = lapply(od, function(x) x$cached) %>% do.call(c, .),
size = sapply(od, function(x) x$size),
download = vapply(od, function(x) x$download, 1.0),
parsed = sapply(od, function(x) x$parsed),
data = I(parsed %>% lapply(`attr<-`, "od", NULL)),
stringsAsFactors = FALSE
data = I(parsed %>% lapply(`attr<-`, "od", NULL))
)
}

@@ -171,9 +171,9 @@ od_resources_check <- function(json) {

od_normalize_columns <- function(x, suffix) {
if (!is.null(suffix)) {
col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)))
col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)), 5, 7)
col_names <- c("code", "label", "label_de", "label_en",
switch(suffix, HEADER = NULL, "parent"))
switch(suffix, HEADER = NULL, "parent"), "de_desc", "en_desc")
x <- x[, col_indices] %>% `names<-`(col_names)
x$label <- NA_character_
x$label_en <- as.character(x$label_en)
Expand Down Expand Up @@ -224,5 +224,8 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") {
check_header(out$data[[2]])
out$data[[2]] %<>% od_normalize_columns("HEADER")
out$data[seq(3, nrow(out))] %<>% lapply(od_normalize_columns, "FIELD")
out %>% `class<-`(c("tbl", "data.frame"))
class(out$name) <- c("ogd_file", "character")
class(out$last_modified) <- c("sc_dttm", class(out$last_modified))
class(out$cached) <- c("sc_dttm", class(out$cached))
out
}
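
Illustrative behaviour of the relaxed id check above; `od_resource_check_id()` is internal, so this is a sketch rather than exported API, with the error text taken from the diff:

```r
STATcubeR:::od_resource_check_id("OGD_krebs_ext_KREBS_1")  # silent: valid id
STATcubeR:::od_resource_check_id("STAT_example")           # accepted since this commit
try(STATcubeR:::od_resource_check_id("foo"))
#> Error: Dataset ids must begin with "OGD_" or "STAT_": 'foo'
```
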