diff --git a/DESCRIPTION b/DESCRIPTION index 5bd0dde0..58e9ba1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: STATcubeR Title: R interface for the STATcube REST API and Open Government Data -Version: 0.5.0 +Version: 0.5.2 Authors@R: c( person("Gregor", "de Cillia", , "Gregor.deCillia@statistik.gv.at", role = "aut"), person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = "ctb"), @@ -15,14 +15,17 @@ Description: Import data from the STATcube REST API or from the open data License: GPL (>= 2) URL: https://statistikat.github.io/STATcubeR, https://github.com/statistikat/STATcubeR +BugReports: https://github.com/statistikat/STATcubeR/issues Imports: cli (>= 3.4.1), httr, jsonlite, - magrittr -Suggests: + magrittr, + pillar (>= 1.5.0), + vctrs (>= 0.5.2) +Suggests: + spelling, data.tree, - pillar, rappdirs, xml2, reactable, @@ -32,3 +35,4 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 +Language: en-US \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 5e0e91da..8336ba67 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,16 +2,29 @@ S3method(as.character,od_json) S3method(as.character,sc_json) +S3method(as.character,sc_schema_uri) S3method(as.data.frame,sc_data) +S3method(format,pillar_shaft_ogd_file) +S3method(format,sc_schema_uri) +S3method(format,sdmx_table) +S3method(pillar_shaft,ogd_file) +S3method(pillar_shaft,sc_dttm) +S3method(pillar_shaft,sc_schema_type) +S3method(pillar_shaft,sc_schema_uri) S3method(print,od_cache_file) S3method(print,od_json) S3method(print,od_revisions) S3method(print,od_table) S3method(print,sc_rate_limit_table) S3method(print,sc_schema) +S3method(print,sc_schema_flatten) S3method(print,sc_table) S3method(print,sc_tibble_meta) S3method(print,sc_url) +S3method(print,sdmx_description) +S3method(tbl_format_footer,sc_meta) +S3method(tbl_sum,sc_meta) +S3method(tbl_sum,sc_tibble) export("%>%") export(od_cache_clear) export(od_cache_dir) @@ -53,6 +66,7 @@ export(sc_last_error_parsed) export(sc_rate_limit_schema) export(sc_rate_limit_table) export(sc_rate_limits) +export(sc_recode) export(sc_schema) export(sc_schema_catalogue) export(sc_schema_db) @@ -62,6 +76,10 @@ export(sc_table_custom) export(sc_table_saved) export(sc_table_saved_list) export(sc_tabulate) +export(sdmx_table) importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(magrittr,"%T>%") +importFrom(pillar,pillar_shaft) +importFrom(pillar,tbl_format_footer) +importFrom(pillar,tbl_sum) diff --git a/NEWS.md b/NEWS.md index 2073ecf3..13c5213f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# Upcoming (0.6.0) + +* Update print methods with the `{tibble}` package (#32) + +# STATcubeR 0.5.2 + +* Add filters and other recodes to `sc_table_custom()` (#33) +* Add global option `STATcubeR.language` to override the default language +* `od_table()`: Add descriptions to `x$header` and `x$field(i)` +* Depend on cli >= 3.4.1 (@matmo, #35) +* Allow json strings in `sc_table()` (@matmo, #36) +* add `sdmx_table()` to import sdmx archives (.zip) + # STATcubeR 0.5.0 * adapt `od_list()` to data.statistik.at update ([`2249b66`](https://github.com/statistikat/STATcubeR/commit/2249b6607cb822a4aac56c6258cbe967832171f1)) @@ -38,7 +51,7 @@ * Allow recodes of `sc_data` objects (#17) * Better parsing of time variables (#15, #16) * Use bootstrap 5 and `{pkgdown}` 2.0.0 for the website -* Allow export and import of open data using tar archves (#20) +* Allow export and import of open data using tar archives (#20) # STATcubeR 0.2.4 @@ -85,7 +98,7 @@ This version finalizes #11 https://data.statistik.gv.at/ * new class `od_table` to get OGD data -* methods to tabulate reponses +* methods to tabulate responses * caching * four new pkgdown articles for `od_table()`, `od_list()`, `od_resource()` and `sc_data` diff --git a/R/browse.R b/R/browse.R index b31a541d..9347ccb4 100644 --- a/R/browse.R +++ b/R/browse.R @@ -14,7 +14,7 @@ sc_browse <- function(server = "ext") { sc_url(sc_url_gui(server), "home") } -#' @describeIn sc_browse opens the preference menu with the api key +#' @describeIn sc_browse opens the preference menu with the API key #' @examples #' sc_browse_preferences() #' @export @@ -75,7 +75,7 @@ in_stat <- function() { } sc_url_gui <- function(server = "ext") { - if (server == "ext" && !in_stat()) + if (server == "ext" && (!in_stat() || Sys.getenv("NOT_CRAN") != "")) return("https://portal.statistik.at/statistik.at/ext/statcube/") if (server == "test") return("http://sdbtest:8081/statistik.at/wdev/statcube/") diff --git a/R/cache.R b/R/cache.R index 9d471d52..0f9860e9 100644 --- a/R/cache.R +++ b/R/cache.R @@ -10,7 +10,7 @@ #' Caching can be set up using environment variables. To set up a persistent cache #' for both Open Data and the REST API, the following lines in `.Renviron` can #' be used. -#' The paths in this example are only applicalble for UNIX-based operating systems. +#' The paths in this example are only applicable for UNIX-based operating systems. #' #' ```sh #' STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE @@ -23,7 +23,7 @@ #' Caching is not implemented for the #' endpoints [sc_info()] and [sc_rate_limit_table()]. #' @rdname sc_cache -#' @param verbose print instuctions on how to set up caching persistently +#' @param verbose print instructions on how to set up caching persistently #' via environment variables? #' @name sc_cache NULL @@ -49,14 +49,14 @@ sc_cache_disable <- function() { Sys.unsetenv("STATCUBE_CACHE") } -#' @describeIn sc_cache informs wether the cache is currently enabled +#' @describeIn sc_cache informs whether the cache is currently enabled #' @export sc_cache_enabled <- function() { Sys.getenv("STATCUBE_CACHE") != "" } #' @export -#' @param dir a chace directory +#' @param dir a cache directory #' @describeIn sc_cache get/set the directory used for caching sc_cache_dir <- function(dir = NULL) { if (is.null(dir)) diff --git a/R/error.R b/R/error.R index cf04b9e1..47595ec7 100644 --- a/R/error.R +++ b/R/error.R @@ -1,15 +1,15 @@ #' Error handling for the STATcube REST API #' #' @description -#' In case API requests are unsuccessfull, `STATcubeR` will throw errors +#' In case API requests are unsuccessful, `STATcubeR` will throw errors #' to summarize the httr error type and its meaning. -#' Requests are considered unsuccessfull if one of the following applies +#' Requests are considered unsuccessful if one of the following applies #' * The response returns `TRUE` for `httr::http_error()`. #' * The response is not of type `"application/json"` #' #' In some cases it is useful to get direct access to a faulty response object. #' For that purpose, it is possible to use [sc_last_error()] which will provide -#' the httr response object for the last unsuccessfull request. +#' the httr response object for the last unsuccessful request. #' @return The return value from `httr::GET()` or `httr::POST()`. #' @examplesIf sc_key_exists() #' try(sc_table_saved("invalid_id")) diff --git a/R/od_cache.R b/R/od_cache.R index 25fef480..8f7dbf1f 100644 --- a/R/od_cache.R +++ b/R/od_cache.R @@ -15,7 +15,7 @@ #' od_downloads() #' @details #' [od_cache_summary()] provides an overview of all contents of the cache through -#' a data.frame. It hasone row for each dataset and the following columns. +#' a data.frame. It has one row for each dataset and the following columns. #' All file sizes are given in bytes #' - **`id`** the dataset id #' - **`updated`** the last modified time for `${id}.json` @@ -41,7 +41,7 @@ od_cache_summary <- function(server = "ext") { field <- substr(files[is_field], 1 + pos_underscore[is_field], nchar(files[is_field]) - 4) id <- substr(files[is_field], 1, pos_underscore[is_field] - 1) sizes_fields <- file.size(file.path(od_cache_dir(), files[is_field])) %>% split(id) %>% sapply(sum) - fields <- data.frame(id, field, stringsAsFactors = FALSE) + fields <- list(id = id, field = field) files <- files[!is_field] pos_underscore <- as.integer(gregexpr("_HEADER", files)) @@ -50,17 +50,18 @@ od_cache_summary <- function(server = "ext") { files <- files[!is_header] id_data <- substr(files, 1, nchar(files) - 4) all_ids <- unique(c(id_data, id_header, fields$id)) - data.frame( - id = all_ids, + res <- data_frame( + id = all_ids %>% `class<-`(c("ogd_id", "character")), updated = file.mtime(paste0(cache_dir, all_ids, ".json")), json = file.size(paste0(cache_dir, all_ids, ".json")), data = file.size(paste0(cache_dir, all_ids, ".csv")), header = file.size(paste0(cache_dir, all_ids, "_HEADER.csv")), fields = sizes_fields[match(unique(fields$id), all_ids)], n_fields = match(fields$id, all_ids) %>% factor(seq_along(all_ids)) %>% - table() %>% as.integer(), - row.names = NULL, stringsAsFactors = FALSE - ) %>% `class<-`(c("tbl", "data.frame")) + table() %>% as.integer() + ) + class(res$updated) <- c("sc_dttm", class(res$updated)) + res } diff --git a/R/od_list.R b/R/od_list.R index 2eff9fd8..937ed554 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -3,7 +3,7 @@ #' [od_list()] returns a `data.frame ` containing all datasets published at #' [data.statistik.gv.at](https://data.statistik.gv.at) #' -#' @param unique some datasets are pulbished under multiple groups. +#' @param unique some datasets are published under multiple groups. #' They will only be listed once with the first group they appear in unless #' this parameter is set to `FALSE`. #' @param server the open data server to use. Either `ext` for the external @@ -43,11 +43,10 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { xml2::xml_find_all(".//a") # ids - df <- data.frame( - category = "NA", + df <- data_frame( + category = rep("NA", length(el)), id = el %>% xml2::xml_attr("aria-label"), - label = el %>% xml2::xml_text(), - stringsAsFactors = FALSE + label = el %>% xml2::xml_text() ) ignored_labels <- c("[Alle \u00f6ffnen]", "[Alle schlie\u00dfen]", @@ -67,7 +66,9 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { df <- df[!(df$id %in% od_resource_blacklist), ] rownames(df) <- NULL attr(df, "od") <- r$times[["total"]] - df %>% `class<-`(c("tbl", "data.frame")) + class(df$id) <- c("ogd_id", "character") + class(df) <- c("tbl_df", class(df)) + df } #' Get a catalogue for OGD datasets @@ -95,7 +96,7 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { #' |json |`list`| Full json metadata #' #' The type `datetime` refers to the `POSIXct` format as returned by [Sys.time()]. -#' The last column `"json"` containes the full json metadata as returned by +#' The last column `"json"` contains the full json metadata as returned by #' [od_json()]. #' #' @inheritParams od_table @@ -120,7 +121,15 @@ od_catalogue <- function(server = "ext", local = TRUE) { ids <- od_revisions(server = server) } timestamp <- switch(as.character(local), "TRUE" = NULL, "FALSE" = Sys.time()) - jsons <- lapply(ids, od_json, timestamp, server) + jsons <- lapply( + cli::cli_progress_along( + ids, type = "tasks", "downloading json metadata files"), + function(i) { + od_json(ids[i], timestamp, server) + } + ) + if (!local) + cli::cli_text("\rDownloaded {.field {length(ids)}} metadata files with {.fn od_json}") as_df_jsons(jsons) } @@ -130,7 +139,7 @@ as_df_jsons <- function(jsons) { } descs <- sapply(jsons, function(x) x$extras$attribute_description) %>% paste0(";", .) - out <- data.frame( + out <- data_frame( title = sapply(jsons, function(x) x$title), measures = gregexpr(";F-", descs) %>% sapply(length), fields = gregexpr(";C-", descs) %>% sapply(length), @@ -145,12 +154,11 @@ as_df_jsons <- function(jsons) { update_frequency = sapply(jsons, function(x) x$extras$update_frequency), tags = I(lapply(jsons, function(x) unlist(x$tags))), categorization = sapply(jsons, function(x) unlist(x$extras$categorization[1])), - json = I(jsons), - stringsAsFactors = FALSE + json = I(jsons) ) out$modified <- parse_time(out$modified) out$created <- parse_time(out$created) - class(out) <- c("tbl", class(out)) + class(out$id) <- c("ogd_id", "character") out } diff --git a/R/od_resource.R b/R/od_resource.R index 71dd364f..70da1ff8 100644 --- a/R/od_resource.R +++ b/R/od_resource.R @@ -20,8 +20,9 @@ od_resource_blacklist <- c( ) od_resource_check_id <- function(id) { - if (substr(id, 1, 4) != "OGD_") - stop("Dataset ids must begin with \"OGD_\": ", shQuote(id), call. = FALSE) + if (!grepl("^OGD_", id) && !grepl("^STAT_", id)) + stop("Dataset ids must begin with \"OGD_\" or \"STAT_\": ", + shQuote(id), call. = FALSE) if (id %in% od_resource_blacklist) stop("Dataset ", shQuote(id), " was blacklisted in STATcubeR ", "because of inconsistent formats", call. = FALSE) @@ -141,15 +142,14 @@ od_resource_parse_all <- function(resources, server = "ext") { }) od <- lapply(parsed, attr, "od") - data.frame( + data_frame( name = sapply(resources, function(x) x$name), last_modified = lapply(od, function(x) x$last_modified) %>% do.call(c, .), cached = lapply(od, function(x) x$cached) %>% do.call(c, .), size = sapply(od, function(x) x$size), download = vapply(od, function(x) x$download, 1.0), parsed = sapply(od, function(x) x$parsed), - data = I(parsed %>% lapply(`attr<-`, "od", NULL)), - stringsAsFactors = FALSE + data = I(parsed %>% lapply(`attr<-`, "od", NULL)) ) } @@ -171,9 +171,9 @@ od_resources_check <- function(json) { od_normalize_columns <- function(x, suffix) { if (!is.null(suffix)) { - col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3))) + col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)), 5, 7) col_names <- c("code", "label", "label_de", "label_en", - switch(suffix, HEADER = NULL, "parent")) + switch(suffix, HEADER = NULL, "parent"), "de_desc", "en_desc") x <- x[, col_indices] %>% `names<-`(col_names) x$label <- NA_character_ x$label_en <- as.character(x$label_en) @@ -224,5 +224,8 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") { check_header(out$data[[2]]) out$data[[2]] %<>% od_normalize_columns("HEADER") out$data[seq(3, nrow(out))] %<>% lapply(od_normalize_columns, "FIELD") - out %>% `class<-`(c("tbl", "data.frame")) + class(out$name) <- c("ogd_file", "character") + class(out$last_modified) <- c("sc_dttm", class(out$last_modified)) + class(out$cached) <- c("sc_dttm", class(out$cached)) + out } diff --git a/R/od_revisions.R b/R/od_revisions.R index e3a7122f..02d954e0 100644 --- a/R/od_revisions.R +++ b/R/od_revisions.R @@ -3,7 +3,7 @@ #' Use the `/revision` endpoint of the OGD server to get a list #' of all datasets that have changed since a certain timestamp. #' @param since (optional) A timestamp. If supplied, only datasets updated -#' later will be returned. Otherwise, all datasets are retured. +#' later will be returned. Otherwise, all datasets are returned. #' Can be in either one of the following formats #' * a native R time type that is compatible with `strftime()` #' such as the return values of `Sys.Date()`, `Sys.time()` and `file.mtime()`. @@ -11,7 +11,7 @@ #' * a string of the form `YYYY-MM-DDThh:mm:ss` to specify a day and a time. #' @param exclude_ext If `TRUE` (default) exclude all results that have #' `OGDEXT_` as a prefix -#' @return a character verctor with dataset ids +#' @return a character vector with dataset ids #' @inheritParams od_list #' @examples #' # get all datasets (including OGDEXT_*) @@ -41,15 +41,15 @@ print.od_revisions <- function(x, ...) { since <- attr(x, "since") response <- attr(x, "response") if (!is.null(since)) - cli::cli_text("{.strong {length(x)}} changes between + cli::format_inline("{.strong {length(x)}} changes between {.timestamp {attr(x, 'since')}} and - {.timestamp {response$date}}") + {.timestamp {response$date}}") %>% cat() else - cli::cli_text("{.strong {length(x)}} datasets are available - ({.timestamp {response$date}})") + cli::format_inline("{.strong {length(x)}} datasets are available ", + "({.timestamp {response$date}})\n") %>% cat() if (length(x) > 0) { y <- cli::cli_vec(x, list("vec-trunc" = 3)) - cli::cli_text("{.strong ids}: {.emph {y}}") + cli::format_inline("{.strong ids}: {.emph {y}}") %>% cat() } invisible(x) } diff --git a/R/od_table.R b/R/od_table.R index 60a4613d..20c81252 100644 --- a/R/od_table.R +++ b/R/od_table.R @@ -14,7 +14,7 @@ #' `$field(code) `| `https://data.statistik.gv.at/data/${id}_${code}.csv` #' `$json `| `https://data.statistik.gv.at/ogd/json?dataset=${id}` #' -#' @param id the id of the data-set that should be accessed +#' @param id the id of the dataset that should be accessed #' @param language language to be used for labeling. `"en"` or `"de"` #' @param server the OGD-server to be used. `"ext"` (the default) for the #' external server or `prod` for the production server @@ -66,7 +66,7 @@ od_table_class <- R6::R6Class( public = list( #' @description This class is not exported. Use [od_table()] to #' initialize objects of class `od_table`. - #' @param id the id of the data-set that should be accessed + #' @param id the id of the dataset that should be accessed #' @param language language to be used for labeling. `"en"` or `"de"` #' @param server the OGD-Server server to be used initialize = function(id, language = NULL, server = "ext") { @@ -105,9 +105,13 @@ od_table_class <- R6::R6Class( private$cache$header %>% sc_tibble_meta(c("label_de", "label_en")) }, #' @field resources - #' lists all files downloaded from the server to contruct this table + #' lists all files downloaded from the server to construct this table resources = function() { - private$cache$resources %>% `class<-`(c("tbl", "data.frame")) + resources <- private$cache$resources + class(resources$name) <- c("ogd_file", "character") + class(resources$last_modified) <- c("sc_dttm", class(resources$last_modified)) + class(resources$cached) <- c("sc_dttm", class(resources$cached)) + resources }, #' @field od_server #' The server used for initialization (see to `?od_table`) diff --git a/R/od_table_save.R b/R/od_table_save.R index 704ba3c3..22dc9d52 100644 --- a/R/od_table_save.R +++ b/R/od_table_save.R @@ -83,9 +83,16 @@ od_table_local <- function(file) { od_table_local_paths <- function() { extracted <- dir() stopifnot(length(extracted) == 1) - json <- jsonlite::read_json(file.path(extracted, "meta.json")) + json_file <- file.path(extracted, "meta.json") + json <- jsonlite::read_json(json_file) id <- json$resources[[1]]$name stopifnot(is.character(id), length(id) == 1) + if (json$extras$metadata_modified == "$PublDateTime$") { + readLines(json_file) %>% + gsub("\\$PublDateTime\\$", json$extras$begin_datetime, .) %>% + writeLines(json_file) + json <- jsonlite::read_json(json_file) + } timestamps <- sapply(json$resources, function(x) x$last_modified) %>% as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS") stopifnot(all(timestamps <= Sys.time())) @@ -93,7 +100,7 @@ od_table_local_paths <- function() { classifications = dir(file.path(extracted, "classifications"), full.names = TRUE), data = file.path(extracted, "data.csv"), header = file.path(extracted, "header.csv"), - meta = file.path(extracted, "meta.json"), + meta = json_file, id = id ) stopifnot(all(file.exists(c(paths$data, paths$header, paths$meta)))) diff --git a/R/od_utils.R b/R/od_utils.R index 0093e4db..7e432f5b 100644 --- a/R/od_utils.R +++ b/R/od_utils.R @@ -29,19 +29,18 @@ od_attr <- function(json) { code <- c(code, substr(desc, index_code[i] + 1, next_col - 1)) label <- c(label, substr(desc, next_col + 1, index_end[i])) } - data.frame(label = label, code = code, stringsAsFactors = FALSE) + data_frame(label = label, code = code) } od_create_data <- function(id, json = od_json(id), lang = NULL, - server = "ext", verbose = FALSE) { + server = "ext") { lang <- sc_language(lang) resources <- od_resource_all(id, json, server) dat <- resources$data[[1]] header <- resources$data[[2]] meta <- list( - source = data.frame(code = id, label = NA, label_de = json$title, - label_en = json$extras$en_title_and_desc, - stringsAsFactors = FALSE), + source = data_frame(code = id, label = NA, label_de = json$title, + label_en = json$extras$en_title_and_desc), measures = header[substr(header$code, 1, 1) == "F", ], fields = header[substr(header$code, 1, 1) == "C", ] ) @@ -49,12 +48,6 @@ od_create_data <- function(id, json = od_json(id), lang = NULL, fields <- lapply(seq_along(meta$fields$code), function(i) { code <- meta$fields$code[i] fld <- resources$data[[2 + i]] - udc <- unique(dat[[code]]) - stopifnot(all(udc %in% fld$code)) - if (verbose && length(udc) != nrow(fld)) - message("dropping unused levels in ", shQuote(code), ": ", - paste(shQuote(setdiff(fld$code, udc)), collapse = ", ")) - fld <- fld[fld$code %in% udc, ] fld$label_en[is.na(fld$label_en)] <- fld$label_de[is.na(fld$label_en)] fld }) @@ -81,10 +74,10 @@ od_create_data <- function(id, json = od_json(id), lang = NULL, resources$name <- paste0(resources$name, ".csv") od <- attr(json, "od") - resources <- rbind(data.frame( + resources <- rbind(data_frame( name = paste0(id, ".json"), last_modified = json$extras$metadata_modified %>% as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS"), cached = od$cached, - size = od$size, download = od$download, parsed = NA, stringsAsFactors = FALSE), resources[1:6] + size = od$size, download = od$download, parsed = NA), resources[1:6] ) list(data = dat, meta = meta, fields = fields, resources = resources, diff --git a/R/other_endpoints.R b/R/other_endpoints.R index b8b33872..6cd40fe6 100644 --- a/R/other_endpoints.R +++ b/R/other_endpoints.R @@ -25,7 +25,7 @@ sc_info <- function(language = c("en", "de"), key = NULL, server = "ext") { info_content <- httr::content(response) info_content$languages %>% lapply(function(x) - data.frame(locale = x$locale, displayName = x$displayName)) %>% + data_frame(locale = x$locale, displayName = x$displayName)) %>% do.call(rbind, .) } @@ -34,8 +34,8 @@ sc_info <- function(language = c("en", "de"), key = NULL, server = "ext") { #' * `remaining` how much requests can be sent to the `/table` #' endpoint until the rate limit is reached. #' * `limit` the number of requests allowed per hour. -#' * `reset` a tiestamp when the rate limit will be reset. -#' Ususally, this should be less than one hour `after the current time. +#' * `reset` a timestamp when the rate limit will be reset. +#' Usually, this should be less than one hour `after the current time. #' @export sc_rate_limit_table <- function(language = c("en", "de"), key = NULL, server = "ext") { response <- httr::GET( diff --git a/R/print.R b/R/print.R index 03c7c3d9..fc3c1b78 100644 --- a/R/print.R +++ b/R/print.R @@ -8,6 +8,8 @@ sc_tibble_meta <- function(x, names_keep = c()) { x } +#' @importFrom pillar tbl_sum +#' @export tbl_sum.sc_meta <- function(x, ...) { paste0("STATcubeR metadata: ", format(nrow(x), big.mark = ","), " x ", ncol(x) + length(attr(x, "names_skip"))) @@ -15,6 +17,8 @@ tbl_sum.sc_meta <- function(x, ...) { style_subtle <- cli::make_ansi_style('#999999') +#' @importFrom pillar tbl_format_footer +#' @export tbl_format_footer.sc_meta <- function(x, setup, ...) { names_skip <- attr(x, "names_skip") c(NextMethod(), if (length(names_skip)) style_subtle( @@ -35,10 +39,82 @@ print.sc_tibble_meta <- function(x, ...) { } sc_tibble <- function(x) { - class(x) <- c("sc_tibble", "tbl", class(x)) + class(x) <- unique(c("sc_tibble", "tbl", class(x))) x } +#' @importFrom pillar tbl_sum +#' @export tbl_sum.sc_tibble <- function(x, ...) { paste0("A STATcubeR tibble: ", format(nrow(x), big.mark = ","), " x ", ncol(x)) } + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.sc_dttm <- function(x, ...) { + ymd <- format(x, "%Y-%m-%d") + hms <- cli::col_silver(format(x, "%H:%M:%S")) + short <- ymd + ind <- !is.na(x) & as.numeric(Sys.time()) - as.numeric(x) < 60*24 + short[ind] <- hms[ind] + long <- paste(ymd, hms) + long[is.na(x)] <- NA + short[is.na(x)] <- NA + pillar::new_pillar_shaft_simple( + long, + width = 19, + min_width = 10, + short_formatted = short, + type_sum = "dttm" + ) +} + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.ogd_file <- function(x, ...) { + pillar::new_pillar_shaft( + list(x = x), + width = pillar::get_max_extent(x), + min_width = 20, + class = "pillar_shaft_ogd_file", + type_sum = "chr" + ) +} + +#' @export +format.pillar_shaft_ogd_file <- function(x, width, ...) { + files <- x$x + if (in_pkgdown()) { + id <- substr(files[1], 1, nchar(files[1]) - 5) + files[1:2] <- c("meta.json", "data.csv") + files <- gsub(paste0(id, "_"), "", files, fixed = TRUE) + } + too_long <- nchar(files) > width + files[too_long] <- paste0(substring(files[too_long], 1, width - 2), + cli::symbol$ellipsis) + if (in_pkgdown()) { + files <- cli::style_hyperlink( + files, paste0("https://data.statistik.gv.at/data/", x$x)) + } else { + files <- cli::style_hyperlink(files, paste0("file://", path.expand( + od_cache_dir()), x$x)) %>% as.character() + } + pillar::new_ornament(files, align = "left") +} + +pillar_shaft.ogd_id <- function(x, ...) { + pillar::new_pillar_shaft(list(x = x), width = pillar::get_max_extent(x), + min_width = 20, class = "pillar_shaft_ogd_id", + type_sum = "chr") +} + +format.pillar_shaft_ogd_id <- function(x, width, ...) { + id <- x$x + too_long <- nchar(id) > width + id[too_long] <- paste0(substring(id[too_long], 1, width - 2), + cli::symbol$ellipsis) + id <- cli::style_hyperlink(id, paste0( + "https://data.statistik.gv.at/web/meta.jsp?dataset=", x$x)) + pillar::new_ornament(id, align = "left") +} + diff --git a/R/recoder.R b/R/recoder.R index 2c45cc9d..e5f58f9c 100644 --- a/R/recoder.R +++ b/R/recoder.R @@ -1,7 +1,7 @@ #' @title Recode sc_table objects #' @description #' A collection of methods that can be used to modify an object of class -#' sc_table by reference. Typical usage is to acces the `recode` binding +#' sc_table by reference. Typical usage is to access the `recode` binding #' of an `sc_table` object and then use method chaining to perform recode #' operations. #' @@ -80,7 +80,7 @@ sc_recoder <- R6::R6Class( private$x$p_fields[[i]][j, private$l(language)] <- new invisible(self) }, - #' @description Cheange the total code for a field + #' @description Change the total code for a field #' @param field a field code #' @param new a level code for the field or `NA`. Will be used as the #' new total code. In case of `NA`, the total code will be unset. @@ -91,7 +91,7 @@ sc_recoder <- R6::R6Class( invisible(self) }, #' @description set the visibility of a level. Invisible levels are - #' ommited in the output of `$tabulate()` but don't affect aggregation + #' omitted in the output of `$tabulate()` but don't affect aggregation #' @param field a field code #' @param level a level code for the field #' @param new visibility. `TRUE` or `FALSE` diff --git a/R/sc_data.R b/R/sc_data.R index 7b4565ef..0845e78c 100644 --- a/R/sc_data.R +++ b/R/sc_data.R @@ -2,7 +2,7 @@ #' #' @description #' This class represents a common interface for datasets returned from the -#' STATcube REST API and OGD datasets. `sc_data` obects are usually created with +#' STATcube REST API and OGD datasets. `sc_data` objects are usually created with #' [od_table()] or [sc_table()]. #' @examples #' ## create a new sc_data object via od_table() @@ -40,9 +40,9 @@ sc_data <- R6::R6Class( private$recoder <- sc_recoder$new(private) }, #' @description get information about a specific field. The format of - #' the reurn value is similar to `$meta`. A `data.frame` that includes + #' the return value is similar to `$meta`. A `data.frame` that includes #' codes and labels for each level of the field. - #' @param i specifier for the field. Integer or character. If an interger + #' @param i specifier for the field. Integer or character. If an integer #' is provided, it should match the row number in `$meta$fields`. If #' a character is provided, the field is matched using [pmatch()] on #' all available codes and labels. diff --git a/R/schema.R b/R/schema.R index 25ddfe56..aac8801c 100644 --- a/R/schema.R +++ b/R/schema.R @@ -4,9 +4,9 @@ #' This endpoint can be used to get all available databases and tables #' as well as metadata about specific databases. #' -#' The main function `sc_schema()` can be used with any resouce id. +#' The main function `sc_schema()` can be used with any resource id. #' [sc_schema_catalogue()] and [sc_schema_db()] are very simple -#' wrapper functions around [`sc_schema()`] and are comparabable to the +#' wrapper functions around [`sc_schema()`] and are comparable to the #' [catalogue explorer](`r sc_browse_catalogue()`) or the #' [table view](`r sc_browse_database('deake005', open = TRUE)`) of the STATcube GUI. #' @@ -56,7 +56,7 @@ print_schema_with_tree <- function(x, ...) { #' @rdname sc_schema #' @param x an object of class `sc_schema()` i.e. the return value of #' [sc_schema()], [sc_schema_db()] or [sc_schema_catalogue()]. -#' @param tree wether to use the [`data.tree`](https://rdrr.io/cran/data.tree/man/data.tree.html) package for printing. +#' @param tree whether to use the [`data.tree`](https://rdrr.io/cran/data.tree/man/data.tree.html) package for printing. #' @param limit,... passed to [data.tree::print.Node()] if `tree` is set #' to `TRUE`. Ignored otherwise. #' @section Printing with data.tree: @@ -74,29 +74,44 @@ print.sc_schema <- function(x, tree = NULL, ..., limit = 30) { classes <- sapply(x, class) if (tree && any(classes == "sc_schema")) return(print_schema_with_tree(x, limit = limit, ...)) - cat(x$type, ": ", x$label, "\n", sep = "") - sc_schema_print_children(x, message_empty = switch( + style <- cli::make_ansi_style(sc_schema_colors()[[x$type]]) + cat(style(x$type), ": ", cli::style_bold(x$label), "\n", sep = "") + short_id <- strsplit(x$id, ":")[[1]][3] + message_empty <- switch( x$type, - DATABASE = "# Get more info with {.run STATcubeR::sc_schema_db('{x$id}')}", - TABLE = "Get the data with {.run STATcubeR::sc_table_saved('{x$id}')}", + DATABASE = c("# Get more metdata with {.run [sc_schema_db('{short_id}')]", + "(STATcubeR::sc_schema_db('{x$id}'))}"), + TABLE = c("# Get the data with {.run [sc_table_saved('{short_id}')]", + "(STATcubeR::sc_table_saved('{x$id}'))}"), NULL - )) + ) + sc_schema_print_children(x, message_empty = message_empty, ...) + invisible(x) } -sc_schema_print_children <- function(x, message_empty = NULL) { +sc_schema_print_children <- function(x, message_empty = NULL, ...) { classes <- sapply(x, class) - child_schemas <- names(x)[classes == "sc_schema"] + ind <- which(classes == "sc_schema") + child_schemas <- names(x)[ind] if (length(child_schemas) > 0) { - data.frame( - child = child_schemas, - type = sapply(x[child_schemas], function(x) x$type), - n_childs = sapply(x[child_schemas], function(x) { - sum(sapply(x, class) == "sc_schema") - }), - stringsAsFactors = FALSE - ) %>% `class<-`(c("tbl", "data.frame")) %>% `row.names<-`(NULL) %>% print() - } else if (!is.null(message_empty)) + children <- vctrs::new_data_frame(list( + child = new_schema_uri( + label = child_schemas, + uri = sapply(ind, function(i) x[[i]]$id) + ), + type = sc_schema_type(sapply(ind, function(i) x[[i]]$type)), + n = sapply(ind, function(i) { + sum(sapply(x[[i]], class) == "sc_schema") + }) + ), class = c("tbl_df", "tbl")) + if (all(children$n == 0)) + children$n <- NULL + formatted <- format(children, ...) + cat(formatted[seq(4, length(formatted))], sep = "\n") + } else if (!is.null(message_empty)) { + short_id <- strsplit(x$id, ":")[[1]][3] cat(cli::format_inline(message_empty), "\n") + } } sc_as_nested_list <- function(x) { @@ -113,33 +128,45 @@ sc_as_nested_list <- function(x) { #' @export sc_schema_flatten <- function(x, type) { stopifnot(inherits(x, "sc_schema")) + type <- match.arg(toupper(type), names(sc_schema_colors())) response <- attr(x, "response") stopifnot(!is.null(response)) response <- httr::content(response) flattened <- sc_schema_flatten_impl(response, type) - flattened <- as.data.frame(flattened, stringsAsFactors = FALSE) - class(flattened) <- c("tbl", "data.frame") + flattened <- vctrs::new_data_frame(flattened, + class = c("sc_schema_flatten", "tbl", "tbl_df")) flattened } +#' @export +print.sc_schema_flatten <- function(x, ...) { + y <- x + y$id <- new_schema_uri(x$id, x$id) + class(y) <- setdiff(class(x), "sc_schema_flatten") + print(y, ...) + invisible(x) +} + sc_schema_flatten_impl <- function(resp, type) { - if (resp$type == type) - return(list(id = resp$id, label = resp$label)) - if (is.null(resp$children)) - return(NULL) - ret <- lapply(resp$children, sc_schema_flatten_impl, type) - list( - id = lapply(ret, function(x) { x$id }) %>% unlist(), - label = lapply(ret, function(x) { x$label }) %>% unlist() - ) + id <- character() + label <- character() + if (!is.null(resp$children)) { + ret <- lapply(resp$children, sc_schema_flatten_impl, type) + id <- lapply(ret, function(x) x$id) %>% unlist() + label <- lapply(ret, function(x) x$label) %>% unlist() + } + if (resp$type == type) { + id <- c(resp$id, id) + label <- c(resp$label, label) + } + list(id = id, label = label) } #' @describeIn sc_schema is similar to the -#' [catalogue explorer](`r sc_browse_catalogue()`) of the STATcube GUI and reurns +#' [catalogue explorer](`r sc_browse_catalogue()`) of the STATcube GUI and returns #' a tree-type object containing all databases and tables. #' @export -sc_schema_catalogue <- function(depth = "folder", language = c("en", "de"), - key = NULL, server = "ext") { - sc_schema(depth = depth, language = language, key = key, server = server) +sc_schema_catalogue <- function(depth = "folder", ...) { + sc_schema(id = NULL, depth = depth, ...) } diff --git a/R/schema_db.R b/R/schema_db.R index 0eeb0a2a..223b5988 100644 --- a/R/schema_db.R +++ b/R/schema_db.R @@ -30,7 +30,7 @@ #' print(tree = TRUE) #' @describeIn sc_schema is similar to the #' [table view](`r sc_browse_database('deake005', open = TRUE)`) -#' of ths STATcube GUI and gives information about all measures and +#' of the STATcube GUI and gives information about all measures and #' classification fields for a specific database #' @export sc_schema_db <- function(id, depth = "valueset", language = c("en", "de"), diff --git a/R/schema_uri.R b/R/schema_uri.R new file mode 100644 index 00000000..ccd8741e --- /dev/null +++ b/R/schema_uri.R @@ -0,0 +1,95 @@ +new_schema_uri <- function(label, uri) { + vctrs::vec_assert(label, character()) + vctrs::vec_assert(uri, character()) + vctrs::new_rcrd(list(label = label, uri = uri), class = "sc_schema_uri") +} + +#' @export +format.sc_schema_uri <- function(x, ...) { + format(vctrs::field(x, "label"), ...) +} + +sc_schema_run <- function(uri) { + run <- paste0("STATcubeR::sc_schema(\"", uri, "\")") + is_table <- grep("^str:table", uri) + run[is_table] <- paste0("STATcubeR::sc_table_saved(\"", uri[is_table], "\")") + run +} + +sc_schema_url <- function(uri) { + url <- rep(NA_character_, length(uri)) + is_database <- grep("^str:database", uri) + if (length(is_database) > 0) + url[is_database] <- uri[is_database] %>% + sub("^str:database:", "", .) %>% + sc_browse_database(server = "ext") %>% + as.character() + is_table <- grepl("^str:table", uri) & + !grepl("^([0-9a-f-])+$", sub("str:table:", "", uri)) + if (length(is_table) > 0) + url[is_table] <- uri[is_table] %>% + sub("^str:table:", "", .) %>% + sc_browse_table(server = "ext") %>% + as.character() + url +} + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.sc_schema_uri <- function(x, ...) { + label <- vctrs::field(x, "label") + formatted <- label + short_formatted <- substr(formatted, 1, 40) + uri <- vctrs::field(x, "uri") + if (cli::ansi_hyperlink_types()$run) { + run <- sc_schema_run(uri) + template <- cli::format_inline("{.run [%s](%s)}") %>% cli::style_underline() + formatted <- sprintf(template, run, formatted) + short_formatted <- sprintf(template, run, short_formatted) + } else if (cli::ansi_has_hyperlink_support()) { + url <- sc_schema_url(uri) + formatted[!is.na(url)] <- cli::style_hyperlink(formatted[!is.na(url)], + url[!is.na(url)]) + short_formatted[!is.na(url)] <- cli::style_hyperlink( + short_formatted[!is.na(url)], url[!is.na(url)]) + } + pillar::new_pillar_shaft_simple( + formatted, + width = max(nchar(label)), + min_width = 40, + type_sum = "chr", + short_formatted = short_formatted + ) +} + +#' @export +as.character.sc_schema_uri <- function(x, ...) { + format(x) +} + +sc_schema_colors <- function() { + if (!is.null(getOption("STATcubeR.schema_colors"))) + return(getOption("STATcubeR.schema_colors")) + list( + "FOLDER" = "#8470FF", "DATABASE" = "cadetblue", "TABLE" = "peru", + "GROUP" = "#8470FF", "FIELD" = "cyan", "VALUESET" = "cadetblue", + "VALUE" = "#8470FF", "MEASURE" = "yellow", "STAT_FUNCTION" = "cadetblue", + "COUNT" = "cadetblue" + ) +} + +sc_schema_type <- function(type) { + stopifnot(is.character(type), all(type %in% names(sc_schema_colors()))) + vctrs::new_vctr(type, class = "sc_schema_type", inherit_base_type = TRUE) +} + +#' @export +pillar_shaft.sc_schema_type <- function(x, ...) { + type <- vctrs::vec_data(x) + stl <- sc_schema_colors() + formatted <- sapply(type, function(y) { + style <- cli::make_ansi_style(stl[[y]]) + style(y) + }) + pillar::new_pillar_shaft_simple(formatted, type_sum = "chr") +} diff --git a/R/sdmx_table.R b/R/sdmx_table.R new file mode 100644 index 00000000..bd476a99 --- /dev/null +++ b/R/sdmx_table.R @@ -0,0 +1,218 @@ +#' Import data from SDMX +#' +#' Function that reads STATcube data from an sdmx archive - a zip file +#' consisting of `structure.xml` with metadata and `dataset.xml` for the +#' values. +#' +#' @param file a "sdmx archive" file that was downloaded from STATcube. +#' @return An object of class `sc_data` +#' @keywords experimental +#' @examples +#' x <- "sdmx/dedemo.zip" %>% system.file(package = "STATcubeR") %>% sdmx_table() +#' # print and tabulate +#' x +#' x$tabulate() +#' # explore hierarchies +#' nuts2 <- x$field("C-B00-0") +#' data.frame(label = nuts2$label, +#' parent = nuts2$label[match(nuts2$parent, nuts2$code)]) +#' # extract more data from the raw xml +#' x$xml$meta %>% xml2::xml_find_first(".//Name") +#' @export +sdmx_table <- function(file) { + sdmx_table_class$new(file) +} + +sdmx_read <- function(folder = ".") { + list( + meta = folder %>% sprintf('%s/structure.xml', .) %>% xml2::read_xml() %>% + xml2::xml_ns_strip(), + data = folder %>% sprintf('%s/dataset.xml', .) %>% xml2::read_xml() %>% + xml2::xml_ns_strip() + ) +} + +sdmx_read_zip <- function(zip_file) { + exdir <- tempfile() + dir.create(exdir) + on.exit(unlink(exdir, recursive = TRUE)) + unzip(zipfile = zip_file, exdir = exdir) + sdmx_read(exdir) +} + +sdmx_as_raw_df <- function(x) { + obs <- x$data %>% xml2::xml_find_all(".//ObsValue") %>% + xml2::xml_attr("value") %>% as.numeric() + val <- x$data %>% xml2::xml_find_all(".//SeriesKey//Value") %>% + xml2::xml_attr("value") + # assume that entries of SeriesKey always use the same order + val_lab <- x$data %>% xml2::xml_find_first(".//SeriesKey") %>% + xml2::xml_find_all(".//Value") %>% xml2::xml_attr("concept") + val_split <- split(val, rep( + seq_len(length(val)/length(obs)), length(obs) + )) + names(val_split) <- val_lab + n <- which(val_lab == "MEASURES_DIMENSION") + obs_split <- split(obs, val_split[[n]]) + ind <- val_split[[n]] == val_split[[n]][1] + res <- c( + lapply(val_split[-n], function(x) { + u <- x[ind] + factor(u, unique(u), sdmx_codes(unique(u))) + }), + obs_split + ) %>% vctrs::new_data_frame() + names(res) <- gsub("F-DATA_", "", names(res)) %>% sdmx_unescape_codes() + res +} + +sdmx_fields <- function(x) { + fields <- x$meta %>% + xml2::xml_find_all(".//CodeList[(@id and starts-with(@id, 'C-'))]") + lapply(fields, function(field) { + names <- xml2::xml_find_all(field, ".//Name") %>% + xml2::xml_text() + codes <- xml2::xml_find_all(field, "Code") + values <- xml2::xml_attr(codes, "value") + desc <- codes %>% xml2::xml_find_all("Description") %>% + xml2::xml_text() + ann_list <- codes %>% + xml2::xml_find_all(".//common:AnnotationText", flatten = FALSE) %>% + lapply(xml2::xml_text) + has_ann <- vapply(ann_list, length, 0) > 0 + ann_de <- ann_en <- rep(NA_character_, length(ann_list)) + ann_de[has_ann] <- vapply(ann_list[has_ann], function(x) {x[1]}, "") + ann_en[has_ann] <- vapply(ann_list[has_ann], function(x) {x[2]}, "") + ind_de <- seq(1, length(desc), 2) + ind_en <- seq(2, length(desc), 2) + parent <- xml2::xml_attr(codes, "parentCode") + list( + id = field %>% xml2::xml_attr("id"), + label_en = names[2], + label_de = names[1], + elements = vctrs::new_data_frame(list( + label = desc[ind_en], + code = sdmx_codes(values), + parsed = desc[ind_en], + label_de = desc[ind_de], + label_en = desc[ind_en], + parent = factor(parent, levels = values, labels = sdmx_codes(values)), + de_desc = ann_de, + en_desc = ann_en + )) + ) + }) +} + +sdmx_meta <- function(x) { + code_measure <- x$meta %>% xml2::xml_find_all( + ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code") %>% xml2::xml_attr("value") + label_measure <- x$meta %>% xml2::xml_find_all( + ".//*[(@id = 'CL_MEASURES_DIMENSION')]/Code/Description") %>% + xml2::xml_text() + code_db <- xml2::xml_find_all(x$meta, ".//ConceptScheme") %>% + xml2::xml_attr("id") %>% sdmx_unescape_codes() + label_dataset <- x$meta %>% xml2::xml_find_all(".//ConceptScheme/Name") %>% + xml2::xml_text() + ind_de <- seq(1, length(label_measure), 2) + ind_en <- seq(2, length(label_measure), 2) + prepared <- x$meta %>% xml2::xml_find_all(".//message:Prepared") %>% + xml2::xml_text() %>% as.POSIXct(format = "%FT%T") + list( + source = data_frame(label = label_dataset[2], code = code_db, + lang = "en", label_de = label_dataset[1], + label_en = label_dataset[2], prepared = prepared), + measures = data_frame( + label = label_measure[ind_en], + code = sdmx_unescape_codes(code_measure), + label_de = label_measure[ind_de], + label_en = label_measure[ind_en], + NAs = rep(0, length(code_measure)) + ) + ) +} + +#' @export +format.sdmx_table <- function(x, ...) { + c( + cli::style_bold(strwrap(x$meta$source$label)), + "", + cli_dl2(list( + Database = cli::style_hyperlink( + x$meta$source$code, sprintf( + "https://statcube.at/statistik.at/ext/statcube/openinfopage?id=%s", + x$meta$source$code) + ), + Measures = x$meta$measures$label, + Fields = x$meta$fields$label + )), + "", + cli_dl2(list( + Downloaded = cli_class(x$meta$source$prepared, "timestamp"), + STATcubeR = cli_class(x$meta$source$scr_version, "version") + )) + ) +} + +sdmx_codes <- function(codes) { + simplified <- gsub("^.*_", "", codes) + if (anyDuplicated(simplified)) + codes + else + simplified +} + +sdmx_esc <- function(codes, char) { + int <- utf8ToInt(char) + gsub(sprintf("@%x@", int), char, codes, fixed = TRUE) +} + +sdmx_unescape_codes <- function(codes) { + codes %>% sdmx_esc("\u5f") %>% sdmx_esc("\u7c") %>% sdmx_esc("\u2b") %>% + sdmx_esc("\u2e") %>% sdmx_esc("\u23") %>% sdmx_esc("\u40") +} + +sdmx_table_class <- R6::R6Class( + classname = "sdmx_table", class = TRUE, + inherit = sc_data, + list( + initialize = function(file) { + x <- sdmx_read_zip(file) + df <- sdmx_as_raw_df(x) + fields <- sdmx_fields(x) + meta <- sdmx_meta(x) + meta$fields <- data_frame( + code = vapply(fields, function(x) x$id, ""), + label = vapply(fields, function(x) x$label_en, "") , + label_de = vapply(fields, function(x) x$label_de, ""), + label_en = vapply(fields, function(x) x$label_en, ""), + total_code = rep(NA, length(fields)), + nitems = sapply(fields, function(x) {nrow(x$elements)}), + type = rep("Category", length(fields)) + ) + fields2 <- lapply(fields, function(x) x$elements) + names(df) <- c(meta$fields$code, meta$measures$code) + super$initialize(df, meta, fields2) + self$language <- "en" + private$p_xml <- x + } + ), + list(p_xml = NULL), + list( + xml = function() { private$p_xml }, + description = function() { + self$xml$meta %>% xml2::xml_find_first( + sprintf(".//ConceptScheme/Description[(@xml:lang='%s')]", self$language) + ) %>% xml2::xml_text() %>% `class<-`("sdmx_description") + } + ) +) + +#' @export +print.sdmx_description <- function(x, ...) { + desc <- strsplit(x, "\r\n")[[1]] + desc <- ifelse(grepl("^.)", desc), + cli::style_bold(substring(desc, 4)), + desc) + cat(desc, sep = "\n") +} diff --git a/R/table.R b/R/table.R index 809977af..f1e23f8f 100644 --- a/R/table.R +++ b/R/table.R @@ -26,7 +26,7 @@ sc_table_class <- R6::R6Class( cloneable = FALSE, inherit = sc_data, public = list( - #' @description Ususally, objects of class `sc_table` are generated with + #' @description Usually, objects of class `sc_table` are generated with #' one of the factory methods [sc_table()], [sc_table_saved()] or #' [sc_table_custom()]. If this constructor is invoked directly, #' either omit the parameters `json` and `file` or make sure that they @@ -36,7 +36,7 @@ sc_table_class <- R6::R6Class( #' @param json the json file used in the request as a string. #' @param file the file path to the json file #' @param add_totals was the json request modified by adding totals via - #' the add_toals parameter in one of the factory functions (`sc_table()`, + #' the add_totals parameter in one of the factory functions (`sc_table()`, #' `sc_table_custom()`). Necessary, in order to also request totals via #' the `$add_language()` method. initialize = function(response, json = NULL, file = NULL, add_totals = FALSE) { @@ -88,12 +88,14 @@ sc_table_class <- R6::R6Class( #' @description An extension of [sc_tabulate()] with additional #' parameters. #' @param ... Parameters which are passed down to [sc_tabulate()] - #' @param round apply rounding to each measure accoring to the precision + #' @param round apply rounding to each measure according to the precision #' provided by the API. #' @param annotations Include separate annotation columns in the returned #' table. This parameter is currently broken and needs to be re-implemented - tabulate = function(..., round = TRUE, annotations = FALSE) { - sc_table_tabulate(self, ..., round = round, annotations = annotations) + #' @param recode_zeros interpret zero values as missings? + tabulate = function(..., round = FALSE, annotations = FALSE, recode_zeros = FALSE) { + sc_table_tabulate(self, ..., round = round, annotations = annotations, + recode_zeros = recode_zeros) }, #' @description open the dataset in a browser browse = function() { @@ -129,22 +131,21 @@ sc_table_class <- R6::R6Class( #' the raw response content raw = function() httr::content(self$response), #' @field annotation_legend - #' list of all annotations occuring in the data as a `data.frame` with + #' list of all annotations occurring in the data as a `data.frame` with #' two columns for the annotation keys and annotation labels. annotation_legend = function() { am <- self$raw$annotationMap - data.frame(annotation = names(am), label = unlist(am), row.names = NULL) + data_frame(annotation = names(am), label = unlist(am)) }, #' @field rate_limit #' how much requests were left after the POST request for this table was sent? #' Uses the same format as [sc_rate_limit_table()]. rate_limit = function() { headers <- self$response$headers - res <- data.frame( + res <- list( remaining = headers$`x-ratelimit-remaining-table`, limit = headers$`x-ratelimit-table`, - reset = headers$`x-ratelimit-reset-table`, - stringsAsFactors = FALSE + reset = headers$`x-ratelimit-reset-table` ) class(res) <- "sc_rate_limit_table" res @@ -168,8 +169,9 @@ sc_table_class <- R6::R6Class( #' * [sc_table_saved()] uses a table uri of a saved table. #' #' Those three functions all return an object of class `"sc_table"`. -#' @param json_file path to a json file, which was downloaded via the STATcube -#' GUI ("Open Data API Abfrage") +#' @param json Path to a json file, which was downloaded via the STATcube +#' GUI ("Open Data API Request"). Alternatively, a json string which +#' passes [jsonlite::validate()]. #' @param add_totals Should totals be added for each classification field in #' the json request? #' @return An object of class `sc_table` which contains the return @@ -178,12 +180,13 @@ sc_table_class <- R6::R6Class( #' [sc_table_class] for the class documentation. #' @inheritParams sc_key #' @param language The language to be used for labeling. `"en"` (the default) -#' will use english. `"de"` uses german. +#' will use english. `"de"` uses German. #' The third option `"both"` will import both languages by sending two requests #' to the `/table` endpoint. +#' @param json_file Deprecated. Use `json` instead #' @family functions for /table #' @examplesIf sc_key_exists() -#' my_table <- sc_table(json_file = sc_example("population_timeseries.json")) +#' my_table <- sc_table(json = sc_example("population_timeseries.json")) #' #' # print #' my_table @@ -206,14 +209,15 @@ sc_table_class <- R6::R6Class( #' my_response <- sc_table_saved(table_uri) #' as.data.frame(my_response) #' @export -sc_table <- function(json_file, language = NULL, add_totals = TRUE, - key = NULL) { +sc_table <- function(json, language = NULL, add_totals = TRUE, key = NULL, + json_file = NA) { + json <- normalize_json(json, json_file) language <- sc_language(language, c("en", "de", "both")) both <- language == "both" if (both) language <- "de" - res <- sc_table_json_post(readLines(json_file, warn = FALSE), language, add_totals, key) %>% - sc_table_class$new(file = json_file, add_totals = add_totals) + res <- sc_table_json_post(json$string, language, add_totals, key) %>% + sc_table_class$new(json$string, json$file, add_totals) if (both) res$add_language("en", key) res @@ -239,6 +243,19 @@ print.sc_table <- function(x, ...) { cat(format(x, ...), sep = "\n") } +normalize_json <- function(json, json_file) { + if (!is.na(json_file)) { + json <- json_file + warning("parameter `json_file` was renamed to `json`") + } + file <- NULL + if (length(json) == 1 && !jsonlite::validate(json)) { + file <- json + json <- readLines(file, warn = FALSE) + } + list(file = file, string = json) +} + format.sc_table <- function(x, ...) { c( cli::style_bold(strwrap(x$meta$source$label)), diff --git a/R/table_as_data_frame.R b/R/table_as_data_frame.R index dde9d200..31a9414d 100644 --- a/R/table_as_data_frame.R +++ b/R/table_as_data_frame.R @@ -1,4 +1,6 @@ unlist_n <- function(x, times) { + if (times <= 0) + return(x) x <- unlist(x, recursive = FALSE) if (times == 1) return(x) @@ -28,7 +30,7 @@ sc_model_matrix <- function(dims) { rep(times = times, each = each) times <- times * dims[i] } - as.data.frame(out) + vctrs::new_data_frame(out) } sc_table_create_data <- function(content) { diff --git a/R/table_custom.R b/R/table_custom.R index cb415811..e5ae249b 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -6,14 +6,43 @@ #' See the [Custom tables article](https://statistikat.github.io/STATcubeR/articles/sc_table_custom.html) #' for more details. #' -#' @param db The uid of a database -#' @param measures A character vector of uids for measures. Can be either of -#' type `MEASURE` or of type `STAT_FUNCTION` +#' @param db The uid of a database. Must be of type `DATASET` +#' @param measures A character vector of uids for measures. Each entry must be +#' of type `MEASURE`, `STAT_FUNCTION` or `COUNT`. #' @param dimensions A character vector of dimensions for the cube. Can be #' either of type `FIELD` or type `VALUESET`. Those entries are referred to #' as `fields` in the parsed API response -#' @keywords internal -#' @examples +#' @param add_totals Should totals be added for each classification field in +#' the json request? Ignored if `recodes` is used. +#' @param recodes One or more recodes that were generated via [sc_recode()]. +#' If more than one recode is supplied, recodes should be concatenated with +#' [c()]. +#' @param language The language to be used for labeling. "en" +#' (the default) will use English. "de" uses German. +#' @param dry_run If `TRUE`, no request is sent to the API. Instead, type +#' checks are performed and the json request is returned as a string. +#' Defaults to `FALSE`. +#' @inheritParams sc_table +#' @section Schema objects in parameters: +#' it is possible to pass `sc_schema` objects (usually generated by +#' [sc_schema_db()]) instead of ids in [sc_table_custom()] and [sc_recode()]. +#' If provided, the schema objects will be converted into ids via `$id`. +#' @section Error handling: +#' Unfortunately, the API gives fairly vague error messages in case a +#' custom table request is ill defined. For this reason, [sc_table_custom()] +#' applies some simple heuristics and throws warnings if inconsistencies +#' in the provided parameters are recognized. The following conditions are +#' currently checked +#' * the parameter `db` is of type `DATABASE` +#' * all entries in `measures` are of type `MEASURE`, `COUNT` or +#' `STATFN` +#' * all entries in `dimensions` are of type `VALUESET` or `FIELD` +#' * all entries in `field` are of type `VALUESET` or `FIELD` +#' * all entries in `map` are of type `VALUE` +#' * all fields in `recodes` are also present in `dimensions` +#' * the first two arguments of `sc_recode()` are consistent, i.e. +#' if the provided `VALUE`s belong to the `VALUESET/FIELD` +#' @examplesIf sc_key_exists() #' sc_table_custom("str:database:detouextregsai") #' #' sc_table_custom( @@ -32,12 +61,104 @@ #' "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0" #' ) #' ) +#' +#' schema <- sc_schema_db("detouextregsai") +#' region <- schema$`Other Classifications`$`Tourism commune [ABO]`$ +#' `Regionale Gliederung (Ebene +1)` +#' month <- schema$`Mandatory fields`$`Season/Tourism Month` +#' +#' x <- sc_table_custom( +#' schema, +#' schema$Facts$Arrivals, +#' list(month, region), +#' recodes = c( +#' sc_recode(region, total = FALSE, map = list( +#' region$Achensee, +#' list(region$Arlberg, region$`Ausseerland-Salzkammergut`) +#' )), +#' sc_recode(month, total = FALSE) +#' ) +#' ) +#' x$tabulate() #' @export -sc_table_custom <- function(db, measures = c(), dimensions = c(), language = c("en", "de"), - add_totals = TRUE, key = NULL) { +sc_table_custom <- function(db, measures = c(), dimensions = c(), + language = c("en", "de"), + add_totals = TRUE, key = NULL, recodes = NULL, + dry_run = FALSE) { + db <- as_id(db) + measures <- as_id(measures, TRUE) + dimensions <- as_id(dimensions, TRUE) json_list <- list(database = db, measures = as.list(measures), - dimensions = lapply(dimensions, list)) + dimensions = lapply(dimensions, I)) + if (!is.null(recodes)) { + json_list$recodes <- recodes + add_totals <- FALSE + } json <- jsonlite::toJSON(json_list, auto_unbox = TRUE, pretty = TRUE) + if (!all(names(recodes) %in% dimensions)) + warning("`recodes` and `dimensions` might be inconsistent") + if (!all(grepl("^str:valueset", dimensions) | grepl("^str:field", dimensions))) + warning("parameter `dimensions` is not of type `FIELD` or `VALUESET`") + if (!all(grepl("^str:measure", measures) | grepl("^str:statfn", measures) | + grepl("^str:count", measures))) + warning("parameter `measures` is not of type `MEASURE`, `STATFN` or `COUNT`") + if (!grepl("^str:database", db)) + warning("parameter `db` is not of type `DATABASE`") + if (dry_run) + return(json) response <- sc_table_json_post(json, language, add_totals, key) sc_table_class$new(response, toString(json)) } + +#' @describeIn sc_table_custom creates a recode object which can be used +#' for the `recode` parameter of [sc_table_custom()] +#' @param field An uid of a classification field to be recoded. The provided +#' uid should also be passed in the `dimensions` parameter of +#' [sc_table_custom()]. +#' @param map A list of ids for values (type `VALUE`) This can also be a nested +#' list if items should be grouped. See examples +#' @param total Add totals to the field? If `map` is provided, the totals +#' will correspond to the filtered data. +#' @export +sc_recode <- function(field, map = NULL, total = FALSE) { + if (is.null(map)) + return(stats::setNames(list(list(total = total)), as_id(field))) + if (inherits(map, "sc_schema")) + map <- list(map) + else + map <- stats::setNames(map, NULL) + recode <- stats::setNames( + list(list( + map = lapply(map, function(value) { + I(as_id(value, TRUE)) + }), + total = total + )), + as_id(field) + ) + code_parent <- gsub("^.*:", "", names(recode)) + codes_children <- unlist(recode[[1]]$map) + if (!all(grepl(code_parent, codes_children))) + warning("parameters `field` and `map` might be inconsistent") + if (!all(grepl("^str:value:", codes_children))) + warning("some entries in `map` are not of type VALUE") + if (!grepl("^str:valueset", names(recode)) && !grepl("^str:field", names(recode))) + warning("parameter `field` is not of type `FIELD` or `VALUESET`") + recode +} + +as_id <- function(x, multiple = FALSE) { + if (length(x) == 0) + return(c()) + if (inherits(x, "sc_schema")) + return(x$id) + if (is.character(x) && length(x) == 1) + return(x) + if (!multiple) + stop("invalid id") + if (is.character(x)) + return(x) + if (is.list(x)) + return(sapply(x, as_id)) + stop("invalid ids") +} diff --git a/R/table_field.R b/R/table_field.R index 28f2a528..f0a72faa 100644 --- a/R/table_field.R +++ b/R/table_field.R @@ -22,19 +22,22 @@ sc_field_type <- function(field) { codes_numeric <- all(!is.na(suppressWarnings(as.numeric(varcodes)))) if (!codes_numeric) return("Category") - if (!all(diff(nchar(varcodes)) == 0) || !(nchar(varcodes)[1] %in% 4:6)) + if (!all(diff(nchar(varcodes)) == 0) || !(nchar(varcodes)[1] %in% c(4:6, 8))) return("Category") year <- as.numeric(substr(varcodes, 1, 4)) if (!all(year %in% 1900:2150)) return("Category") time_type <- switch( as.character(nchar(varcodes[1])), - `4` = "year", `5` = "quarter", `6` = "month" + `4` = "year", `5` = "quarter", `6` = "month", `8` = "date" ) if ((time_type == "quarter") && all(substr(varcodes, 5, 5) %in% 5:6)) time_type <- "half-year" if ((time_type == "month") && any(as.numeric(substr(varcodes, 5, 6)) > 12)) time_type <- "week" + if (time_type == "date" && (any(as.numeric(substr(varcodes, 5, 6)) > 12) || + any(as.numeric(substr(varcodes, 7, 8)) > 31))) + return("Category") paste0("Time (", time_type, ")") } @@ -69,6 +72,12 @@ sc_field_parse_week <- function(year, week) { first_day + 7 * (as.numeric(week) - 1) } +sc_field_parse_iso_date <- function(year, remainder) { + month <- substr(remainder, 1, 2) + day <- substr(remainder, 3, 4) + as.Date(paste(year, month, day, sep = "-")) +} + sc_field_parse_time <- function(field) { if (is.character(field)) varcodes <- sapply(field, function(x) utils::tail(strsplit(x, "-")[[1]], 1)) @@ -77,12 +86,16 @@ sc_field_parse_time <- function(field) { varcodes[varcodes == "SC_TOTAL"] <- NA year <- substr(varcodes, 1, 4) remainder <- substr(varcodes, 5, 8) + ind <- is.na(varcodes) + nc <- nchar(remainder[!ind][1]) + stopifnot(all(nchar(remainder[!ind]) == nc)) + if (nc == 4) + return(sc_field_parse_iso_date(year, remainder)) if (any(as.numeric(remainder) > 12, na.rm = TRUE)) return(sc_field_parse_week(year, remainder)) month <- sc_field_parse_time_month(remainder) parsed <- as.Date(rep(NA, length(varcodes))) - ind <- is.na(varcodes) parsed[!ind] <- sc_as_time(year, month, ind) parsed } diff --git a/R/table_meta.R b/R/table_meta.R index 8235578d..d9f17224 100644 --- a/R/table_meta.R +++ b/R/table_meta.R @@ -36,42 +36,38 @@ summarize_annotations <- function(content, i) { sc_meta <- function(content) { measure_info <- lapply(seq_along(content$measures), function(i) { measure <- content$measures[[i]] - data.frame( + data_frame( label = measure$label, code = get_var_code(measure$measure), fun = measure$`function`, precision = content$cubes[[i]]$precision, annotations = summarize_annotations(content, i), - NAs = sum(unlist(content$cubes[[i]]$values) == 0), - stringsAsFactors = FALSE + NAs = sum(unlist(content$cubes[[i]]$values) == 0) ) }) %>% do.call(rbind, .) field_info <- lapply(content$fields, function(field) { has_total <- field$items[[length(field$items)]]$type == "Total" - data.frame( + data_frame( label = field$label, code = get_var_code(field$uri), nitems = length(field$items), type = sc_field_type(field), - total_code = ifelse(has_total, "SC_TOTAL", NA_character_), - stringsAsFactors = FALSE + total_code = ifelse(has_total, "SC_TOTAL", NA_character_) ) }) %>% do.call(rbind, .) - db_info <- data.frame( + db_info <- data_frame( label = content$database$label, - code = content$database$id, - stringsAsFactors = FALSE + code = content$database$id ) list(source = db_info, measures = measure_info, fields = field_info) } sc_meta_field <- function(field) { res <- lapply(field$items, function(item) { - data.frame( + data_frame( label = paste(item$labels, collapse = ";"), - code = get_item_code(item), - stringsAsFactors = FALSE + code = get_item_code(item) ) }) %>% do.call(rbind, .) res$parsed <- sc_field_parse(field) diff --git a/R/table_saved.R b/R/table_saved.R index 522413b0..9475c81f 100644 --- a/R/table_saved.R +++ b/R/table_saved.R @@ -9,10 +9,13 @@ sc_table_saved_list <- function(key = NULL, server = "ext") { tables <- schema %>% sapply(function(x) x$type == "TABLE") saved_tables <- schema[tables] - data.frame( + vctrs::new_data_frame(list( label = sapply(saved_tables, function(x) x$label), - id = sapply(saved_tables, function(x) x$id) - ) + id = new_schema_uri( + vapply(saved_tables, function(x) x$id, ""), + vapply(saved_tables, function(x) x$id, "") + ) + ), class = c("tbl", "tbl_df")) } #' @param table_uri Identifier of a saved table as returned by @@ -21,6 +24,7 @@ sc_table_saved_list <- function(key = NULL, server = "ext") { #' @export sc_table_saved <- function(table_uri, language = NULL, key = NULL, server = "ext") { language <- sc_language(language) + table_uri <- as.character(table_uri) if (substr(table_uri, 1, 3) != "str") table_uri <- paste0("str:table:", table_uri) sc_with_cache(c("sc_table_saved", table_uri, language, key), function() { diff --git a/R/tabulate.R b/R/tabulate.R index 1a8abbdb..5a2858d8 100644 --- a/R/tabulate.R +++ b/R/tabulate.R @@ -21,14 +21,14 @@ #' @param .list allows to define the arguments for `...` as a character vector. #' @param raw If FALSE (the default), apply labeling to the dataset. #' Otherwise, return codes. -#' @param language The language to be used for labelling. By default, the +#' @param language The language to be used for labeling. By default, the #' dataset language (`table$language`) is used. #' @param sort If `TRUE`, the resulting data will be sorted by all provided #' field values #' @details #' Aggregation is done as follows #' -#' * First, all columns that priovide a total code via `table$total_codes()` +#' * First, all columns that provide a total code via `table$total_codes()` #' will be used to filter for `column == total_code` or `column != total_code` #' * Then, the remaining data is aggregated using [rowsum()] #' @@ -37,7 +37,7 @@ #' #' For objects of class `sc_table` two additional operations are performed. #' * zeros are recoded to `NA`s -#' * rounding is done according to the precision of each measure. Ronding +#' * rounding is done according to the precision of each measure. Rounding #' happens after the recoding to `NA` values #' @seealso sc_table_class #' @examples diff --git a/R/utils.R b/R/utils.R index 092bf3c7..d0d2a380 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,15 +14,19 @@ sc_parse_time <- function(timestamp) { (as.numeric(timestamp) / 1000) %>% as.POSIXct(origin = "1970-01-01") } -sc_headers <- function(language = c("en", "de"), key = NULL, server = "ext", ...) { +sc_user_agent <- function(){ + paste0("STATcubeR/", sc_version(FALSE), + " (http://github.com/statistikat/STATcubeR)", + " httr/", utils::packageVersion("httr"), + " R/", R.version$major, ".", R.version$minor) +} + +sc_headers <- function(language = NULL, key = NULL, server = "ext", ...) { if (is.null(key)) key <- sc_key(server) httr::add_headers( - APIKey = key, `Accept-Language` = match.arg(language), ..., - `User-Agent` = paste0("STATcubeR/", sc_version(FALSE), - " (http://github.com/statistikat/STATcubeR)", - " httr/", utils::packageVersion("httr"), - " R/", R.version$major, ".", R.version$minor)) + APIKey = key, `Accept-Language` = sc_language(language), ..., + `User-Agent` = sc_user_agent()) } sc_language <- function(language = NULL, options = c("en", "de")) { @@ -30,3 +34,7 @@ sc_language <- function(language = NULL, options = c("en", "de")) { language <- getOption("STATcubeR.language") match.arg(language, options) } + +data_frame <- function(...) { + vctrs::new_data_frame(list(...), class = "tbl") +} diff --git a/R/zzz.R b/R/zzz.R index 81fbdbc7..80d5e635 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,17 +1,10 @@ .onLoad <- function(...) { - if (requireNamespace("pillar", quietly = TRUE)) { - register_s3 <- function(method, class, fun, pkg = "pillar") - registerS3method(method, class, fun, asNamespace(pkg)) - register_s3("tbl_format_footer", "sc_meta", tbl_format_footer.sc_meta) - register_s3("tbl_sum", "sc_meta", tbl_sum.sc_meta) - register_s3("tbl_sum", "sc_tibble", tbl_sum.sc_tibble) - } - if (in_pkgdown()) cli_theme_pkgdown() } cli_theme_pkgdown <- function() { + options(cli.hyperlink_run = FALSE) options(cli.theme = list( ".field" = list("color" = "#0d0d73"), ".code" = list("color" = "blue"), @@ -27,6 +20,7 @@ cli_theme_pkgdown <- function() { } cli_theme_reset <- function() { + options(cli.hyperlink_run = TRUE) Sys.unsetenv("R_CLI_HYPERLINK_MODE") options(cli.theme = NULL) options(fansi.warn = NULL) diff --git a/README.md b/README.md index e884bd04..b57e4d00 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# STATcubeR +# STATcubeR [![R-CMD-check](https://github.com/statistikat/STATcubeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/statistikat/STATcubeR/actions/workflows/R-CMD-check.yaml) @@ -32,14 +32,14 @@ you can also download the package as a tar archive from https://github.com/stati The package can then be installed by providing a path to the downloaded archive file. ```r -install.packages('STATcubeR-0.4.3.tar.gz', repos = NULL) +install.packages('STATcubeR-0.6.0.tar.gz', repos = NULL) ``` ## Open Data To import datasets from https://data.statistik.gv.at, pass the dataset -id to the `od_table()` function. For example, OGD data about the [austrian population in 2020](https://data.statistik.gv.at/web/meta.jsp?dataset=OGD_bevstandjbab2002_BevStand_2020) +id to the `od_table()` function. For example, OGD data about the [Austrian population in 2020](https://data.statistik.gv.at/web/meta.jsp?dataset=OGD_bevstandjbab2002_BevStand_2020) can be accessed as follows. ```r @@ -67,8 +67,8 @@ an overview of the 315 datasets that are compatible with `od_table()`. ## STATcube API In order to use the REST API, it is required to set up an API key. As mentioned in the -[api key article](https://statistikat.github.io/STATcubeR/articles/sc_key.html), -this requres a STATcube subscription. +[API key article](https://statistikat.github.io/STATcubeR/articles/sc_key.html), +this requires a STATcube subscription. There are four main functions that interact with the API diff --git a/config/jenkins.yaml b/config/jenkins.yaml index 89e10158..9ed6156d 100644 --- a/config/jenkins.yaml +++ b/config/jenkins.yaml @@ -15,4 +15,4 @@ pkg_options: features: build_branches: - - statbucket + - tibble_pkg diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 00000000..f8d3d71d --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,74 @@ +Bundesländer +CLI +CMD +Catalogue +EOL +GRP +Herkunftsland +Infos +JSON +Lifecycle +MDN +OGD +Opendata +Recode +Recodes +Recoding +Rmd +SES +STATatlas +STATcube +Schemas +Typechecks +URI +URIs +catalogue +cli +cloneable +csv +datasources +de +df +english +fileserver +github +gui +gv +http +https +httr +infopage +infos +json +jsons +microdata +od +opendata +pkgdown +programmatically +quartiles +readme +recode +recoded +recoder +recodes +recoding +recurse +sc +schemas +statistik +tabset +tibbles +timeseries +tooltip +tooltips +uid +uids +unparsed +uri +webscraping +wingarc +yaml +dataset +SDMX +sdmx diff --git a/inst/json_examples/foreign_trade.json b/inst/json_examples/foreign_trade.json index dafd5f99..65a9d692 100644 --- a/inst/json_examples/foreign_trade.json +++ b/inst/json_examples/foreign_trade.json @@ -14,17 +14,17 @@ [ "str:value:denatec06:F-DATA:C-NATEC_CPA-0:C-NATEC_CPA-0:NATEC_CPA-CPA_E" ] ] }, - "str:field:denatec06:F-DATA:C-NACEK2_3-0" : { + "str:field:denatec06:F-DATA:C-NACEK2_4-0" : { "map" : [ - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-BTE" ], - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-G" ], - [ "str:value:denatec06:F-DATA:C-NACEK2_3-0:C-NACEK2_2-0:NACE-AFHTU" ] + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-BTE" ], + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-G" ], + [ "str:value:denatec06:F-DATA:C-NACEK2_4-0:C-NACEK2_4-0:NACE-AFHTU" ] ] } }, "dimensions" : [ [ "str:field:denatec06:F-DATA:C-NATEC_CPA-0" ], [ "str:field:denatec06:F-DATA:C-A10-0" ], - [ "str:field:denatec06:F-DATA:C-NACEK2_3-0" ] + [ "str:field:denatec06:F-DATA:C-NACEK2_4-0" ] ] } diff --git a/inst/sdmx/README.md b/inst/sdmx/README.md new file mode 100644 index 00000000..42b271aa --- /dev/null +++ b/inst/sdmx/README.md @@ -0,0 +1,6 @@ +# sdmx demo datasets + +sdmx demo data intended to be used for documentation and automated tests. +Currently, one dataset is available which was generated from +[Communes (Demo)](https://statcube.at/statistik.at/ext/statcube/openinfopage?tableId=defaulttable_dedemo) +database. diff --git a/inst/sdmx/dedemo.zip b/inst/sdmx/dedemo.zip new file mode 100644 index 00000000..0ae4c0b4 Binary files /dev/null and b/inst/sdmx/dedemo.zip differ diff --git a/man/figures/logo2.svg b/man/figures/logo2.svg new file mode 100644 index 00000000..e4522516 --- /dev/null +++ b/man/figures/logo2.svg @@ -0,0 +1,44 @@ + diff --git a/man/od_cache.Rd b/man/od_cache.Rd index 5711c5b6..57f2c72e 100644 --- a/man/od_cache.Rd +++ b/man/od_cache.Rd @@ -19,7 +19,7 @@ Functions to inspect the contents of the current cache. } \details{ \code{\link[=od_cache_summary]{od_cache_summary()}} provides an overview of all contents of the cache through -a data.frame. It hasone row for each dataset and the following columns. +a data.frame. It has one row for each dataset and the following columns. All file sizes are given in bytes \itemize{ \item \strong{\code{id}} the dataset id diff --git a/man/od_catalogue.Rd b/man/od_catalogue.Rd index a74b8b4c..17d1dd93 100644 --- a/man/od_catalogue.Rd +++ b/man/od_catalogue.Rd @@ -39,7 +39,7 @@ Currently, the following columns are provided.\tabular{lll}{ The type \code{datetime} refers to the \code{POSIXct} format as returned by \code{\link[=Sys.time]{Sys.time()}}. -The last column \code{"json"} containes the full json metadata as returned by +The last column \code{"json"} contains the full json metadata as returned by \code{\link[=od_json]{od_json()}}. } \examples{ diff --git a/man/od_list.Rd b/man/od_list.Rd index afd336b1..f037d752 100644 --- a/man/od_list.Rd +++ b/man/od_list.Rd @@ -7,7 +7,7 @@ od_list(unique = TRUE, server = c("ext", "red")) } \arguments{ -\item{unique}{some datasets are pulbished under multiple groups. +\item{unique}{some datasets are published under multiple groups. They will only be listed once with the first group they appear in unless this parameter is set to \code{FALSE}.} diff --git a/man/od_revisions.Rd b/man/od_revisions.Rd index 3a1df1ce..f7ae5957 100644 --- a/man/od_revisions.Rd +++ b/man/od_revisions.Rd @@ -8,7 +8,7 @@ od_revisions(since = NULL, exclude_ext = TRUE, server = "ext") } \arguments{ \item{since}{(optional) A timestamp. If supplied, only datasets updated -later will be returned. Otherwise, all datasets are retured. +later will be returned. Otherwise, all datasets are returned. Can be in either one of the following formats \itemize{ \item a native R time type that is compatible with \code{strftime()} @@ -25,7 +25,7 @@ server (the default) or \code{red} for the editing server. The editing server is only accessible for employees of Statistics Austria} } \value{ -a character verctor with dataset ids +a character vector with dataset ids } \description{ Use the \verb{/revision} endpoint of the OGD server to get a list diff --git a/man/od_table.Rd b/man/od_table.Rd index eb743e33..d122840b 100644 --- a/man/od_table.Rd +++ b/man/od_table.Rd @@ -7,7 +7,7 @@ od_table(id, language = NULL, server = "ext") } \arguments{ -\item{id}{the id of the data-set that should be accessed} +\item{id}{the id of the dataset that should be accessed} \item{language}{language to be used for labeling. \code{"en"} or \code{"de"}} diff --git a/man/od_table_class.Rd b/man/od_table_class.Rd index 9e602e03..e33bbdb8 100644 --- a/man/od_table_class.Rd +++ b/man/od_table_class.Rd @@ -19,7 +19,7 @@ R6 Class open data datasets. Similar contents can be found in \verb{$meta}.} -\item{\code{resources}}{lists all files downloaded from the server to contruct this table} +\item{\code{resources}}{lists all files downloaded from the server to construct this table} \item{\code{od_server}}{The server used for initialization (see to \code{?od_table})} } @@ -54,7 +54,7 @@ initialize objects of class \code{od_table}. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{id}}{the id of the data-set that should be accessed} +\item{\code{id}}{the id of the dataset that should be accessed} \item{\code{language}}{language to be used for labeling. \code{"en"} or \code{"de"}} diff --git a/man/other_endpoints.Rd b/man/other_endpoints.Rd index 5e9fc11a..e7e5bbc6 100644 --- a/man/other_endpoints.Rd +++ b/man/other_endpoints.Rd @@ -43,8 +43,8 @@ for calls against the \verb{/table} endpoint. \item \code{remaining} how much requests can be sent to the \verb{/table} endpoint until the rate limit is reached. \item \code{limit} the number of requests allowed per hour. -\item \code{reset} a tiestamp when the rate limit will be reset. -Ususally, this should be less than one hour `after the current time. +\item \code{reset} a timestamp when the rate limit will be reset. +Usually, this should be less than one hour `after the current time. } \item \code{sc_rate_limits()}: gets rate limits from response headers diff --git a/man/sc_browse.Rd b/man/sc_browse.Rd index c9d8603d..4d8b74e6 100644 --- a/man/sc_browse.Rd +++ b/man/sc_browse.Rd @@ -40,7 +40,7 @@ A collection of links, to browse important STATcube pages. \itemize{ \item \code{sc_browse()}: opens the home menu of STATcube -\item \code{sc_browse_preferences()}: opens the preference menu with the api key +\item \code{sc_browse_preferences()}: opens the preference menu with the API key \item \code{sc_browse_table()}: shows the info page for a table diff --git a/man/sc_cache.Rd b/man/sc_cache.Rd index 9b566c9a..d879d37f 100644 --- a/man/sc_cache.Rd +++ b/man/sc_cache.Rd @@ -23,10 +23,10 @@ sc_cache_files(x) sc_cache_clear() } \arguments{ -\item{verbose}{print instuctions on how to set up caching persistently +\item{verbose}{print instructions on how to set up caching persistently via environment variables?} -\item{dir}{a chace directory} +\item{dir}{a cache directory} \item{x}{an object of class \code{sc_table} or \code{sc_schema}} } @@ -42,7 +42,7 @@ old cache entries. Caching can be set up using environment variables. To set up a persistent cache for both Open Data and the REST API, the following lines in \code{.Renviron} can be used. -The paths in this example are only applicalble for UNIX-based operating systems. +The paths in this example are only applicable for UNIX-based operating systems. \if{html}{\out{
}}\preformatted{STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE STATCUBE_CACHE = TRUE @@ -62,7 +62,7 @@ endpoints \code{\link[=sc_info]{sc_info()}} and \code{\link[=sc_rate_limit_table \item \code{sc_cache_disable()}: disables caching for the current R session sc_cache_disable() -\item \code{sc_cache_enabled()}: informs wether the cache is currently enabled +\item \code{sc_cache_enabled()}: informs whether the cache is currently enabled \item \code{sc_cache_dir()}: get/set the directory used for caching diff --git a/man/sc_data.Rd b/man/sc_data.Rd index 2d7a3cf9..a83663ba 100644 --- a/man/sc_data.Rd +++ b/man/sc_data.Rd @@ -5,7 +5,7 @@ \title{Common interface for STATcubeR datasets} \description{ This class represents a common interface for datasets returned from the -STATcube REST API and OGD datasets. \code{sc_data} obects are usually created with +STATcube REST API and OGD datasets. \code{sc_data} objects are usually created with \code{\link[=od_table]{od_table()}} or \code{\link[=sc_table]{sc_table()}}. } \examples{ @@ -105,7 +105,7 @@ Do not use directly but initialize objects with \code{\link[=sc_table]{sc_table( \if{latex}{\out{\hypertarget{method-sc_data-field}{}}} \subsection{Method \code{field()}}{ get information about a specific field. The format of -the reurn value is similar to \verb{$meta}. A \code{data.frame} that includes +the return value is similar to \verb{$meta}. A \code{data.frame} that includes codes and labels for each level of the field. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_data$field(i = 1)}\if{html}{\out{
}} @@ -114,7 +114,7 @@ codes and labels for each level of the field. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{i}}{specifier for the field. Integer or character. If an interger +\item{\code{i}}{specifier for the field. Integer or character. If an integer is provided, it should match the row number in \verb{$meta$fields}. If a character is provided, the field is matched using \code{\link[=pmatch]{pmatch()}} on all available codes and labels.} diff --git a/man/sc_last_error.Rd b/man/sc_last_error.Rd index f518d7a4..de058bf0 100644 --- a/man/sc_last_error.Rd +++ b/man/sc_last_error.Rd @@ -13,9 +13,9 @@ sc_last_error_parsed() The return value from \code{httr::GET()} or \code{httr::POST()}. } \description{ -In case API requests are unsuccessfull, \code{STATcubeR} will throw errors +In case API requests are unsuccessful, \code{STATcubeR} will throw errors to summarize the httr error type and its meaning. -Requests are considered unsuccessfull if one of the following applies +Requests are considered unsuccessful if one of the following applies \itemize{ \item The response returns \code{TRUE} for \code{httr::http_error()}. \item The response is not of type \code{"application/json"} @@ -23,7 +23,7 @@ Requests are considered unsuccessfull if one of the following applies In some cases it is useful to get direct access to a faulty response object. For that purpose, it is possible to use \code{\link[=sc_last_error]{sc_last_error()}} which will provide -the httr response object for the last unsuccessfull request. +the httr response object for the last unsuccessful request. } \section{Functions}{ \itemize{ diff --git a/man/sc_recoder.Rd b/man/sc_recoder.Rd index a886c54d..63d024f7 100644 --- a/man/sc_recoder.Rd +++ b/man/sc_recoder.Rd @@ -5,7 +5,7 @@ \title{Recode sc_table objects} \description{ A collection of methods that can be used to modify an object of class -sc_table by reference. Typical usage is to acces the \code{recode} binding +sc_table by reference. Typical usage is to access the \code{recode} binding of an \code{sc_table} object and then use method chaining to perform recode operations. @@ -142,7 +142,7 @@ Change the labels of a level \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-sc_recoder-total_codes}{}}} \subsection{Method \code{total_codes()}}{ -Cheange the total code for a field +Change the total code for a field \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_recoder$total_codes(field, new)}\if{html}{\out{
}} } @@ -163,7 +163,7 @@ new total code. In case of \code{NA}, the total code will be unset.} \if{latex}{\out{\hypertarget{method-sc_recoder-visible}{}}} \subsection{Method \code{visible()}}{ set the visibility of a level. Invisible levels are -ommited in the output of \verb{$tabulate()} but don't affect aggregation +omitted in the output of \verb{$tabulate()} but don't affect aggregation \subsection{Usage}{ \if{html}{\out{
}}\preformatted{sc_recoder$visible(field, level, new)}\if{html}{\out{
}} } diff --git a/man/sc_schema.Rd b/man/sc_schema.Rd index 2cd632ec..b19b1126 100644 --- a/man/sc_schema.Rd +++ b/man/sc_schema.Rd @@ -14,12 +14,7 @@ sc_schema(id = NULL, depth = NULL, language = NULL, key = NULL, server = "ext") sc_schema_flatten(x, type) -sc_schema_catalogue( - depth = "folder", - language = c("en", "de"), - key = NULL, - server = "ext" -) +sc_schema_catalogue(depth = "folder", ...) sc_schema_db(id, depth = "valueset", language = c("en", "de"), key = NULL) } @@ -45,7 +40,7 @@ the production server. External users should always use the default option \code \item{x}{an object of class \code{sc_schema()} i.e. the return value of \code{\link[=sc_schema]{sc_schema()}}, \code{\link[=sc_schema_db]{sc_schema_db()}} or \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}}.} -\item{tree}{wether to use the \href{https://rdrr.io/cran/data.tree/man/data.tree.html}{\code{data.tree}} package for printing.} +\item{tree}{whether to use the \href{https://rdrr.io/cran/data.tree/man/data.tree.html}{\code{data.tree}} package for printing.} \item{limit, ...}{passed to \code{\link[data.tree:print.Node]{data.tree::print.Node()}} if \code{tree} is set to \code{TRUE}. Ignored otherwise.} @@ -58,9 +53,9 @@ Invoke the \href{https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-d This endpoint can be used to get all available databases and tables as well as metadata about specific databases. -The main function \code{sc_schema()} can be used with any resouce id. +The main function \code{sc_schema()} can be used with any resource id. \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}} and \code{\link[=sc_schema_db]{sc_schema_db()}} are very simple -wrapper functions around \code{\link[=sc_schema]{sc_schema()}} and are comparabable to the +wrapper functions around \code{\link[=sc_schema]{sc_schema()}} and are comparable to the \href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} or the \href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} of the STATcube GUI. @@ -72,12 +67,12 @@ are wrapped into a class called \code{sc_schema} to simplify the usage in R. \item \code{sc_schema_flatten()}: turns a \code{sc_schema} object into a \code{data.frame} \item \code{sc_schema_catalogue()}: is similar to the -\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and reurns +\href{https://portal.statistik.at/statistik.at/ext/statcube/jsf/dataCatalogueExplorer.xhtml}{catalogue explorer} of the STATcube GUI and returns a tree-type object containing all databases and tables. \item \code{sc_schema_db()}: is similar to the \href{https://portal.statistik.at/statistik.at/ext/statcube/opendatabase?id=deake005}{table view} -of ths STATcube GUI and gives information about all measures and +of the STATcube GUI and gives information about all measures and classification fields for a specific database }} diff --git a/man/sc_table.Rd b/man/sc_table.Rd index dd937647..fc205904 100644 --- a/man/sc_table.Rd +++ b/man/sc_table.Rd @@ -8,7 +8,7 @@ \alias{sc_table_saved} \title{Create a request against the /table endpoint} \usage{ -sc_table(json_file, language = NULL, add_totals = TRUE, key = NULL) +sc_table(json, language = NULL, add_totals = TRUE, key = NULL, json_file = NA) sc_examples_list() @@ -19,11 +19,12 @@ sc_table_saved_list(key = NULL, server = "ext") sc_table_saved(table_uri, language = NULL, key = NULL, server = "ext") } \arguments{ -\item{json_file}{path to a json file, which was downloaded via the STATcube -GUI ("Open Data API Abfrage")} +\item{json}{Path to a json file, which was downloaded via the STATcube +GUI ("Open Data API Request"). Alternatively, a json string which +passes \code{\link[jsonlite:validate]{jsonlite::validate()}}.} \item{language}{The language to be used for labeling. \code{"en"} (the default) -will use english. \code{"de"} uses german. +will use english. \code{"de"} uses German. The third option \code{"both"} will import both languages by sending two requests to the \verb{/table} endpoint.} @@ -33,6 +34,8 @@ the json request?} \item{key}{(\code{string}) An API key. To display your key, call \code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} +\item{json_file}{Deprecated. Use \code{json} instead} + \item{filename}{The name of an example json file.} \item{server}{A STATcube API server. Defaults to the external Server via @@ -61,7 +64,7 @@ Those three functions all return an object of class \code{"sc_table"}. } \examples{ \dontshow{if (sc_key_exists()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -my_table <- sc_table(json_file = sc_example("population_timeseries.json")) +my_table <- sc_table(json = sc_example("population_timeseries.json")) # print my_table diff --git a/man/sc_table_class.Rd b/man/sc_table_class.Rd index 415a5d3a..d0c55907 100644 --- a/man/sc_table_class.Rd +++ b/man/sc_table_class.Rd @@ -18,7 +18,7 @@ STATcube REST API. \item{\code{raw}}{the raw response content} -\item{\code{annotation_legend}}{list of all annotations occuring in the data as a \code{data.frame} with +\item{\code{annotation_legend}}{list of all annotations occurring in the data as a \code{data.frame} with two columns for the annotation keys and annotation labels.} \item{\code{rate_limit}}{how much requests were left after the POST request for this table was sent? @@ -50,7 +50,7 @@ Uses the same format as \code{\link[=sc_rate_limit_table]{sc_rate_limit_table()} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-sc_table-new}{}}} \subsection{Method \code{new()}}{ -Ususally, objects of class \code{sc_table} are generated with +Usually, objects of class \code{sc_table} are generated with one of the factory methods \code{\link[=sc_table]{sc_table()}}, \code{\link[=sc_table_saved]{sc_table_saved()}} or \code{\link[=sc_table_custom]{sc_table_custom()}}. If this constructor is invoked directly, either omit the parameters \code{json} and \code{file} or make sure that they @@ -70,7 +70,7 @@ endpoint.} \item{\code{file}}{the file path to the json file} \item{\code{add_totals}}{was the json request modified by adding totals via -the add_toals parameter in one of the factory functions (\code{sc_table()}, +the add_totals parameter in one of the factory functions (\code{sc_table()}, \code{sc_table_custom()}). Necessary, in order to also request totals via the \verb{$add_language()} method.} } @@ -97,7 +97,12 @@ added to a timeseries An extension of \code{\link[=sc_tabulate]{sc_tabulate()}} with additional parameters. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{sc_table_class$tabulate(..., round = TRUE, annotations = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{sc_table_class$tabulate( + ..., + round = FALSE, + annotations = FALSE, + recode_zeros = FALSE +)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -105,11 +110,13 @@ parameters. \describe{ \item{\code{...}}{Parameters which are passed down to \code{\link[=sc_tabulate]{sc_tabulate()}}} -\item{\code{round}}{apply rounding to each measure accoring to the precision +\item{\code{round}}{apply rounding to each measure according to the precision provided by the API.} \item{\code{annotations}}{Include separate annotation columns in the returned table. This parameter is currently broken and needs to be re-implemented} + +\item{\code{recode_zeros}}{interpret zero values as missings?} } \if{html}{\out{
}} } diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 72fb1cef..15035197 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/table_custom.R \name{sc_table_custom} \alias{sc_table_custom} +\alias{sc_recode} \title{Create custom tables} \usage{ sc_table_custom( @@ -10,18 +11,49 @@ sc_table_custom( dimensions = c(), language = c("en", "de"), add_totals = TRUE, - key = NULL + key = NULL, + recodes = NULL, + dry_run = FALSE ) + +sc_recode(field, map = NULL, total = FALSE) } \arguments{ -\item{db}{The uid of a database} +\item{db}{The uid of a database. Must be of type \code{DATASET}} -\item{measures}{A character vector of uids for measures. Can be either of -type \code{MEASURE} or of type \code{STAT_FUNCTION}} +\item{measures}{A character vector of uids for measures. Each entry must be +of type \code{MEASURE}, \code{STAT_FUNCTION} or \code{COUNT}.} \item{dimensions}{A character vector of dimensions for the cube. Can be either of type \code{FIELD} or type \code{VALUESET}. Those entries are referred to as \code{fields} in the parsed API response} + +\item{language}{The language to be used for labeling. "en" +(the default) will use English. "de" uses German.} + +\item{add_totals}{Should totals be added for each classification field in +the json request? Ignored if \code{recodes} is used.} + +\item{key}{(\code{string}) An API key. To display your key, call +\code{\link[=sc_browse_preferences]{sc_browse_preferences()}}.} + +\item{recodes}{One or more recodes that were generated via \code{\link[=sc_recode]{sc_recode()}}. +If more than one recode is supplied, recodes should be concatenated with +\code{\link[=c]{c()}}.} + +\item{dry_run}{If \code{TRUE}, no request is sent to the API. Instead, type +checks are performed and the json request is returned as a string. +Defaults to \code{FALSE}.} + +\item{field}{An uid of a classification field to be recoded. The provided +uid should also be passed in the \code{dimensions} parameter of +\code{\link[=sc_table_custom]{sc_table_custom()}}.} + +\item{map}{A list of ids for values (type \code{VALUE}) This can also be a nested +list if items should be grouped. See examples} + +\item{total}{Add totals to the field? If \code{map} is provided, the totals +will correspond to the filtered data.} } \description{ Define requests against the /table endpoint by providing @@ -30,7 +62,41 @@ The URIs can be obtained using \code{\link[=sc_schema_db]{sc_schema_db()}}. See the \href{https://statistikat.github.io/STATcubeR/articles/sc_table_custom.html}{Custom tables article} for more details. } +\section{Functions}{ +\itemize{ +\item \code{sc_recode()}: creates a recode object which can be used +for the \code{recode} parameter of \code{\link[=sc_table_custom]{sc_table_custom()}} + +}} +\section{Schema objects in parameters}{ + +it is possible to pass \code{sc_schema} objects (usually generated by +\code{\link[=sc_schema_db]{sc_schema_db()}}) instead of ids in \code{\link[=sc_table_custom]{sc_table_custom()}} and \code{\link[=sc_recode]{sc_recode()}}. +If provided, the schema objects will be converted into ids via \verb{$id}. +} + +\section{Error handling}{ + +Unfortunately, the API gives fairly vague error messages in case a +custom table request is ill defined. For this reason, \code{\link[=sc_table_custom]{sc_table_custom()}} +applies some simple heuristics and throws warnings if inconsistencies +in the provided parameters are recognized. The following conditions are +currently checked +\itemize{ +\item the parameter \code{db} is of type \code{DATABASE} +\item all entries in \code{measures} are of type \code{MEASURE}, \code{COUNT} or +\code{STATFN} +\item all entries in \code{dimensions} are of type \code{VALUESET} or \code{FIELD} +\item all entries in \code{field} are of type \code{VALUESET} or \code{FIELD} +\item all entries in \code{map} are of type \code{VALUE} +\item all fields in \code{recodes} are also present in \code{dimensions} +\item the first two arguments of \code{sc_recode()} are consistent, i.e. +if the provided \code{VALUE}s belong to the \code{VALUESET/FIELD} +} +} + \examples{ +\dontshow{if (sc_key_exists()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} sc_table_custom("str:database:detouextregsai") sc_table_custom( @@ -49,5 +115,24 @@ sc_table_custom( "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0" ) ) + +schema <- sc_schema_db("detouextregsai") +region <- schema$`Other Classifications`$`Tourism commune [ABO]`$ + `Regionale Gliederung (Ebene +1)` +month <- schema$`Mandatory fields`$`Season/Tourism Month` + +x <- sc_table_custom( + schema, + schema$Facts$Arrivals, + list(month, region), + recodes = c( + sc_recode(region, total = FALSE, map = list( + region$Achensee, + list(region$Arlberg, region$`Ausseerland-Salzkammergut`) + )), + sc_recode(month, total = FALSE) + ) +) +x$tabulate() +\dontshow{\}) # examplesIf} } -\keyword{internal} diff --git a/man/sc_tabulate.Rd b/man/sc_tabulate.Rd index 3c93406f..2b2ec328 100644 --- a/man/sc_tabulate.Rd +++ b/man/sc_tabulate.Rd @@ -30,7 +30,7 @@ Ignored if \code{raw} is set to \code{TRUE}.} \item{recode_zeros}{turn zero values into \code{NA}s} -\item{language}{The language to be used for labelling. By default, the +\item{language}{The language to be used for labeling. By default, the dataset language (\code{table$language}) is used.} \item{sort}{If \code{TRUE}, the resulting data will be sorted by all provided @@ -52,7 +52,7 @@ is true for fields. \details{ Aggregation is done as follows \itemize{ -\item First, all columns that priovide a total code via \code{table$total_codes()} +\item First, all columns that provide a total code via \code{table$total_codes()} will be used to filter for \code{column == total_code} or \code{column != total_code} \item Then, the remaining data is aggregated using \code{\link[=rowsum]{rowsum()}} } @@ -63,7 +63,7 @@ See Examples For objects of class \code{sc_table} two additional operations are performed. \itemize{ \item zeros are recoded to \code{NA}s -\item rounding is done according to the precision of each measure. Ronding +\item rounding is done according to the precision of each measure. Rounding happens after the recoding to \code{NA} values } } diff --git a/man/sdmx_table.Rd b/man/sdmx_table.Rd new file mode 100644 index 00000000..4cadeb94 --- /dev/null +++ b/man/sdmx_table.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sdmx_table.R +\name{sdmx_table} +\alias{sdmx_table} +\title{Import data from SDMX} +\usage{ +sdmx_table(file) +} +\arguments{ +\item{file}{a "sdmx archive" file that was downloaded from STATcube.} +} +\value{ +An object of class \code{sc_data} +} +\description{ +Function that reads STATcube data from an sdmx archive - a zip file +consisting of \code{structure.xml} with metadata and \code{dataset.xml} for the +values. +} +\examples{ +x <- "sdmx/dedemo.zip" \%>\% system.file(package = "STATcubeR") \%>\% sdmx_table() +# print and tabulate +x +x$tabulate() +# explore hierarchies +nuts2 <- x$field("C-B00-0") +data.frame(label = nuts2$label, + parent = nuts2$label[match(nuts2$parent, nuts2$code)]) +# extract more data from the raw xml +x$xml$meta \%>\% xml2::xml_find_first(".//Name") +} +\keyword{experimental} diff --git a/tests/spelling.R b/tests/spelling.R new file mode 100644 index 00000000..6713838f --- /dev/null +++ b/tests/spelling.R @@ -0,0 +1,3 @@ +if(requireNamespace('spelling', quietly = TRUE)) + spelling::spell_check_test(vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE) diff --git a/vignettes/R/df_print.R b/vignettes/R/df_print.R index 89b18ebe..466bccd3 100644 --- a/vignettes/R/df_print.R +++ b/vignettes/R/df_print.R @@ -16,16 +16,6 @@ registerS3method( envir = asNamespace("knitr") ) -registerS3method( - "knit_print", "sc_meta", knit_print.data.frame, - envir = asNamespace("knitr") -) - -registerS3method( - "knit_print", "sc_schema", knit_print.data.frame, - envir = asNamespace("knitr") -) - options(crayon.enabled = TRUE) options(pillar.min_chars = 30) options(pillar.bold = TRUE) diff --git a/vignettes/R/setup.R b/vignettes/R/setup.R index cd240676..4da029b2 100644 --- a/vignettes/R/setup.R +++ b/vignettes/R/setup.R @@ -2,4 +2,12 @@ library(STATcubeR) sc_key_set(Sys.getenv("STATCUBE_KEY_EXT")) source("R/df_print.R") knitr::opts_chunk$set(comment = "#>") +options( + STATcubeR.schema_colors = list( + "FOLDER" = "#4400cc", "DATABASE" = "#186868", "TABLE" = "#624918", + "GROUP" = "#4400cc", "FIELD" = "cyan", "VALUESET" = "cadetblue", + "VALUE" = "#4400cc", "MEASURE" = "#624918", "STAT_FUNCTION" = "cadetblue", + "COUNT" = "#624918" + ) +) source("R/add_tooltip.R")$value diff --git a/vignettes/od_list.Rmd b/vignettes/od_list.Rmd index 23d5ce86..116d7e17 100644 --- a/vignettes/od_list.Rmd +++ b/vignettes/od_list.Rmd @@ -21,7 +21,7 @@ This list is not updated regularly, so to get the most recent list, a call to od ## Interactive overview -Since some of the metadata contained in the OGD JSON files is only available in german, the following overview uses german labels. +Since some of the metadata contained in the OGD JSON files is only available in German, the following overview uses German labels. Click on the individual table cells to get more information. ```{r, echo = FALSE} @@ -119,8 +119,8 @@ od_index %>% ## CLI usage -To get a simplified viersion of this summary, use the `od_list()` function. -It uses webscraping techniques to get dataset ids and german labels based on the contents of https://data.statistik.gv.at/web/catalog.jsp. +To get a simplified version of this summary, use the `od_list()` function. +It uses webscraping techniques to get dataset ids and German labels based on the contents of https://data.statistik.gv.at/web/catalog.jsp. ```{r, eval = FALSE} all_datasets <- od_list() @@ -142,7 +142,7 @@ json <- od_json(id) json ``` -This output is generated from `r style_resource(id, ext = "json")` and shows a summary of the available metatata. Other parts of the metadata can be extracted with `$` using the keys from the json specification. +This output is generated from `r style_resource(id, ext = "json")` and shows a summary of the available metadata. Other parts of the metadata can be extracted with `$` using the keys from the json specification. ```{r} json$extras$update_frequency @@ -156,7 +156,7 @@ json$extras$update_frequency x <- od_table("OGD_bevstandjbab2002_BevStand_2020") ``` -The `r tippy_dataset(x, "population dataset")` measures the austrian population +The `r tippy_dataset(x, "population dataset")` measures the Austrian population for `r nrow(x$field("Commune"))` different regions. ```{r} diff --git a/vignettes/od_resources.Rmd b/vignettes/od_resources.Rmd index 842a4f5b..48ef4b99 100644 --- a/vignettes/od_resources.Rmd +++ b/vignettes/od_resources.Rmd @@ -27,7 +27,7 @@ By default, `r STATcubeR` caches all accessed resources from `r ogd_portal` in t od_cache_dir() ``` -Let's examine for example what happens when the data from the structure of earnings suvey (SES) is requested. +Let's examine for example what happens when the data from the structure of earnings survey (SES) is requested. ```{r} earnings <- od_table("OGD_veste309_Veste309_1") @@ -42,12 +42,12 @@ earnings$resources ``` `last_modified` tells us when the resource was changed on the fileserver. -If a resource does not exist in the cache or if the last modified entry in the json is newer than the cached file, it will be dowloaded from the server. +If a resource does not exist in the cache or if the last modified entry in the json is newer than the cached file, it will be downloaded from the server. Otherwise, the cached version is reused. ## Access and Updates -Cached files can be acessed with `od_cache_file()`. +Cached files can be accessed with `od_cache_file()`. If the specified file exists in the cache, a path to the file will be returned. Otherwise, the file is downloaded to the cache and then the path is returned. The files use the same naming conventions as the open data fileserver. @@ -68,7 +68,7 @@ od_resource("OGD_veste309_Veste309_1", "C-A11-0") ``` The parser behaves differently for header files, data files and fields. -Json files can be acessed with `od_json()`. +Json files can be accessed with `od_json()`. ```{r} json <- od_json("OGD_veste309_Veste309_1") diff --git a/vignettes/od_table.Rmd b/vignettes/od_table.Rmd index c9e08978..e7249ac9 100644 --- a/vignettes/od_table.Rmd +++ b/vignettes/od_table.Rmd @@ -39,7 +39,7 @@ To import a dataset, provide the dataset id as an argument. table <- od_table("OGD_krebs_ext_KREBS_1") ``` -This returns an object of class [`od_table`], which bundles all the data from the OGD portal that correspons to this dataset. +This returns an object of class [`od_table`], which bundles all the data from the OGD portal that corresponds to this dataset. Printing the object will show a summary of the contents. ```{r} @@ -50,20 +50,20 @@ The dataset contains the number of cancer patients by several classification fie - **tumor type** differentiates **<95>** types of cancers - The **reporting period** spans **<37>** years (1983 to 2019). -- The **regional** variable contains the **<9>** NUTS-2 regions of austria. +- The **regional** variable contains the **<9>** NUTS-2 regions of Austria. - The **demographic** variable "Sex" is reported with **<2>** levels ## Convert to a data frame The method `$tabulate()` can be used to turn the object into a `data.frame` in long format, -which contains labled data. +which contains labeled data. ```{r} table$tabulate() ``` The dataset contains `r nrow(table$data)` rows. -If every combination of tumor type, year, region and sex would contain a seperate row the number of rows would be the following. +If every combination of tumor type, year, region and sex would contain a separate row the number of rows would be the following. \[ 95\times37\times9\times2 = 63270 @@ -98,13 +98,13 @@ options(tibble.print_min = 5) ``` The method `table$field()` can be used to get information about specific classification fields. -Thise contain data from `{dataset_id}_{field_code}.csv`. -Unlike the metadata in `sc_table`, the `od_table` class always contains german and english labels. +These contain data from `{dataset_id}_{field_code}.csv`. +Unlike the metadata in `sc_table`, the `od_table` class always contains German and English labels. Both can be used to label the dataset. #### Tumor type -The following call gives access to the german and english labels for the 95 differen tumor types in the `"cancer type"` classification. +The following call gives access to the German and English labels for the 95 different tumor types in the `"cancer type"` classification. Click `"Year"` above to see information about the years. ```{r} @@ -127,7 +127,7 @@ table$field("C-BERJ-0") #### Province -The regional classification contains 9 elements which correspond to the NUTS2 regions ("Bundesländer") of austria. +The regional classification contains 9 elements which correspond to the NUTS2 regions ("Bundesländer") of Austria. ```{r} table$field("C-BUNDESLAND-0") @@ -137,7 +137,7 @@ table$field("C-BUNDESLAND-0") #### Sex -Sex is coded as a ditochome variable with the classification elements `"male"` and `"female"`. +Sex is coded as a dichotomous variable with the classification elements `"male"` and `"female"`. ```{r} table$field("C-KRE_GESCHLECHT-0") @@ -147,7 +147,7 @@ table$field("C-KRE_GESCHLECHT-0") ### json Metadata {.tabset .tabset-pills} -The json metadatafile `r style_resource("OGD_krebs_ext_KREBS_1", ext = "json")` is available via the `$json` binding. +The json metadata file `r style_resource("OGD_krebs_ext_KREBS_1", ext = "json")` is available via the `$json` binding. #### Cancer @@ -193,8 +193,8 @@ table$data levels(table$data$`C-BUNDESLAND-0`) == table$field("C-BUNDESLAND-0")$code ``` -As mentioned above, a labelled version of the data can be obtained via `table$tabulate()`. -The labelling is done by taking the raw dataset and then joining the labes from `$header` and `$field()`. +As mentioned above, a labeled version of the data can be obtained via `table$tabulate()`. +The labeling is done by taking the raw dataset and then joining the labels from `$header` and `$field()`. ```{r} table$tabulate() @@ -205,9 +205,9 @@ You can read more about `$tabulate()` in the `r ticle('sc_tabulate')`. ## A Trip to Germany {#sauerkraut} -It is possible to switch the language used for labelling the dataset using the `$language` field. +It is possible to switch the language used for labeling the dataset using the `$language` field. This field can be used to get and set the language. -Allowed options are `"en"` for english and `"de"` for german. +Allowed options are `"en"` for English and `"de"` for German. ```{r, collapse=TRUE} table$language @@ -216,7 +216,7 @@ table$language ``` This option affects the `print()` method as well as the output of `$tabulate()`. -If no english labels are available, the german labels are used as a fallback mechanism. +If no English labels are available, the German labels are used as a fallback mechanism. ```{r} table diff --git a/vignettes/sc_cache.Rmd b/vignettes/sc_cache.Rmd index a81948fb..5c7cc1eb 100644 --- a/vignettes/sc_cache.Rmd +++ b/vignettes/sc_cache.Rmd @@ -40,11 +40,11 @@ Caching will affect all calls to `sc_table()` and `sc_schema()` as well as their `sc_table_saved()`, `sc_table_custom()`, `sc_schema_db()`, `sc_schema_catalogue()`. If the same resource is requested several times, the last valid API response is reused. -Invalid resposes (such as 404 responses) will not be added to the cache. +Invalid responses (such as 404 responses) will not be added to the cache. Cache files always contain unparsed API responses as returned by `httr::GET()` or `httr::POST()`. Responses are stored in an `rds` format. -If caching is enabled, the corresponding cache files to an object of class `sc_schema` or `sc_table` can be retieved using `sc_cache_files()`. +If caching is enabled, the corresponding cache files to an object of class `sc_schema` or `sc_table` can be retrieved using `sc_cache_files()`. ```{r} sc_example("accomodation") %>% sc_table(language = "both") %>% sc_cache_files() @@ -52,7 +52,7 @@ sc_schema_catalogue() %>% sc_cache_files() ``` Note that the first call to `sc_cache_files()` returned two paths. -Since the table was requested in two languages, two api responses are necessary to construct the table object. +Since the table was requested in two languages, two API responses are necessary to construct the table object. The content of the cache files can be parsed using `readRDS()` and `httr::content()`. This gives direct access to the API response in a `list()` format. @@ -76,8 +76,8 @@ sc_cache_clear() ## Should I use caching? If you are using `r STATcubeR` interactively, the answer is probably no. -However, when building applications that rely on STATcube data caching can be a usefiul way to decrease the traffic with the STATcube server. -Another usecase for caching is if you are writing `{rmarkdown}` documents that rely on STATcube data. +However, when building applications that rely on STATcube data caching can be a useful way to decrease the traffic with the STATcube server. +Another use case for caching is if you are writing `{rmarkdown}` documents that rely on STATcube data. Caching makes those documents both reproducible and quicker to render. Please note that there is currently no reliable way to invalidate the cache. diff --git a/vignettes/sc_data.Rmd b/vignettes/sc_data.Rmd index 322807e9..1a1c6049 100644 --- a/vignettes/sc_data.Rmd +++ b/vignettes/sc_data.Rmd @@ -15,7 +15,7 @@ options(tibble.print_min = 5) ``` The class [sc_data] defines a common interface for open data datasets and responses from the `/table` endpoint of the STATcube REST API. -It defines methods that are applicable to both datasources like aquiring metadata, labeling the data and aggregating results. +It defines methods that are applicable to both datasources like acquiring metadata, labeling the data and aggregating results. ## Constructing sc_data objects @@ -29,8 +29,8 @@ Therefore, objects of the class should be created with one of the following func * `sc_table_saved()` and `sc_table_custom()` also use the `/table` endpoint. However, the request is specified via ids rather than a json file. -To illustrate, we will use one of the OGD datasets to showcase the functionalities of this class. -Notice however, that objects created with `sc_table()` can be used interchangibly. +To illustrate, we will use one of the OGD datasets to showcase the functionality of this class. +Notice however, that objects created with `sc_table()` can be used interchangeably. ```{r} x <- od_table("OGD_krebs_ext_KREBS_1") @@ -117,7 +117,7 @@ x$field("Sex") ## Tabulation The method `$tabulate()` can be used to turn `sc_table` objects into tidy data.frames. -See the `r ticle("sc_tabulate")` for more defails. +See the `r ticle("sc_tabulate")` for more defaults. ```{r} x$tabulate() diff --git a/vignettes/sc_info.Rmd b/vignettes/sc_info.Rmd index be2d6b74..e8069472 100644 --- a/vignettes/sc_info.Rmd +++ b/vignettes/sc_info.Rmd @@ -98,7 +98,7 @@ request to `sc_table()` is sent several times, this will not count towards the rate-limit (100 requests per hour).~~ Server-Side caching of [`/table`] responses is currently disabled due to security reasons. -Therefore, all requests against the [`/table`] endpoint count towards the ratelimit. +Therefore, all requests against the [`/table`] endpoint count towards the rate-limit. [`/info`]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/info-endpoint [`/rate_limit`]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/rate-limit diff --git a/vignettes/sc_key.Rmd b/vignettes/sc_key.Rmd index 6098103e..7ee2978e 100644 --- a/vignettes/sc_key.Rmd +++ b/vignettes/sc_key.Rmd @@ -49,7 +49,7 @@ You can set the key persistently by modifying `.Renviron`. This can be done easily with `usethis::edit_r_environ()`. By default, `sc_key_set()` will perform a test request to verify that the key is valid -and throw an error if the test request is unsuccessfull. +and throw an error if the test request is unsuccessful. ```r sc_key_set('wrong key') diff --git a/vignettes/sc_last_error.Rmd b/vignettes/sc_last_error.Rmd index 6b047df8..0ae122be 100644 --- a/vignettes/sc_last_error.Rmd +++ b/vignettes/sc_last_error.Rmd @@ -15,7 +15,7 @@ This article explains how `r STATcubeR` deals with situations where the communic ## Throw all the Errors All http errors codes from the REST API will be turned into R errors. -More precisely, the following conditions are checked to determine wether a request to the STATcube API was successful +More precisely, the following conditions are checked to determine whether a request to the STATcube API was successful * The response returns `FALSE` for `httr::http_error()` which means the response status is less than 400. See the [MDN reference] for more information about http status codes. @@ -53,7 +53,7 @@ See `?httr::content` to and `?httr::headers` to get started. This section showcases the most common types of errors that you might encounter when interacting with the API. Please feel free to open a new issue on the [STATcubeR issue tracker] if you get an error which is not listed here. -### Invalid API Key +### Invalid API Key {#AUTHENTICATION} If an invalid API key is used for a request, a 401 status is returned. @@ -66,7 +66,7 @@ In practice, you should set up your key according to the `r ticle('sc_key')`. ### API Not Accessible -This occurs if `r STATcubeR` tries to send requests to a server which is not accessible for the current environment. This will result in a timout error. +This occurs if `r STATcubeR` tries to send requests to a server which is not accessible for the current environment. This will result in a timeout error. ``` Error in curl::curl_fetch_memory(url, handle = handle) : @@ -88,7 +88,7 @@ Reasons this error might occur 4. (Statistics Austria Employees only) You downloaded a json request from one of our internal STATcube servers and try to use this request with `sc_table()` from outside. -Note to future-self: It might be a good idea to set up some environment variables on Statistic Austrias internal R servers to avoid (3) and (4). +Note to future-self: It might be a good idea to set up some environment variables on Statistic Austria's internal R servers to avoid (3) and (4). ### Rate Limit Exceeded @@ -102,11 +102,11 @@ sc_table_saved("defaulttable_deake005") readRDS("sc_last_error/rate_limit.rds") %>% STATcubeR:::sc_check_response() ``` -If you encounter this error, please check if the rate limits are in fact a plauible reason by using `sc_rate_limit_table()`. +If you encounter this error, please check if the rate limits are in fact a plausible reason by using `sc_rate_limit_table()`. Unfortunately, the response for exceeded rate limits is very generic and can not be differentiated from the response for invalid json-bodies (see below). This is why the error message lists two possible reasons. -### Schema +### Schema {#SCHEMA_COMPONENT_NOT_FOUND} Invalid URIs used with `sc_schema` will be displayed with a special error type `SCHEMA_COMPONENT_NOT_FOUND`. @@ -118,7 +118,7 @@ sc_schema("invalid_uri") readRDS("sc_last_error/schema.rds") %>% STATcubeR:::sc_check_response() ``` -### Saved Tables +### Saved Tables {#TXD_NOT_FOUND} As mentioned in the `r ticle("sc_table_saved")`, the function `sc_table_saved()` can only access default tables and tables that are saved under the current user. @@ -166,6 +166,28 @@ If you encounter this error during the workflow described in the `r ticle("sc_ta This is because json request that are downloaded by the STATcube GUI should always contain valid URIs. However, if you either modify the downloaded json requests or use `sc_table_custom()`, the reason "invalid json body" is plausible. +### Cell Limit Exceeded {#CELL_COUNT} + +This error occurs if more than 1 million cells are requested via a single +call to `sc_table()` or `sc_table_custom()`. +If you encounter this error, consider splitting up the request into multiple smaller requests or defining a filter in the gui or via a +[custom table filter](sc_table_custom.html#filter). + +```{r, eval = FALSE} +sc_table_custom( + "str:database:debevstand", + "str:measure:debevstand:F-BEVSTAND:F-ISIS-1", + c("str:field:debevstand:F-BEVSTAND:C-A10-0", + "str:valueset:debevstand:F-BEVSTAND:C-GNU-2:C-GNU-2", + "str:valueset:debevstand:F-BEVSTAND:C-BESC51-0:C-BESC51-0", + "str:valueset:debevstand:F-BEVSTAND:C-BESC11-0:C-BESC11-0") +) +``` + +```{r, echo = FALSE, error = TRUE} +readRDS("sc_last_error/cell_limit.rds") %>% STATcubeR:::sc_check_response() +``` + ## Custom Error Handling If you want to use your own error-handling instead of the default `r STATcubeR` error handlers, you can get started with the following code sample from one of our `{shiny}` applications. @@ -183,7 +205,7 @@ shiny::observeEvent(input$button_load_data, { ``` `try()` will turn errors into "error-objects" of class `"try-error"`. -A conditional is then used to perform different actions for successfull and unsccessfull requests. +A conditional is then used to perform different actions for successful and unsuccessful requests. If an error occurs, the error details are fetched via `sc_last_error_parsed()` and then sent to an error handler. Otherwise, the return value from `sc_table_saved()` is processed by the success handler. diff --git a/vignettes/sc_schema.Rmd b/vignettes/sc_schema.Rmd index 01db5ed6..637cd4f3 100644 --- a/vignettes/sc_schema.Rmd +++ b/vignettes/sc_schema.Rmd @@ -24,16 +24,16 @@ endpoint. ## Browsing the Catalogue -The first function shows the catalogue, which lists all available +The first function shows the catalog, which lists all available databases in a tree form. The tree structure is determined by the API and -closely resembles the "Catalogue" view in the GUI. +closely resembles the "Catalog" view in the GUI. ```{r} my_catalogue <- sc_schema_catalogue() my_catalogue ``` -We see that the catalog has 8 child nodes: Four childs of type `FOLDER` and four childs of type `TABLE`. +We see that the catalog has 8 child nodes: Four children of type `FOLDER` and four children of type `TABLE`. The table nodes correspond to the saved tables as described in the `r ticle("sc_table_saved")`. The folders include all folders from the root level in the [catalogue explorer](`r sc_browse_catalogue()`): "Statistics", "Publication and Services" as well as "Examples". @@ -53,13 +53,13 @@ options(tibble.print_max = 5) my_catalogue$Statistics ``` -The child node `Statistics` is also of class `sc_schema` and shows all entries of the subfolder. +The child node `Statistics` is also of class `sc_schema` and shows all entries of the sub-folder. ```{r,fig.align='center', out.width='50%', echo=FALSE} knitr::include_graphics("img/catalogue3.png") ``` -This syntax can be used to navigate through folders and subfolders. +This syntax can be used to navigate through folders and sub-folders. ```{r} my_catalogue$Statistics$`Foreign Trade` @@ -70,6 +70,7 @@ knitr::include_graphics("img/catalogue4.png") ``` In some cases, the API shows more folders than the GUI in which case the folders from the API will be empty. +Seeing an empty folder usually means that your STATcube user is not permitted to view the contents of the folder. ```{r} my_catalogue$Statistics$`Foreign Trade`$Außenhandelsindizes @@ -77,7 +78,7 @@ my_catalogue$Statistics$`Foreign Trade`$Außenhandelsindizes ## Databases and Tables -Inside the catalogue, the leafs^[In the context of tree-like data structures, leafs are used to describe nodes of a tree which have no child nodes] of the tree are mostly of type `DATABASE` and `TABLE`. +Inside the catalog, the leafs^[In the context of tree-like data structures, leafs are used to describe nodes of a tree which have no child nodes] of the tree are mostly of type `DATABASE` and `TABLE`. ```{r} my_catalogue$Statistics$`Foreign Trade`$`Regional data by federal provinces` @@ -138,7 +139,7 @@ The leafs of database schemas are mostly of type `VALUE` and `MEASURE`. ## Data Structure of sc_schema Objects -As shown above, `sc_schema` objets have a tree like structure. +As shown above, `sc_schema` objects have a tree like structure. Each `sc_schema` object has `id`, `label`, `location` and `type` as the last four entries ```{r} @@ -149,11 +150,11 @@ str(tail(my_catalogue$Statistics, 4)) Schema objects can have an arbitrary amount of children. Children are always of type `sc_schema`. `x$type` contains the type of the schema object. -A complete list of schema types is avilable in the [API reference](https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint). +A complete list of schema types is available in the [API reference](https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint). ## Other Resources -Information about resources other than databases and the catalogue can +Information about resources other than databases and the catalog can be obtained by passing the resource id to `sc_schema()`. ```{r, collapse=TRUE} @@ -184,7 +185,7 @@ group_info <- my_db_info$`Demographic Characteristics`$id %>% ## Printing with data.tree If the `{data.tree}` package is installed, it can be used for an alternative -print mehtod. +print method. ```{r} print(group_info, tree = TRUE) @@ -199,7 +200,7 @@ options(STATcubeR.print_tree = TRUE) ## Flatten a Schema The function `sc_schema_flatten()` can be used to turn responses from the `/schema` endpoint into `data.frame`s. -The following call extracts all databasess from the catalogue and displays their ids and labels. +The following call extracts all databases from the catalog and displays their ids and labels. ```{r} sc_schema_catalogue() %>% @@ -208,9 +209,9 @@ sc_schema_catalogue() %>% The string `"DATABASE"` in the previous example acts as a filter to make sure only nodes with the schema type `DATABASE` are included in the table. -If `"DATABASE"` is relaced with `"TABLE"`, all tables will be displayed. This includes +If `"DATABASE"` is replaced with `"TABLE"`, all tables will be displayed. This includes -* All the defaulttables on STATcube. +* All the default-tables on STATcube. Most databases have an associated default table. * All saved tables for the current user as described in the `r ticle("sc_table_saved")`. * Other saved tables. @@ -230,3 +231,14 @@ The following example shows all available measures from the [economic trend moni sc_schema_db("dekonjunkturmonitor") %>% sc_schema_flatten("MEASURE") ``` + +## Further Reading + +* Schemas can be used to construct table requests as described in + the `r ticle("sc_table_custom")` +* See the `r ticle("sc_table_saved")` to get access to the data for table + nodes in the schema. + +```{js, echo=FALSE} +$('[href^="https://portal.statistik.at/statistik.at/ext/statcube"]').attr("target", "_blank"); +``` diff --git a/vignettes/sc_table.Rmd b/vignettes/sc_table.Rmd index fe6f544f..dad763c9 100644 --- a/vignettes/sc_table.Rmd +++ b/vignettes/sc_table.Rmd @@ -29,7 +29,7 @@ It is assumed that you already provided your API key as described in the `r ticl Use the graphical user interface of STATcube to create a table. Visit [STATcube] and select a database. This will open the table view where you can -create a table. See the [STATcube manual] for details. +create a table. See the [STATcube documentation] for details. ## Download an API request @@ -48,7 +48,8 @@ This means that the current user is not permitted to use the API. Provide the path to the downloaded json file as a string in `sc_table()`. ``` r -my_table <- sc_table(json_file = "path/to/api_request.json") +library(STATcubeR) +my_table <- sc_table(json = "path/to/api_request.json") ``` This will send the json-request to the [`/table` endpoint] of the API and return an object of class `sc_table`. @@ -83,13 +84,13 @@ If you prefer to use codes rather than labels, use `my_table$data` instead. my_table$data ``` -## Example datasets {.tabsert .tabset-pills} +## Example datasets {.tabset .tabset-pills} -This article used a dataset about the austrian populatio n via `sc_example()`. +This article used a dataset about the Austrian population via `sc_example()`. `r STATcubeR` contains more example jsons to get started. The datasets can be listed with `sc_examples_list()`. -### Accomodation +### Accommodation ```{r, eval = FALSE} sc_example("accomodation.json") %>% sc_table() @@ -131,16 +132,63 @@ sc_example("agriculture_prices.json") %>% sc_table() sc_example("economic_trend_monitor.json") %>% sc_table() ``` +## Choosing the Language {.tabset .tabset-pills} + +The language which is used for labeling can be changed via the `language` +parameter of `sc_table()`. + +### Accommodation + +```{r} +sc_example("accomodation.json") %>% sc_table("de") +``` + +### STATatlas + +```{r} +sc_example("economic_atlas.json") %>% sc_table("de") +``` + +### Trade + +```{r} +sc_example("foreign_trade.json") %>% sc_table("de") +``` + +### GDP + +```{r} +sc_example("gross_regional_product.json") %>% sc_table("de") +``` + +### Working Hours + +```{r} +sc_example("labor_force_survey.json") %>% sc_table("de") +``` + +### Agriculture + +```{r} +sc_example("agriculture_prices.json") %>% sc_table("de") +``` + +### monitor.statistik.at + +```{r} +sc_example("economic_trend_monitor.json") %>% sc_table("de") +``` + ## Further reading -* Functionalities of the returned object are explained in the `r ticle("sc_data")`. -* `sc_tabulate()` provides a more flixble way of turning STATcube tables into +* The functionality of the returned object are explained in the `r ticle("sc_data")`. +* `sc_tabulate()` provides a more flexible way of turning STATcube tables into `data.frame`s. See the `r ticle("sc_tabulate")` for more details. * The `r ticle("sc_table_saved")` shows an alternative way of importing tables. * If you are interested in other API endpoints, see the `r ticle("sc_schema")` - ot the `r ticle("sc_info")` + or the `r ticle("sc_info")` [`/table` endpoint]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint [download options]: https://docs.wingarc.com.au/superstar/9.12/superweb2/user-guide/download-tables -[STATcube]: https://statcube.at/statcube/home -[STATcube manual]: http://www.statistik.at/wcm/idc/idcplg?IdcService=GET_PDF_FILE&dDocName=105692 +[STATcube]: https://www.statistik.at/datenbanken/statcube-statistische-datenbank/login +[STATcube documentation]: https://www.statistik.at/datenbanken/statcube-statistische-datenbank/dokumente-downloads diff --git a/vignettes/sc_table_custom.Rmd b/vignettes/sc_table_custom.Rmd index e00b2681..fec167f7 100644 --- a/vignettes/sc_table_custom.Rmd +++ b/vignettes/sc_table_custom.Rmd @@ -1,7 +1,7 @@ --- title: "Define Custom Tables" description: > - Define custom requests against the `/table` endpoint programatically + Define custom requests against the `/table` endpoint programmatically by providing character vectors with ids of databases, measures and classification fields. link_text: "custom tables article" --- @@ -13,21 +13,44 @@ source("R/setup.R")$value ```{r, include = FALSE} if (!sc_key_exists()) knitr::opts_chunk$set(eval = FALSE) +show_json <- function(x) { + paste( + "
", + "Show json request", + "", + "```r", + "x$json", + "```", + "", + x$json$.__enclos_env__$private$json_content %>% + paste("```json\n", . , "\n```") %>% knitr::asis_output(), + "", + "
", + sep = "\n" + ) %>% knitr::asis_output() +} ``` -The function `sc_table_custom()` allows you to define requests against the `/table` endpoint programatically. +The function `sc_table_custom()` allows you to define requests against the `/table` endpoint programmatically. This can be useful to automate the generation of `/table` request rather than relying on the GUI to do so. -The function accepts the three arguments. +The function accepts the four arguments. + +- A **database** id +- ids of **measures** to be imported (type `MEASURE`, `STAT_FUNCTION` or `COUNT`) +- ids of **fields** to be imported (type `FIELD` or `VALUESET`) +- a list of **recodes** that can be used customize fields + +## Building a Custom Table Step by Step {#step-by-step} -* A database id -* ids of measures to be imported (type `MEASURE` or `STAT_FUNCTION`) -* ids of fields to be imported (type `FIELD` or `VALUESET`) +The first part of this Article will showcase how custom tables can be +created with a [database about tourism](`r sc_browse_database("detouextregsai")`). +This database will also be used in most other examples of this article. -## Starting Simple +### Starting Simple {#database} -First, we want to just send a database id to `sc_table_custom()`. -We will use a [database about accomodation](`r sc_browse_database("detouextregsai")`) througout this article. -This returns a table with one single row. +First, we want to just send the database id to `sc_table_custom()`. +This will request only the mandatory fields and default measures for that database. +In case of the tourism database, a table with one single row is returned. ```{r} database <- "str:database:detouextregsai" @@ -35,138 +58,296 @@ x <- sc_table_custom(database) x$tabulate() ``` -we see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in austrian tourism establishments in the month of `r x$tabulate()[[1]]`. +```{r, echo=FALSE} +show_json(x) +``` + +We see that `r format(x$tabulate()[[2]], big.mark = " ")` nights were spent in Austrian tourism establishments in the month of `r x$tabulate()[[1]]`. -## Adding Countries +### Adding Countries {#field} Now we want to add a classification to the table. This can be done by getting the database schema and showing all classification fields. ```{r} -schema <- sc_schema_db(database) -(fields <- sc_schema_flatten(schema, "FIELD")) +tourism <- sc_schema_db(database) +(fields <- sc_schema_flatten(tourism, "FIELD")) ``` -If we want to add "Country of origin" we need to include the fouth entry of the `id` column in our request. +If we want to add "Country of origin" we need to include the fourth entry of the `id` column in our request. ```{r} -x <- sc_table_custom(database, dimensions = fields$id[4]) +x <- sc_table_custom(tourism, dimensions = fields$id[4]) x$tabulate() ``` -## Adding Tourism Communes +```{r, echo=FALSE} +show_json(x) +``` + +Alternatively, we could also pass the schema object for "country of origin". + +```{r} +origin <- tourism$`Other Classifications`$`Country of origin` +x <- sc_table_custom(tourism, dimensions = origin) +``` + +### Adding Tourism Communes {#fields} The `dimensions` parameter in `sc_schema_custom()` accepts vectors of field ids. Therefore, we can add the communes easily. ```{r} -x <- sc_table_custom(database, dimensions = fields$id[c(2, 4)]) +x <- sc_table_custom(tourism, dimensions = fields$id[c(2, 4)]) x$tabulate() ``` -## Add Another Measure +```{r, echo=FALSE} +show_json(x) +``` + +### Add Another Measure {#measure} Currently, the table only returns the default measure for the database which is the number of nights spent. We can add a second measure by again using the database schema and passing a measure id ```{r} -(measures <- sc_schema_flatten(schema, "MEASURE")) +(measures <- sc_schema_flatten(tourism, "MEASURE")) ``` We can add both measures to the request by using `measures$id`. Just like the `dimensions` parameter, the `measures` parameters accepts vectors of resource ids. ```{r} -x <- sc_table_custom(database, measures = measures$id, +x <- sc_table_custom(tourism, measures = measures$id, dimensions = fields$id[c(2, 4)]) x$tabulate() ``` -## Using Valuesets +```{r, echo=FALSE} +show_json(x) +``` + +### Changing the hierarchy level {#hierarchy} We can see in [the GUI](`r sc_browse_database("detouextregsai")`) that "Country of origin" is a hierarchical classification. If we look at the table above, only the top level of the hierarchy (Austria, Germany, other) is used. -This can be changed by providing the the valueset that corresponds to the more granular classification of "country of origin" +This can be changed by providing the the value-set that corresponds to the more granular classification of "country of origin" ```{r,fig.align='center', out.width='50%', echo=FALSE} knitr::include_graphics("img/hierarchical_classification.png") ``` -The different valuesets for "country of origin" can be compared by browsing the database schema. +The different value-sets for "country of origin" can be compared by browsing the database schema. ```{r} -(valuesets <- schema$`Other Classifications`$`Country of origin`) +(valuesets <- tourism$`Other Classifications`$`Country of origin`) ``` -We can see that the two levels of the hierarchy are represented by the two valuesets. -The valueset "Herkunftsland" uses 3 classification elements and represents the top level of the hierarchy (Austria, Germany, Other). -The valueset "Country of origin" uses 87 (10+8+69) classification elements and is the bottom level of the hierarchy. -For classification with more levels of hierarchies, more valuesets will be present. +We can see that the two levels of the hierarchy are represented by the two value-sets. +The value-set "Herkunftsland" uses 3 classification elements and represents the top level of the hierarchy (Austria, Germany, Other). +The value-set "Country of origin" uses 87 (10+8+69) classification elements and is the bottom level of the hierarchy. +For classifications with more levels of hierarchies, more value-sets will be present. -We will now use the id for the first valueset in the `dimensions` parmaeter of `sc_table_custom`. +We will now use the id for the first value-set in the `dimensions` parameter of `sc_table_custom`. ```{r} x <- sc_table_custom( - db = "str:database:detouextregsai", - measures = measures$id[1:2], - dimensions = valuesets$`Country of origin`$id + db = tourism, + measures = measures$id, + dimensions = valuesets$`Country of origin` ) x$tabulate() ``` -It is possible to use a mixture of valuesets and fields in the `dimensions` parameter. +```{r, echo=FALSE} +show_json(x) +``` -## Filtering Data +It is possible to use a mixture of value-sets and fields in the `dimensions` parameter. -Omitting certain classification elements from the query is possible with the `recodes` parameter of the `/table` endpoint. Currently, `sc_table_custom()` does not provide support for recodes. -Please issue a [feature request] if you see this as a useful extension of `r STATcubeR`. +## Using Counts {#counts} -
-Example - -For example, the last call to `sc_table_custom()` will send the following json -to the server. - -```json -{ - "database": "str:database:detouextregsai", - "measures": [ "str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB" ], - "dimensions": [ - [ "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" ] ] -} +Instead of Measures and Value-sets, it is also possible to provide counts +in the `measure` parameter of `sc_table_custom()`. + +```{r} +population <- sc_schema_db("debevstand") +(count <- population$`Datensätze/Records`$`F-BEVSTAND`) +x <- sc_table_custom(population, count) +x$tabulate() +``` + +```{r, echo=FALSE} +show_json(x) ``` -For comparison, this is how the same request was specified in `r STATcubeR` +## Recodes {#recodes} + +Data can be filtered on the server side by using the `recodes` parameter +of `sc_table_custom()`. +This might be more complicated than filtering the data in R but offers some +important advantages. + +- **performance** Traffic between the client and server is reduced which might + lead to considerably faster API responses. +- **cell limits (user)** Apart from rate limits (see `?sc_rate_limits`), + STATcube also + limits the amount of cells that can be fetched per user. + Filtering data can be useful to preserve this quota. +- **cell limits (request)** If a single request would contain more than 1 million + cells, a [cell count error](sc_last_error.html#CELL_COUNT) is thrown. -```{r, eval = FALSE} -sc_table_custom( - "str:database:detouextregsai", - c("str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB"), - "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" +### Filtering Data {#filter} + +As an example for filtering data, we can request a table from the tourism +database and only select some countries for `Country of origin`. + +```{r} +origin <- tourism$`Other Classifications`$`Country of origin`$`Country of origin` +month <- tourism$`Mandatory fields`$`Season/Tourism Month`$`Season/Tourism Month` +x <- sc_table_custom( + db = tourism, + measures = measures$id, + dimensions = list(month, origin), + recodes = sc_recode(origin, list(origin$`Italy <29>`, origin$`Germany <12>`)) ) +x$tabulate() ``` -It is now possible to add recodes in order to only show results for Vienna. -For this, the regional classification (`C-C93`) is limited to the -code for vienna, which is `C-C93-2:01`. - -```json -{ - "database": "str:database:detouextregsai", - "measures": [ "str:measure:detouextregsai:F-DATA1:F-ANK", - "str:measure:detouextregsai:F-DATA1:F-UEB" ], - "dimensions": [ - [ "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2" ] ], - "recodes": [ - "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93-2": { - "map": [[ "str:value:detouextregsai:F-DATA1:C-C93-2:C-C93-2:01" ]] - } - ] -} +```{r, echo=FALSE} +show_json(x) +``` + +This table only contains two countries rather than +`r length(valuesets[["Country of origin"]])-4` so the amount of cells in the +table is also 40 times less compared to a table that would omit this filter. + +### Grouping items {#group} + +Other options from the [recodes specification] are also available via `sc_recode()`. +It is possible to group items and specify recodes for several classifications. + +```{r} +x <- sc_table_custom( + db = tourism, + measures = measures$id, + dimensions = list(month, origin), + recodes = c( + sc_recode(origin, list( + list(origin$`Germany <12>`, origin$`Netherlands <25>`), + list(origin$`Italy <29>`, origin$`France (incl.Monaco) <14>`) + )), + sc_recode(month, list( + month$Nov.99, month$Feb.00, month$Apr.09, month$`Jan. 22` + )) + ) +) +x$tabulate() +``` + +```{r, echo=FALSE} +show_json(x) +``` + +This table contains data for two country-groups and two months. +In this case, the cell values for Germany and the Netherlands are just added +to calculate the entries for Arrivals and Nights spent. +However, in other cases STATcube might decide it is more appropriate +to use weighted means or other more complicated aggregation methods. + +### Adding Totals {#totals} + +The `total` parameter in `sc_recode()` can be used to request totals for +classifications. As an example, let's look at the tourism activity in the +capital cities of Austria + +```{r} +destination <- tourism$`Other Classifications`$`Tourism commune [ABO]`$ + `Regionale Gliederung (Ebene +1)` +x <- sc_table_custom( + tourism, + measures = measures$id, + dimensions = list(month, destination), + recodes = c( + sc_recode(destination, total = TRUE, list( + destination$Wien, destination$`Stadt Salzburg`, destination$Linz)), + sc_recode(month, total = FALSE, list(month$Nov.99, month$Apr.09)) + ) +) +as.data.frame(x) +``` + +```{r, echo=FALSE} +show_json(x) +``` + +We see that there are two rows in the table where Tourism commune is set +to "Total". The corresponding values represent the sum of all Arrivals +or Nights spent in either of these three cities during that month. + +### Recoding across hierarchies {#recode-hierarchy} + +To use a recode that includes several hierarchy levels, the corresponding +`FIELD` should be used as the first parameter of `sc_recode()`. +For example, a recode with countries and federal states from the "Country of origin" +classification can be defined as follows. + + +```{r} +origin1 <- tourism$`Other Classifications`$`Country of origin` +origin2 <- origin1$`Country of origin` +origin3 <- origin1$`Herkunftsland (Ebene +1)` +x <- sc_table_custom( + tourism, measures$id, origin1, + recodes = sc_recode(origin1, list( + origin2$`Vienna <01>`, origin3$Germany, + list(origin2$`Bavaria (beg.05/03) <80>`, origin3$`other countries`)) + ) +) +x$tabulate() +``` + +```{r, echo=FALSE} +show_json(x) +``` + +## Typechecks + +Since custom tables can become quite complicated, `sc_table_custom()` performs +type-checks before sending the request to the API. +If inconsistencies are detected, warnings will be generated. +See `?sc_table_custom` for a comprehensive list of the performed checks. + +```{r} +sc_table_custom(tourism, measures = tourism, dry_run = TRUE) +``` + +
+Advanced example + +```{r} +sc_table_custom("A", measures = "B", dimensions = "C", + recodes = sc_recode("D", "E"), dry_run = TRUE) ```
-[feature request]: https://github.com/statistikat/STATcubeR/issues +If `dry_run` is set to `FALSE` (the default), STATcubeR will send the request +to the API even if inconsistencies are detected. +This will likely lead to an error of the form ["expected json but got html"]. + +If you get spurious warnings or have suggestions on how these type-checks might +be improved, please issue a feature request to the [STATcubeR bug tracker]. + +## Further Reading + +* If you've come this far, you are probably already familiar with `sc_schema()`. + But in case you are not, the `r ticle("sc_schema")` contains more information + on how to get metadata from the API. +* The `r ticle("sc_data")` showcases different ways to extract data and metadata + from the return value of `sc_table_custom()`. + +[recodes specification]: https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/table-endpoint +[STATcubeR bugtracker]: https://github.com/statistikat/STATcubeR/issues +["expected json but got html"]: sc_last_error.html#invalid-json diff --git a/vignettes/sc_tabulate.Rmd b/vignettes/sc_tabulate.Rmd index f9f57bfd..f85e05c9 100644 --- a/vignettes/sc_tabulate.Rmd +++ b/vignettes/sc_tabulate.Rmd @@ -166,11 +166,11 @@ x <- sc_table(sc_example("accomodation")) x$meta$fields ``` -### Including totals in the oputput +### Including totals in the output It is not necessary that all fields have totals. For example, suppose we want to include the totals for `Sex` in the output table. -We can just remove the toal code before running `sc_tabulate()`. +We can just remove the total code before running `sc_tabulate()`. The special symbol `NA` can be used to unset a total code. ```{r} @@ -188,21 +188,21 @@ earnings$language <- "de" earnings$tabulate("Geschlecht") ``` -To skip labelling altogether and use variable codes in the output, use `raw=TRUE`. +To skip labeling altogether and use variable codes in the output, use `raw=TRUE`. ```{r} earnings$tabulate("Geschlecht", raw = TRUE) ``` Switching languages is always available for `od_table()` objects. -For `sc_table()`, it depends on whcih languages were requested. +For `sc_table()`, it depends on which languages were requested. ```{r} -# default: get labels in german and english +# default: get labels in German and English x <- sc_table(sc_example("accomodation")) -# only get english labels +# only get English labels x <- sc_table(sc_example("accomodation"), lang = "en") -# only get german labels +# only get German labels x <- sc_table(sc_example("accomodation"), lang = "de") ``` @@ -220,10 +220,10 @@ In the above example, `"2. Quartil"` was matched to `"2. Quartil (Median)"`. ## Programmatic usage -Notice that we used the german label for the column `"Sex"` in the last calls +Notice that we used the German label for the column `"Sex"` in the last calls to `tabulate()`. This is necessary because only the "active" labels are available to define the tabulation. If you want to use `r STATcubeR` -programatically, always use codes to define the tabulation and also use the +programmatically, always use codes to define the tabulation and also use the `.list` parameter if you want to pass several codes. ```{r, echo = FALSE}