Merge pull request #47 from alexkowa/master
Edits from Fork
bernhard-da authored Oct 10, 2024 · 2 parents c717d5b + ebbc264 · commit 7d5d0d2
Showing 36 changed files with 187 additions and 224 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
@@ -1,10 +1,11 @@
Type: Package
Package: STATcubeR
Title: R Interface for the STATcube REST API and Open Government Data
- Version: 1.0.0
+ Version: 0.5.2
+ Date: 2024-10-07
Authors@R: c(
- person("Gregor", "de Cillia", , "[email protected]", role = "aut"),
+ person("Gregor", "de Cillia", , "", role = "aut"),
person("Bernhard", "Meindl", , "[email protected]", role = "ctb"),
person("Alexander", "Kowarik", , "[email protected]",
role = c("ctb","cre"),comment=c(ORCID="0000-0001-8598-4130"))
@@ -17,14 +18,15 @@ License: GPL (>= 2)
URL: https://statistikat.github.io/STATcubeR/,
https://github.com/statistikat/STATcubeR
BugReports: https://github.com/statistikat/STATcubeR/issues
+ Depends: R (>= 3.5.0)
Imports:
cli (>= 3.4.1),
httr,
jsonlite,
- magrittr,
pillar (>= 1.5.0),
vctrs (>= 0.5.2)
Suggests:
+ magrittr,
spelling,
data.tree,
rappdirs,
4 changes: 0 additions & 4 deletions NAMESPACE
@@ -31,7 +31,6 @@ S3method(print,sdmx_description)
S3method(tbl_format_footer,sc_meta)
S3method(tbl_sum,sc_meta)
S3method(tbl_sum,sc_tibble)
- export("%>%")
export(od_cache_clear)
export(od_cache_dir)
export(od_cache_file)
@@ -83,9 +82,6 @@ export(sc_table_saved)
export(sc_table_saved_list)
export(sc_tabulate)
export(sdmx_table)
- importFrom(magrittr,"%<>%")
- importFrom(magrittr,"%>%")
- importFrom(magrittr,"%T>%")
importFrom(pillar,pillar_shaft)
importFrom(pillar,tbl_format_footer)
importFrom(pillar,tbl_sum)
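With `export("%>%")` gone from the NAMESPACE, code that relied on STATcubeR re-exporting the magrittr pipe must attach magrittr itself or switch to the base pipe. A minimal sketch of the equivalence, outside the package (base `|>` requires R >= 4.1.0; `iris` is just a stand-in dataset):

```r
library(magrittr)          # now required for %>%, since STATcubeR no longer re-exports it
head(iris) %>% summary()   # magrittr pipe
head(iris) |> summary()    # base pipe, available since R 4.1.0
summary(head(iris))        # the nested call both pipes evaluate to
```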
10 changes: 4 additions & 6 deletions R/cache.R
@@ -74,24 +74,22 @@ sc_cache_dir_get <- function() {
}

sc_checksum <- function(x) {
- httr::sha1_hash(NULL, x) %>%
-   gsub("/", "-", .)
+ gsub("/", "-", httr::sha1_hash(NULL, x))
}

#' @describeIn sc_cache get the cache file associated with an object
#' @param x an object of class `sc_table` or `sc_schema`
#' @export
sc_cache_files <- function(x) {
if (inherits(x, "sc_table"))
- return(x$response %>% attr("sc_cache_file"))
+ return(attr(x$response, "sc_cache_file"))
if (inherits(x, "sc_schema"))
- return(x %>% attr("response") %>% attr("sc_cache_file"))
+ return(attr(attr(x, "response"), "sc_cache_file"))
stop("sc_cache_file() can only be used with sc_table and sc_schema objects")
}

sc_cache_file <- function(params, ext = ".rds") {
- sc_checksum(params) %>%
-   paste0(sc_cache_dir(), "/", ., ext)
+ paste0(sc_cache_dir(), "/", sc_checksum(params), ext)
}

#' @describeIn sc_cache removes all files from the cache
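The pattern above repeats throughout the commit: magrittr chains built around the `.` placeholder become plain nested calls. A minimal standalone sketch of the `sc_checksum()` rewrite (toy input, not package code):

```r
library(magrittr)
"a/b/c" %>% gsub("/", "-", .)   # '.' injects the piped value as the third argument
gsub("/", "-", "a/b/c")         # the nested-call form used after this commit
# both return "a-b-c"
```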
5 changes: 2 additions & 3 deletions R/error.R
@@ -15,7 +15,7 @@
#' try(sc_table_saved("invalid_id"))
#' last_error <- sc_last_error()
#' httr::content(last_error)
- #' sc_last_error_parsed() %>% str()
+ #' str(sc_last_error_parsed())
#' @export
sc_last_error <- function() {
sc_env$last_error
@@ -61,8 +61,7 @@ sc_check_response <- function(response) {
sc_env$last_error <- response
message <- paste0(httr::http_status(response)$message, "\n")
if (httr::http_type(response) == "application/json")
- message <- httr::content(response, as = "text") %>%
-   jsonlite::prettify(indent = 2) %>% paste0(message, .)
+ message <- paste0(message, jsonlite::prettify(httr::content(response, as = "text"),indent = 2))
message <- paste0(message, message_sc_last_error())
stop(message, call. = FALSE)
}
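For reference, `jsonlite::prettify()` just re-indents a JSON string; the rewritten branch appends that prettified payload to the HTTP status line. A small sketch with an invented error body (the real one comes from `httr::content(response)`):

```r
jsonlite::prettify('{"code":404,"message":"table not found"}', indent = 2)
#> {
#>   "code": 404,
#>   "message": "table not found"
#> }
```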
22 changes: 13 additions & 9 deletions R/od_cache.R
@@ -40,7 +40,9 @@ od_cache_summary <- function(server = "ext") {
is_field <- pos_underscore != -1
field <- substr(files[is_field], 1 + pos_underscore[is_field], nchar(files[is_field]) - 4)
id <- substr(files[is_field], 1, pos_underscore[is_field] - 1)
- sizes_fields <- file.size(file.path(od_cache_dir(), files[is_field])) %>% split(id) %>% sapply(sum)
+ sizes_fields <- sapply(
+   split(file.size(file.path(od_cache_dir(), files[is_field])),id),
+   sum)
fields <- list(id = id, field = field)

files <- files[!is_field]
@@ -51,27 +53,29 @@
id_data <- substr(files, 1, nchar(files) - 4)
all_ids <- unique(c(id_data, id_header, fields$id))
res <- data_frame(
- id = all_ids %>% `class<-`(c("ogd_id", "character")),
+ id = `class<-`(all_ids,c("ogd_id", "character")),
updated = file.mtime(paste0(cache_dir, all_ids, ".json")),
json = file.size(paste0(cache_dir, all_ids, ".json")),
data = file.size(paste0(cache_dir, all_ids, ".csv")),
header = file.size(paste0(cache_dir, all_ids, "_HEADER.csv")),
fields = sizes_fields[match(unique(fields$id), all_ids)],
- n_fields = match(fields$id, all_ids) %>% factor(seq_along(all_ids)) %>%
-   table() %>% as.integer()
+ n_fields = as.integer(table(factor(match(fields$id, all_ids),seq_along(all_ids))))
)
class(res$updated) <- c("sc_dttm", class(res$updated))
res
}


#' @rdname od_cache
- #' @importFrom magrittr %T>%
#' @export
od_downloads <- function(server = "ext") {
- x <- od_cache_path(server, "downloads.log") %T>%
-   (function(x) {if (!file.exists(x)) stop("No file 'downloads.log' in cache")}) %>%
-   utils::read.csv(header = FALSE) %>% `names<-`(c("time", "file", "downloaded"))
+ x <- od_cache_path(server, "downloads.log")
+ if (!file.exists(x))
+   stop("No file 'downloads.log' in cache")
+ x <- utils::read.csv(x,header = FALSE)
+ names(x) <- c("time", "file", "downloaded")
x$time <- as.POSIXct(x$time)
- x %>% .[rev(seq_len(nrow(.))), ] %>% `class<-`(c("tbl", "data.frame"))
+ x <- x[rev(seq_len(nrow(x))), ]
+ class(x) <- c("tbl", "data.frame")
+ return(x)
}
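The `sapply(split(...), sum)` idiom introduced in `od_cache_summary()` above is base R's group-wise sum, replacing the piped `split()`/`sapply()` chain for per-dataset file sizes. A self-contained sketch with invented sizes:

```r
sizes <- c(10, 20, 5, 7)                    # file sizes in bytes (made up)
ids   <- c("tab1", "tab1", "tab2", "tab2")  # dataset each file belongs to
sapply(split(sizes, ids), sum)              # named vector: tab1 = 30, tab2 = 12
```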
31 changes: 16 additions & 15 deletions R/od_list.R
@@ -33,20 +33,20 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
html <- httr::content(r, encoding = "UTF-8")

# main-groups
- grp <- html %>%
-   xml2::xml_find_all('//*[@class="panel-heading"]') %>%
-   xml2::xml_find_all(".//a") %>%
+ grp <- html |>
+   xml2::xml_find_all('//*[@class="panel-heading"]') |>
+   xml2::xml_find_all(".//a") |>
xml2::xml_text()

- el <- html %>%
-   xml2::xml_find_all(".//h4") %>%
+ el <- html |>
+   xml2::xml_find_all(".//h4") |>
xml2::xml_find_all(".//a")

# ids
df <- data_frame(
category = rep("NA", length(el)),
- id = el %>% xml2::xml_attr("aria-label"),
- label = el %>% xml2::xml_text()
+ id = el |> xml2::xml_attr("aria-label"),
+ label = el |> xml2::xml_text()
)

ignored_labels <- c("[Alle \u00f6ffnen]", "[Alle schlie\u00dfen]",
@@ -106,12 +106,12 @@
#' @examples
#' catalogue <- od_catalogue()
#' catalogue
- #' catalogue$update_frequency %>% table()
- #' catalogue$categorization %>% table()
+ #' table(catalogue$update_frequency)
+ #' table(catalogue$categorization)
#' catalogue[catalogue$categorization == "Gesundheit", 1:4]
#' catalogue[catalogue$measures >= 70, 1:3]
#' catalogue$json[[1]]
- #' catalogue$database %>% head()
+ #' head(catalogue$database)
#' @export
od_catalogue <- function(server = "ext", local = TRUE) {
if (local) {
@@ -138,16 +138,17 @@ as_df_jsons <- function(jsons) {
as.POSIXct(x, format = "%Y-%m-%dT%H:%M:%OS")
}

- descs <- sapply(jsons, function(x) x$extras$attribute_description) %>% paste0(";", .)
+ descs <- paste0(";", sapply(jsons, function(x) x$extras$attribute_description))
+ tmpfn <- function(x) {x[!grepl("statcube", x)] <- NA_character_; x}
out <- data_frame(
title = sapply(jsons, function(x) x$title),
- measures = gregexpr(";F-", descs) %>% sapply(length),
- fields = gregexpr(";C-", descs) %>% sapply(length),
+ measures = sapply(gregexpr(";F-", descs),length),
+ fields = sapply(gregexpr(";C-", descs),length),
modified = sapply(jsons, function(x) x$extras$metadata_modified),
created = sapply(jsons, function(x) x$resources[[1]]$created),
id = sapply(jsons, function(x) x$resources[[1]]$name),
- database = sapply(jsons, function(x) x$extras$metadata_linkage[[1]]) %>%
-   (function(x) {x[!grepl("statcube", x)] <- NA_character_; x}) %>% strsplit("?id=") %>%
+ database = sapply(jsons, function(x) x$extras$metadata_linkage[[1]]) |>
+   tmpfn() |> strsplit("?id=") |>
sapply(function(x) x[2]),
title_en = sapply(jsons, function(x) x$extras$en_title_and_desc),
notes = sapply(jsons, function(x) x$notes),
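One constraint drives the new `tmpfn` helper: the base pipe's right-hand side must be a function call, so magrittr's inline `(function(x) {...})` step gets a name before it can sit in a `|>` chain. A hedged sketch of the same filter on invented URLs (`blank_non_statcube` is a hypothetical name):

```r
# name the anonymous filter so it can appear on the right of |>
blank_non_statcube <- function(x) { x[!grepl("statcube", x)] <- NA_character_; x }
urls <- c("https://statcube.example/?id=detab", "https://other.example/")  # made-up inputs
urls |> blank_non_statcube()
#> [1] "https://statcube.example/?id=detab" NA
```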
30 changes: 16 additions & 14 deletions R/od_resource.R
@@ -7,7 +7,6 @@
#' contain a hidden attribute `attr(., "od")` about the time used for
#' downloading and parsing the resource. [od_resource_all()] converts these
#' hidden attribute into columns.
- #' @importFrom magrittr %<>%
NULL

od_resource_blacklist <- c(
@@ -55,7 +54,7 @@ od_cache_path <- function(server = "ext", ...) {
#' @export
od_cache_clear <- function(id, server = "ext") {
od_resource_check_id(id)
- files <- od_cache_path(server) %>% dir(id, full.names = TRUE)
+ files <- dir(od_cache_path(server),id, full.names = TRUE)
file.remove(files)
message("deleted ", length(files), " files from ", shQuote(od_cache_path(server)))
}
@@ -94,7 +93,7 @@ od_cache_file <- function(id, suffix = NULL, timestamp = NULL, ..., server = "ex
ext <- match.arg(list(...)$ext, c("csv", "json"))
stopifnot(is.character(id) && length(id) > 0)
od_resource_check_id(id)
- filename <- c(id, suffix) %>% paste(collapse = "_") %>% paste0(".", ext)
+ filename <- paste0(paste(c(id, suffix), collapse = "_"), ".", ext)
cache_file <- od_cache_path(server, filename)
download <- NA_real_
if (!file.exists(cache_file) || !is.null(timestamp) &&
@@ -127,7 +126,7 @@ od_resource <- function(id, suffix = NULL, timestamp = NULL, server = "ext") {
cache_file <- od_cache_file(id, suffix, timestamp, ext = "csv", server = server)
t <- Sys.time()
x <- utils::read.csv2(cache_file, na.strings = c("", ":"),
- check.names = FALSE, stringsAsFactors = FALSE) %>%
+ check.names = FALSE, stringsAsFactors = FALSE) |>
od_normalize_columns(suffix)
t <- Sys.time() - t
t <- 1000 * as.numeric(t)
Expand All @@ -136,20 +135,20 @@ od_resource <- function(id, suffix = NULL, timestamp = NULL, server = "ext") {
}

od_resource_parse_all <- function(resources, server = "ext") {
- parsed <- resources %>% lapply(function(x) {
+ parsed <- lapply(resources, function(x) {
last_modified <- as.POSIXct(x$last_modified, format = "%Y-%m-%dT%H:%M:%OS")
od_resource(x$name, timestamp = last_modified, server = server)
})
od <- lapply(parsed, attr, "od")

data_frame(
name = sapply(resources, function(x) x$name),
- last_modified = lapply(od, function(x) x$last_modified) %>% do.call(c, .),
- cached = lapply(od, function(x) x$cached) %>% do.call(c, .),
+ last_modified = do.call(c, lapply(od, function(x) x$last_modified)),
+ cached = do.call(c, lapply(od, function(x) x$cached)),
size = sapply(od, function(x) x$size),
download = vapply(od, function(x) x$download, 1.0),
parsed = sapply(od, function(x) x$parsed),
- data = I(parsed %>% lapply(`attr<-`, "od", NULL))
+ data = I(lapply(parsed, `attr<-`, "od", NULL))
)
}

@@ -159,7 +158,7 @@ od_resources_check <- function(json) {
id <- resources[[1]]$name
stopifnot(resources[[2]]$name == paste0(id, "_HEADER"))
stopifnot(all(sapply(resources, function(x) { x$format == "csv" })))
- fc_res <- resources %>% .[-c(1, 2)] %>% sapply(function(x) x$name)
+ fc_res <- sapply(resources[-c(1, 2)], function(x) x$name)
fc_att <- att$code[substr(att$code, 1, 2) == "C-"]
stopifnot(setequal(fc_res, paste0(id, "_", fc_att)))
function(header) {
@@ -174,7 +173,8 @@ od_normalize_columns <- function(x, suffix) {
col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)), 5, 7)
col_names <- c("code", "label", "label_de", "label_en",
switch(suffix, HEADER = NULL, "parent"), "de_desc", "en_desc")
- x <- x[, col_indices] %>% `names<-`(col_names)
+ x <- x[, col_indices]
+ names(x) <- col_names
x$label <- NA_character_
x$label_en <- as.character(x$label_en)
x$label_de <- as.character(x$label_de)
@@ -207,8 +207,7 @@ od_json <- function(id, timestamp = Sys.time() - 3600, server = "ext") {

#' @export
as.character.od_json <- function(x, ...) {
- jsonlite::toJSON(x, pretty = TRUE, auto_unbox = TRUE) %>%
-   paste(...)
+ paste(jsonlite::toJSON(x, pretty = TRUE, auto_unbox = TRUE), ...)
}

#' @name od_resource
@@ -222,8 +221,11 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") {
check_header <- od_resources_check(json)
out <- od_resource_parse_all(json$resources, server = server)
check_header(out$data[[2]])
- out$data[[2]] %<>% od_normalize_columns("HEADER")
- out$data[seq(3, nrow(out))] %<>% lapply(od_normalize_columns, "FIELD")
+
+ out$data[[2]] <- od_normalize_columns(out$data[[2]], "HEADER")
+
+ out$data[seq(3, nrow(out))] <- lapply(out$data[seq(3, nrow(out))],
+   od_normalize_columns, "FIELD")
class(out$name) <- c("ogd_file", "character")
class(out$last_modified) <- c("sc_dttm", class(out$last_modified))
class(out$cached) <- c("sc_dttm", class(out$cached))
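The last hunk drops magrittr's assignment pipe: `%<>%` pipes a value through a chain and assigns the result back to its left-hand side, which the commit now writes as an explicit `<-`. A self-contained sketch of the two forms:

```r
library(magrittr)
x <- c(3, 1, 2)
x %<>% sort()    # assignment pipe: shorthand for x <- sort(x)
y <- c(3, 1, 2)
y <- sort(y)     # the explicit form this commit standardizes on
stopifnot(identical(x, y))
```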
10 changes: 5 additions & 5 deletions R/od_revisions.R
@@ -41,15 +41,15 @@ print.od_revisions <- function(x, ...) {
since <- attr(x, "since")
response <- attr(x, "response")
if (!is.null(since))
- cli::format_inline("{.strong {length(x)}} changes between
+ message(cli::format_inline("{.strong {length(x)}} changes between
{.timestamp {attr(x, 'since')}} and
- {.timestamp {response$date}}") %>% cat()
+ {.timestamp {response$date}}"))
else
- cli::format_inline("{.strong {length(x)}} datasets are available ",
-   "({.timestamp {response$date}})\n") %>% cat()
+ message(cli::format_inline("{.strong {length(x)}} datasets are available ",
+   "({.timestamp {response$date}})\n"))
if (length(x) > 0) {
y <- cli::cli_vec(x, list("vec-trunc" = 3))
- cli::format_inline("{.strong ids}: {.emph {y}}") %>% cat()
+ message(cli::format_inline("{.strong ids}: {.emph {y}}"))
}
invisible(x)
}
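Besides removing the pipe, this hunk moves output from `cat()` to `message()`, which changes behavior: `cat()` writes to stdout with no trailing newline, while `message()` writes to stderr, appends one, and can be silenced. A quick illustration:

```r
cat("goes to stdout")                  # no newline appended
message("goes to stderr")              # newline appended automatically
suppressMessages(message("hidden"))    # message() output is suppressible; cat() is not
```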
2 changes: 1 addition & 1 deletion R/od_table.R
@@ -102,7 +102,7 @@ od_table_class <- R6::R6Class(
#'
#' Similar contents can be found in `$meta`.
header = function() {
- private$cache$header %>% sc_tibble_meta(c("label_de", "label_en"))
+ sc_tibble_meta(private$cache$header, c("label_de", "label_en"))
},
#' @field resources
#' lists all files downloaded from the server to construct this table
8 changes: 3 additions & 5 deletions R/od_table_save.R
@@ -88,13 +88,11 @@ od_table_local_paths <- function() {
id <- json$resources[[1]]$name
stopifnot(is.character(id), length(id) == 1)
if (json$extras$metadata_modified == "$PublDateTime$") {
- readLines(json_file) %>%
-   gsub("\$PublDateTime\$", json$extras$begin_datetime, .) %>%
-   writeLines(json_file)
+ writeLines(gsub("\$PublDateTime\$", json$extras$begin_datetime, readLines(json_file)), json_file)
json <- jsonlite::read_json(json_file)
}
- timestamps <- sapply(json$resources, function(x) x$last_modified) %>%
-   as.POSIXct(format = "%Y-%m-%dT%H:%M:%OS")
+ timestamps <- as.POSIXct(sapply(json$resources, function(x) x$last_modified),
+   format = "%Y-%m-%dT%H:%M:%OS")
stopifnot(all(timestamps <= Sys.time()))
paths <- list(
classifications = dir(file.path(extracted, "classifications"), full.names = TRUE),
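The rewritten timestamp line works because `sapply()` returns a character vector of ISO-8601 stamps that `as.POSIXct()` parses in one call. A standalone sketch with made-up values:

```r
stamps <- c("2024-10-07T08:30:00", "2024-10-07T09:15:30")  # hypothetical last_modified values
as.POSIXct(stamps, format = "%Y-%m-%dT%H:%M:%OS")          # parsed in the local time zone
```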
4 changes: 2 additions & 2 deletions R/od_tabulate.R
@@ -43,10 +43,10 @@ sc_data_tabulate <- function(table, ..., .list = NULL, raw = FALSE, parse_time =
x <- x[, setdiff(names(x), setdiff(fields_to_aggregate, has_total))]
if (length(aggregate_via_sum) > 0) {
grouping_var <- do.call(paste, x[fields])
- x <- cbind(
+ x <- sc_tibble(cbind(
subset(x, !duplicated(grouping_var), fields),
rowsum(x[measures], group = grouping_var, reorder = FALSE)
- ) %>% sc_tibble()
+ ))
}
# post process aggregated data
for (field_code in fields) {
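For context on the aggregation branch above: `rowsum()` sums the measure columns within each group in a single pass, and the change merely moves the `sc_tibble()` wrapper around the `cbind()` instead of piping into it. A minimal sketch with invented data:

```r
x <- data.frame(region = c("A", "A", "B"), value = c(1, 2, 5))
rowsum(x["value"], group = x$region, reorder = FALSE)  # sums value within each region
#>   value
#> A     3
#> B     5
```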
(diff truncated; the remaining changed files are not shown)
