diff --git a/DESCRIPTION b/DESCRIPTION index f68c6626..54b32b3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,18 @@ Type: Package Package: STATcubeR -Title: R Interface for the STATcube REST API and Open Government Data +Title: R Interface for the 'STATcube' REST API and Open Government Data Version: 1.0.0 -Date: 2024-11-28 +Date: 2024-11-29 Authors@R: c( person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = c("ctb", "cre")), person("Alexander", "Kowarik", , "Alexander.Kowarik@statistik.gv.at", role = c("ctb"),comment=c(ORCID="0000-0001-8598-4130")), person("Gregor", "de Cillia", , "", role = "aut")) -Description: Import data from the STATcube REST API or from the open data +Description: Import data from the 'STATcube' REST API or from the open data portal of Statistics Austria. This package includes a client for API requests as well as parsing utilities for data which originates from - STATcube. + 'STATcube'. Documentation about 'STATcubeR' is provided by several vignettes + included in the package as well as on the public 'pkgdown' page at + <https://statistikat.github.io/STATcubeR/>. License: GPL (>= 2) URL: https://statistikat.github.io/STATcubeR/, https://github.com/statistikat/STATcubeR diff --git a/R/browse.R b/R/browse.R index 9347ccb4..9aea5e2e 100644 --- a/R/browse.R +++ b/R/browse.R @@ -1,14 +1,16 @@ -#' Links to important STATcube and OGD pages +#' Links to important 'STATcube' and 'OGD' pages #' -#' A collection of links, to browse important STATcube pages. +#' A collection of links, to browse important 'STATcube' pages. #' #' @inheritParams sc_key #' @name sc_browse NULL -#' @describeIn sc_browse opens the home menu of STATcube +#' @describeIn sc_browse opens the home menu of 'STATcube' #' @examples #' sc_browse() +#' @return the URL of a specific webpage which is opened by default +#' in a web browser. 
#' @export sc_browse <- function(server = "ext") { sc_url(sc_url_gui(server), "home") diff --git a/R/cache.R b/R/cache.R index 7216cf5a..6fff48e7 100644 --- a/R/cache.R +++ b/R/cache.R @@ -26,6 +26,12 @@ #' @param verbose print instructions on how to set up caching persistently #' via environment variables? #' @name sc_cache +#' @return +#' - for [sc_cache_enable()], [sc_cache_dir()]: the path to the cache-directory +#' - for [sc_cache_disable()]: `TRUE` +#' - for [sc_cache_enabled()]: `TRUE` if caching is enabled, `FALSE` otherwise +#' - for [sc_cache_files()]: the content of the cache associated with a file +#' - for [sc_cache_clear()]: `NULL` NULL #' @describeIn sc_cache enables caching for the current R session diff --git a/R/od_cache.R b/R/od_cache.R index aa4f5978..bae0a255 100644 --- a/R/od_cache.R +++ b/R/od_cache.R @@ -13,23 +13,24 @@ #' ## inspect #' od_cache_summary() #' od_downloads() -#' @details -#' [od_cache_summary()] provides an overview of all contents of the cache through -#' a data.frame. It has one row for each dataset and the following columns. -#' All file sizes are given in bytes -#' - **`id`** the dataset id -#' - **`updated`** the last modified time for `${id}.json` -#' - **`json`** the file size of `${id}.json` -#' - **`data`** the file size of `${id}.csv` -#' - **`header`** the file size of `${id}_HEADER.csv` -#' - **`fields`** the total file size of all files belonging to fields (`{id}_C*.csv`). -#' - **`n_fields`** the number of field files +#' @return +#' - [od_cache_summary()] provides an overview of all contents of the cache through +#' a data.frame. It has one row for each dataset and returns a `data.frame` with +#' the following columns in which all file sizes are given in bytes. 
+#' - **`id`** the dataset id +#' - **`updated`** the last modified time for `${id}.json` +#' - **`json`** the file size of `${id}.json` +#' - **`data`** the file size of `${id}.csv` +#' - **`header`** the file size of `${id}_HEADER.csv` +#' - **`fields`** the total file size of all files belonging to fields (`{id}_C*.csv`). +#' - **`n_fields`** the number of field files #' -#' [od_downloads()] shows a download history for the current cache +#' - [od_downloads()] shows a download history for the current cache and returns +#' a `data.frame` with the following columns: #' -#' - **`time`** a timestamp for the download -#' - **`file`** the filename -#' - **`downloaded`** the download time in milliseconds +#' - **`time`** a timestamp for the download +#' - **`file`** the filename +#' - **`downloaded`** the download time in milliseconds #' @export od_cache_summary <- function(server = "ext") { cache_dir <- od_cache_path(server) diff --git a/R/od_list.R b/R/od_list.R index c7876fb3..65c943b2 100644 --- a/R/od_list.R +++ b/R/od_list.R @@ -77,8 +77,9 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) { #' and combines them into a `data.frame` so the datasets can easily be #' filtered based on categorizations, tags, number of classifications, etc. #' +#' @details #' The naming, ordering and choice of the columns is likely to change. -#' Currently, the following columns are provided. 
+#' @return a `data.frame` with the following structure #' #' |**Column**|**Type** | **Description** #' | ---------| ------- | ------------- diff --git a/R/od_table_save.R b/R/od_table_save.R index 3735b791..90f9eea8 100644 --- a/R/od_table_save.R +++ b/R/od_table_save.R @@ -19,6 +19,9 @@ #' #' # cleanup #' file.remove(archive) +#' @return +#' - for [od_table_save()]: the path to the generated file +#' - for [od_table_local()]: the OGD identifier #' @export od_table_save <- function(x, file = NULL) { stopifnot(inherits(x, "od_table")) diff --git a/R/other_endpoints.R b/R/other_endpoints.R index 6188609a..3c13c6e0 100644 --- a/R/other_endpoints.R +++ b/R/other_endpoints.R @@ -11,6 +11,14 @@ #' @name other_endpoints #' @inheritParams sc_key #' @inheritParams sc_schema +#' @return +#' - [sc_info()]: a `data.frame` with two columns identifying possible languages +#' - [sc_rate_limit_table()], [sc_rate_limit_schema()], [sc_rate_limits()]: a `list` with elements +#' * `remaining`: how many requests can be sent until the rate limit is reached +#' * `limit`: the number of requests allowed per hour +#' * `reset`: a timestamp when the rate limit will be reset +#' - [sc_rate_limits()]: + NULL #' @describeIn other_endpoints @@ -29,12 +37,10 @@ sc_info <- function(language = c("en", "de"), key = NULL, server = "ext") { } #' @describeIn other_endpoints -#' returns a `3x1` dataframe with the following columns -#' * `remaining` how much requests can be sent to the `/table` -#' endpoint until the rate limit is reached. -#' * `limit` the number of requests allowed per hour. -#' * `reset` a timestamp when the rate limit will be reset. -#' Usually, this should be less than one hour `after the current time. +#' returns a `list` with information about current requests-limits with +#' respect to the `/table` endpoint. It +#' also shows when the limits reset which should be less than one hour +#' after the current time. 
#' @export sc_rate_limit_table <- function(language = c("en", "de"), key = NULL, server = "ext") { response <- sc_check_response(httr::GET( @@ -46,7 +52,11 @@ sc_rate_limit_table <- function(language = c("en", "de"), key = NULL, server = " rate_limit } -#' @rdname other_endpoints +#' @describeIn other_endpoints +#' returns a `list` with information about current requests-limits with +#' respect to the `/schema` endpoint. It +#' also shows when the limits reset which should be less than one hour +#' after the current time. #' @export sc_rate_limit_schema <- function(language = c("en", "de"), key = NULL, server = "ext") { response <- sc_check_response(httr::GET( diff --git a/R/schema.R b/R/schema.R index 4b2f0de5..77d234f4 100644 --- a/R/schema.R +++ b/R/schema.R @@ -23,6 +23,10 @@ #' `"field"` and `"valueset"`. For the catalogue, only `NULL` and `"folder"` #' are applicable. #' @family functions for /schema +#' @return +#' - for [sc_schema()] and [sc_schema_db()]: an object of class `sc_schema` +#' - for [sc_schema_flatten()]: a `data.frame` +#' - for [sc_schema_catalogue()]: a `list` #' @export sc_schema <- function(id = NULL, depth = NULL, language = NULL, key = NULL, server = "ext") { diff --git a/R/table.R b/R/table.R index 57f9d22a..1551c886 100644 --- a/R/table.R +++ b/R/table.R @@ -19,7 +19,7 @@ base_url <- function(server = "ext") { #' @title Class for /table responses #' @description R6 Class for all responses of the /table endpoint of the -#' STATcube REST API. +#' 'STATcube' REST API. 
#' @keywords internal sc_table_class <- R6::R6Class( "sc_table", diff --git a/R/table_custom.R b/R/table_custom.R index e5ae249b..fcc4913f 100644 --- a/R/table_custom.R +++ b/R/table_custom.R @@ -80,6 +80,10 @@ #' ) #' ) #' x$tabulate() +#' @return +#' - for [sc_table_custom()]: an object of class `sc_table` +#' - for [sc_recode()]: a `list` that is a suitable input for parameter +#' `"recode"` in [sc_table_custom()] #' @export sc_table_custom <- function(db, measures = c(), dimensions = c(), language = c("en", "de"), diff --git a/R/tabulate.R b/R/tabulate.R index 5b0a4841..d7ed50ec 100644 --- a/R/tabulate.R +++ b/R/tabulate.R @@ -3,7 +3,7 @@ #' @description #' [sc_tabulate()] extracts the data in the table and turns it into a tidy #' data.frame. It applies labeling of the data and transforms time variables -#' into a `Date` format if they satisfy certain STATcube Standards. +#' into a `Date` format if they satisfy certain 'STATcube' standards. #' #' `sc_tabulate(table, ...)` is just an alias for `table$tabulate(...)` and #' was added so this rather complicated method can have a separate documentation @@ -91,7 +91,7 @@ #' ## table$tabulate(...) is an alias for sc_tabulate(table, ...) 
sc_tabulate(table, "C-A11-0") #' -#' ######################### STATcube REST API ################################# +#' ######################## 'STATcube' REST API ################################ #' #' @examplesIf sc_key_exists() #' table_tourism <- sc_table(sc_example("accomodation.json"), "de") @@ -100,15 +100,7 @@ #' table_tourism$tabulate("Saison/Tourismusmonat") #' table_tourism$tabulate("Saison/Tourismusmonat", "Ankünfte") #' table_tourism$tabulate("Ankünfte") -#' -#' ## TODO: param annotations does not work currently -#' if (FALSE) { -#' table_trade <- sc_table(sc_example("foreign_trade.json"), "de") -#' tt <- sc_tabulate(table_trade, "Berichtsjahr", "Import, Wert in Euro", -#' annotations = TRUE) -#' tt -#' str(tt[['Import, Wert in Euro_a']]) -#' } +#' @return a `data.frame` #' @export sc_tabulate <- function(table, ..., .list = NULL, raw = FALSE, parse_time = TRUE, recode_zeros = inherits(table, "sc_table"), diff --git a/man/od_cache.Rd b/man/od_cache.Rd index 57f2c72e..2733abd6 100644 --- a/man/od_cache.Rd +++ b/man/od_cache.Rd @@ -14,13 +14,11 @@ od_downloads(server = "ext") \item{server}{the OGD-Server to use. \code{"ext"} for the external server (the default) or \code{"red"} for the editing server} } -\description{ -Functions to inspect the contents of the current cache. -} -\details{ -\code{\link[=od_cache_summary]{od_cache_summary()}} provides an overview of all contents of the cache through -a data.frame. It has one row for each dataset and the following columns. -All file sizes are given in bytes +\value{ +\itemize{ +\item \code{\link[=od_cache_summary]{od_cache_summary()}} provides an overview of all contents of the cache through +a data.frame. It has one row for each dataset and returns a \code{data.frame} with +the following columns in which all file sizes are given in bytes. 
\itemize{ \item \strong{\code{id}} the dataset id \item \strong{\code{updated}} the last modified time for \verb{$\{id\}.json} @@ -30,14 +28,18 @@ All file sizes are given in bytes \item \strong{\code{fields}} the total file size of all files belonging to fields (\verb{\{id\}_C*.csv}). \item \strong{\code{n_fields}} the number of field files } - -\code{\link[=od_downloads]{od_downloads()}} shows a download history for the current cache +\item \code{\link[=od_downloads]{od_downloads()}} shows a download history for the current cache and returns +a \code{data.frame} with the following columns: \itemize{ \item \strong{\code{time}} a timestamp for the download \item \strong{\code{file}} the filename \item \strong{\code{downloaded}} the download time in milliseconds } } +} +\description{ +Functions to inspect the contents of the current cache. +} \examples{ ## make sure the cache is not empty od_table("OGD_krebs_ext_KREBS_1") diff --git a/man/od_catalogue.Rd b/man/od_catalogue.Rd index cba794a0..d53fc66d 100644 --- a/man/od_catalogue.Rd +++ b/man/od_catalogue.Rd @@ -14,14 +14,8 @@ external server or \code{prod} for the production server} cached json metadata. Otherwise, the cache is updated prior to creating the catalogue using a "bulk-download" for metadata files.} } -\description{ -\strong{EXPERIMENTAL} This function parses several json metadata files at once -and combines them into a \code{data.frame} so the datasets can easily be -filtered based on categorizations, tags, number of classifications, etc. -} -\details{ -The naming, ordering and choice of the columns is likely to change. 
-Currently, the following columns are provided.\tabular{lll}{ +\value{ +a \code{data.frame} with the following structure\tabular{lll}{ \strong{Column} \tab \strong{Type} \tab \strong{Description} \cr title \tab \code{chr} \tab Title of the dataset \cr measures \tab \code{int} \tab Number of measure variables \cr @@ -42,6 +36,14 @@ The type \code{datetime} refers to the \code{POSIXct} format as returned by \cod The last column \code{"json"} contains the full json metadata as returned by \code{\link[=od_json]{od_json()}}. } +\description{ +\strong{EXPERIMENTAL} This function parses several json metadata files at once +and combines them into a \code{data.frame} so the datasets can easily be +filtered based on categorizations, tags, number of classifications, etc. +} +\details{ +The naming, ordering and choice of the columns is likely to change. +} \examples{ catalogue <- od_catalogue() catalogue diff --git a/man/od_table_save.Rd b/man/od_table_save.Rd index e4ad904d..b14e7b5f 100644 --- a/man/od_table_save.Rd +++ b/man/od_table_save.Rd @@ -15,6 +15,12 @@ od_table_local(file) \item{file}{An archive file file for the dataset. For \code{od_table_save()}, the default is \verb{\{id\}.tar.gz} where \code{id} denotes the OGD identifier.} } +\value{ +\itemize{ +\item for \code{\link[=od_table_save]{od_table_save()}}: the path to the generated file +\item for \code{\link[=od_table_local]{od_table_local()}}: the OGD identifier +} +} \description{ \code{od_table_save()} creates a tar archive containing all relevant data from the OGD portal. \code{od_table_local()} parses the tar archive and recreates the diff --git a/man/other_endpoints.Rd b/man/other_endpoints.Rd index 13e5b914..87d0ce81 100644 --- a/man/other_endpoints.Rd +++ b/man/other_endpoints.Rd @@ -29,6 +29,18 @@ the production server. 
External users should always use the default option \code \item{x}{either a response-object (package \code{httr}), an object of class \code{sc_table} or an object of class \code{sc_schema}} } +\value{ +\itemize{ +\item \code{\link[=sc_info]{sc_info()}}: a \code{data.frame} with two columns identifying possible languages +\item \code{\link[=sc_rate_limit_table]{sc_rate_limit_table()}}, \code{\link[=sc_rate_limit_schema]{sc_rate_limit_schema()}}, \code{\link[=sc_rate_limits]{sc_rate_limits()}}: a \code{list} with elements +\itemize{ +\item \code{remaining}: how many requests can be sent until the rate limit is reached +\item \code{limit}: the number of requests allowed per hour +\item \code{reset}: a timestamp when the rate limit will be reset +} +\item \code{\link[=sc_rate_limits]{sc_rate_limits()}}: +} +} \description{ Utilize the simple endpoints \verb{/info} and \verb{/table_rate_limit}. Those provide information about available locales and the amount of requests available @@ -38,14 +50,15 @@ for calls against the \verb{/table} endpoint. \itemize{ \item \code{sc_info()}: returns information about all available database languages -\item \code{sc_rate_limit_table()}: returns a \verb{3x1} dataframe with the following columns -\itemize{ -\item \code{remaining} how much requests can be sent to the \verb{/table} -endpoint until the rate limit is reached. -\item \code{limit} the number of requests allowed per hour. -\item \code{reset} a timestamp when the rate limit will be reset. -Usually, this should be less than one hour `after the current time. -} +\item \code{sc_rate_limit_table()}: returns a \code{list} with information about current requests-limits with +respect to the \verb{/table} endpoint. It +also shows when the limits reset which should be less than one hour +after the current time. + +\item \code{sc_rate_limit_schema()}: returns a \code{list} with information about current requests-limits with +respect to the \verb{/schema} endpoint. 
It +also shows when the limits reset which should be less than one hour +after the current time. \item \code{sc_rate_limits()}: gets rate limits from response headers diff --git a/man/sc_browse.Rd b/man/sc_browse.Rd index 4d8b74e6..056aaf3a 100644 --- a/man/sc_browse.Rd +++ b/man/sc_browse.Rd @@ -7,7 +7,7 @@ \alias{sc_browse_database} \alias{sc_browse_catalogue} \alias{sc_browse_ogd} -\title{Links to important STATcube and OGD pages} +\title{Links to important 'STATcube' and 'OGD' pages} \usage{ sc_browse(server = "ext") @@ -33,12 +33,16 @@ the production server. External users should always use the default option \code \item{open}{If \code{FALSE} (the default), open the infopage for the database. Otherwise, open the table view.} } +\value{ +the URL of a specific webpage which is opened by default +in a web browser. +} \description{ -A collection of links, to browse important STATcube pages. +A collection of links, to browse important 'STATcube' pages. } \section{Functions}{ \itemize{ -\item \code{sc_browse()}: opens the home menu of STATcube +\item \code{sc_browse()}: opens the home menu of 'STATcube' \item \code{sc_browse_preferences()}: opens the preference menu with the API key diff --git a/man/sc_cache.Rd b/man/sc_cache.Rd index d879d37f..e32a4bdb 100644 --- a/man/sc_cache.Rd +++ b/man/sc_cache.Rd @@ -30,6 +30,15 @@ via environment variables?} \item{x}{an object of class \code{sc_table} or \code{sc_schema}} } +\value{ +\itemize{ +\item for \code{\link[=sc_cache_enable]{sc_cache_enable()}}, \code{\link[=sc_cache_dir]{sc_cache_dir()}}: the path to the cache-directory +\item for \code{\link[=sc_cache_disable]{sc_cache_disable()}}: \code{TRUE} +\item for \code{\link[=sc_cache_enabled]{sc_cache_enabled()}}: \code{TRUE} if caching is enabled, \code{FALSE} otherwise +\item for \code{\link[=sc_cache_files]{sc_cache_files()}}: the content of the cache associated with a file +\item for \code{\link[=sc_cache_clear]{sc_cache_clear()}}: \code{NULL} +} +} \description{ 
Functions to cache requested resources in the directory \verb{~/.STATcubeR_cache} and reuse them in calls to \code{\link[=sc_table]{sc_table()}}, \code{\link[=sc_table_custom]{sc_table_custom()}} \code{\link[=sc_schema]{sc_schema()}} and so forth. diff --git a/man/sc_schema.Rd b/man/sc_schema.Rd index 1aeff0e6..dc675898 100644 --- a/man/sc_schema.Rd +++ b/man/sc_schema.Rd @@ -48,6 +48,13 @@ to \code{TRUE}. Ignored otherwise.} \item{type}{a schema type such as "DATABASE", "VALUE" or "TABLE". See \href{https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint#id-.SchemaOpenDataAPIv9.9.6-SchemaTypesandAssociatedIDSchemes}{the API reference} for a list of all schema types.} } +\value{ +\itemize{ +\item for \code{\link[=sc_schema]{sc_schema()}} and \code{\link[=sc_schema_db]{sc_schema_db()}}: an object of class \code{sc_schema} +\item for \code{\link[=sc_schema_flatten]{sc_schema_flatten()}}: a \code{data.frame} +\item for \code{\link[=sc_schema_catalogue]{sc_schema_catalogue()}}: a \code{list} +} +} \description{ Invoke the \href{https://docs.wingarc.com.au/superstar/9.12/open-data-api/open-data-api-reference/schema-endpoint}{\strong{/schema}} endpoint of the STATcube REST API. This endpoint can be used to get all available databases and tables diff --git a/man/sc_table_class.Rd b/man/sc_table_class.Rd index d0c55907..8ae7b704 100644 --- a/man/sc_table_class.Rd +++ b/man/sc_table_class.Rd @@ -5,7 +5,7 @@ \title{Class for /table responses} \description{ R6 Class for all responses of the /table endpoint of the -STATcube REST API. +'STATcube' REST API. } \keyword{internal} \section{Super class}{ diff --git a/man/sc_table_custom.Rd b/man/sc_table_custom.Rd index 15035197..113967b5 100644 --- a/man/sc_table_custom.Rd +++ b/man/sc_table_custom.Rd @@ -55,6 +55,13 @@ list if items should be grouped. See examples} \item{total}{Add totals to the field? 
If \code{map} is provided, the totals will correspond to the filtered data.} } +\value{ +\itemize{ +\item for \code{\link[=sc_table_custom]{sc_table_custom()}}: an object of class \code{sc_table} +\item for \code{\link[=sc_recode]{sc_recode()}}: a \code{list} that is a suitable input for parameter +\code{"recode"} in \code{\link[=sc_table_custom]{sc_table_custom()}} +} +} \description{ Define requests against the /table endpoint by providing URIs to databases, measures and fields. diff --git a/man/sc_tabulate.Rd b/man/sc_tabulate.Rd index ef7636ac..2aa94632 100644 --- a/man/sc_tabulate.Rd +++ b/man/sc_tabulate.Rd @@ -36,10 +36,13 @@ dataset language (\code{table$language}) is used.} \item{sort}{If \code{TRUE}, the resulting data will be sorted by all provided field values} } +\value{ +a \code{data.frame} +} \description{ \code{\link[=sc_tabulate]{sc_tabulate()}} extracts the data in the table and turns it into a tidy data.frame. It applies labeling of the data and transforms time variables -into a \code{Date} format if they satisfy certain STATcube Standards. +into a \code{Date} format if they satisfy certain 'STATcube' standards. \code{sc_tabulate(table, ...)} is just an alias for \code{table$tabulate(...)} and was added so this rather complicated method can have a separate documentation @@ -118,7 +121,7 @@ table$tabulate("C-A11-0") # -> 3 rows: "total", "male", "female" ## table$tabulate(...) is an alias for sc_tabulate(table, ...) 
sc_tabulate(table, "C-A11-0") -######################### STATcube REST API ################################# +######################## 'STATcube' REST API ################################ \dontshow{if (sc_key_exists()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} table_tourism <- sc_table(sc_example("accomodation.json"), "de") @@ -127,15 +130,6 @@ table_tourism$tabulate() table_tourism$tabulate("Saison/Tourismusmonat") table_tourism$tabulate("Saison/Tourismusmonat", "Ankünfte") table_tourism$tabulate("Ankünfte") - -## TODO: param annotations does not work currently -if (FALSE) { - table_trade <- sc_table(sc_example("foreign_trade.json"), "de") - tt <- sc_tabulate(table_trade, "Berichtsjahr", "Import, Wert in Euro", - annotations = TRUE) - tt - str(tt[['Import, Wert in Euro_a']]) -} \dontshow{\}) # examplesIf} } \seealso{ diff --git a/man/sdmx_table.Rd b/man/sdmx_table.Rd index 121e5e51..748f0380 100644 --- a/man/sdmx_table.Rd +++ b/man/sdmx_table.Rd @@ -21,7 +21,6 @@ values. \code{\link[=sdmx_table]{sdmx_table()}} should be treated as experimental for now. } \examples{ - x <- sdmx_table(system.file("sdmx/dedemo.zip", package = "STATcubeR")) # print and tabulate x