
Commit

added 'tables' argument to get_abs(), with ability to specify particular table(s) to download
MattCowgill committed Jan 14, 2019
1 parent 2dca695 commit 6bbf4d6
Showing 5 changed files with 42 additions and 10 deletions.
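Based on the documentation added to R/read_abs.R below, the new `tables` argument is intended to be used along these lines. A sketch only; these calls download spreadsheets from the ABS, so they are not run here:

library(readabs)

# read all time series tables in a catalogue (the previous behaviour)
lfs_all <- read_abs("6202.0")

# read only table 1, or tables 1 and 5
lfs_1  <- read_abs("6202.0", tables = 1)
lfs_15 <- read_abs("6202.0", tables = c(1, 5))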
DESCRIPTION (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@ Authors@R: c(
Maintainer: Matt Cowgill <[email protected]>
Description: Downloads, imports, and tidies time series data from the
Australian Bureau of Statistics <https://www.abs.gov.au/>.
- Date: 2018-05-30
+ Date: 2019-01-14
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
LICENSE (2 changes: 1 addition & 1 deletion)
@@ -1,2 +1,2 @@
YEAR:2018
- COPYRIGHT HOLDER: Zoe Meers
+ COPYRIGHT HOLDER: Zoe Meers and Matt Cowgill
R/get_abs_xml_metadata.R (13 changes: 10 additions & 3 deletions)
@@ -5,14 +5,21 @@
# given a catalogue number, download the catalogue metadata via XML, then find
# unique filenames in the latest release and return those

- get_abs_xml_metadata <- function(cat_no) {
+ get_abs_xml_metadata <- function(cat_no, table) {

+ ProductReleaseDate=TableOrder=text=NULL
+
+ if(table == "all"){
+   tables_url <- ""
+ } else {
+   tables_url <- paste0("&ttitle=", table)
+ }

# Download the first page of metadata for cat_no
first_url <- paste0("http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno=",
cat_no,
"&pg=1")
"&pg=1",
tables_url)

first_page <- XML::xmlParse(file = first_url)

@@ -47,7 +54,7 @@ get_abs_xml_metadata <- function(cat_no) {
xml_dfs <- list()
while(current == TRUE){

- xml_df <- get_xml_df(cat_no = cat_no, metadata_page = all_pages[i])
+ xml_df <- get_xml_df(cat_no = cat_no, table = table, metadata_page = all_pages[i])

xml_dfs[[i]] <- xml_df

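A minimal sketch of the query-string logic added above: requesting a specific table appends a `ttitle` filter to the ABS time series search servlet URL, while "all" leaves the URL unchanged. (`build_tables_url` is a hypothetical helper name used here for illustration only; the package inlines the if/else.)

# hypothetical helper mirroring the if/else added to get_abs_xml_metadata()
build_tables_url <- function(table) {
  if (table == "all") "" else paste0("&ttitle=", table)
}

paste0("http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno=",
       "6202.0", "&pg=1", build_tables_url(1))
#> [1] "http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno=6202.0&pg=1&ttitle=1"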
R/get_xml_df.R (21 changes: 18 additions & 3 deletions)
@@ -2,15 +2,30 @@
#' @importFrom XML xmlParse xmlToDataFrame
#' @importFrom dplyr filter select "%>%"

- get_xml_df <- function(cat_no, metadata_page){
+ get_xml_df <- function(cat_no, table, metadata_page){

text=NULL

if(table == "all"){
tables_url <- ""
} else {
tables_url <- paste0("&ttitle=", table)
}

base_url <- "http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno="

url <- paste0(base_url, cat_no, "&pg=", metadata_page)
url <- paste0(base_url, cat_no, "&pg=", metadata_page, tables_url)

+ safe_parse <- purrr::safely(XML::xmlParse)

- xml_page <- XML::xmlParse(file = url)
+ xml_page <- safe_parse(file = url)

+ if(is.null(xml_page$error)){
+   xml_page <- xml_page$result
+ } else {
+   stop(paste0("Error: the following URL does not contain valid ABS metadata:\n",
+        url))
+ }

xml_df <- XML::xmlToDataFrame(xml_page, stringsAsFactors = FALSE)

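The error handling above relies on purrr::safely(), which wraps a function so it returns a list with $result and $error components instead of throwing. A minimal illustration of the pattern, assuming purrr is installed:

library(purrr)

safe_log <- safely(log)

ok  <- safe_log(100)     # ok$result is 4.60517; ok$error is NULL
bad <- safe_log("oops")  # bad$result is NULL; bad$error is a simpleError

# the commit applies the same check: use $result when $error is NULL,
# otherwise stop() with an informative message
if (is.null(ok$error)) ok$result else stop("parsing failed")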
R/read_abs.R (14 changes: 12 additions & 2 deletions)
@@ -8,6 +8,10 @@
#' @param cat_no ABS catalogue number, as a string, including the extension.
#' For example, "6202.0".
#'
+ #' @param tables Time series tables in `cat_no` to download and extract. Default is "all",
+ #' which will read all time series in `cat_no`. Specify `tables` to
+ #' download and import specific table(s) - eg. `tables = 1` or `tables = c(1, 5)`.
+ #'
#' @param path Local directory in which to save downloaded ABS time series
#' spreadsheets. Default is "data/ABS"; this subdirectory of your working
#' directory will be created if it does not exist.
@@ -24,11 +28,12 @@
#'
#' \donttest{wpi <- read_abs("6345.0")}
#'
- #' @importFrom purrr walk walk2 map
+ #' @importFrom purrr walk walk2 map map2_dfr
#' @name read_abs
#' @export

read_abs <- function(cat_no = NULL,
tables = "all",
path = "data/ABS",
show_progress_bars = TRUE){

@@ -44,9 +49,14 @@ read_abs <- function(cat_no = NULL,
stop("Please ensure you include the cat_no extension, eg. '6202.0', not '6202'")
}

+ if(is.null(tables)){
+   message(paste0("`tables` not specified; attempting to fetch all tables from ", cat_no))
+ }

# find URLs from cat_no
message(paste0("Finding filenames for tables from ABS catalogue ", cat_no))
- xml_dfs <- get_abs_xml_metadata(cat_no = cat_no)
+ xml_dfs <- purrr::map2_dfr(cat_no, tables,
+                            .f = get_abs_xml_metadata)

urls <- unique(xml_dfs$TableURL)
urls <- gsub(".test", "", urls)
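read_abs() now assembles its metadata with purrr::map2_dfr(), which calls a function on pairs of inputs and row-binds the resulting data frames; a length-1 input (here, the single cat_no) is recycled against the vector of tables. A small sketch of that behaviour with a stand-in function (fake_metadata is illustrative only; the real call maps get_abs_xml_metadata):

library(purrr)

# stand-in for get_abs_xml_metadata(), returning a one-row data frame
fake_metadata <- function(cat_no, table) {
  data.frame(cat_no = cat_no, table = table, stringsAsFactors = FALSE)
}

map2_dfr("6202.0", c(1, 5), .f = fake_metadata)
#>   cat_no table
#> 1 6202.0     1
#> 2 6202.0     5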

