diff --git a/.Rbuildignore b/.Rbuildignore
index 588683b..e87d948 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,3 +1,8 @@
^phsopendata\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
+^\.github$
+^codecov\.yml$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.github/.gitignore b/.github/.gitignore
new file mode 100644
index 0000000..2d19fc7
--- /dev/null
+++ b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..0f1a241
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,12 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+ - package-ecosystem: "github-actions" # See documentation for possible values
+ directory: "/" # Location of package manifests
+ schedule:
+ interval: "weekly"
+ day: "sunday"
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
new file mode 100644
index 0000000..001c2e5
--- /dev/null
+++ b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,58 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: R-CMD-check
+
+jobs:
+ R-CMD-check:
+ runs-on: ${{ matrix.config.os }}
+
+ name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+ strategy:
+ fail-fast: false
+ matrix:
+ config:
+ # Windows
+ - {os: windows-latest, r: 'release'}
+ # Use 3.6 to trigger usage of RTools35
+ - {os: windows-latest, r: '3.6'}
+ # Linux
+ - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
+ - {os: ubuntu-latest, r: 'release'}
+ # Mac OS
+ - {os: macos-latest, r: 'release'}
+
+ # Vesions in use on PHS PWB
+ - {os: ubuntu-latest, r: '4.0.2'}
+ - {os: ubuntu-latest, r: '4.1.2'}
+ - {os: ubuntu-latest, r: '3.6.1'}
+
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ R_KEEP_PKG_SOURCE: yes
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ r-version: ${{ matrix.config.r }}
+ http-user-agent: ${{ matrix.config.http-user-agent }}
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::rcmdcheck
+ needs: check
+
+ - uses: r-lib/actions/check-r-package@v2
+ with:
+ upload-snapshots: true
diff --git a/.github/workflows/document.yaml b/.github/workflows/document.yaml
new file mode 100644
index 0000000..8ac7bab
--- /dev/null
+++ b/.github/workflows/document.yaml
@@ -0,0 +1,42 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ paths: ["R/**"]
+
+name: Document
+
+jobs:
+ document:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Setup R
+ uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - name: Install dependencies
+ uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::roxygen2
+ needs: roxygen2
+
+ - name: Document
+ run: roxygen2::roxygenise()
+ shell: Rscript {0}
+
+ - name: Commit and push changes
+ run: |
+ git config --local user.name "$GITHUB_ACTOR"
+ git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+ git add man/\* NAMESPACE DESCRIPTION
+ git commit -m "Update documentation" || echo "No changes to commit"
+ git pull --ff-only
+ git push origin
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..5dea707
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,30 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: lint
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::lintr, local::.
+ needs: lint
+
+ - name: Lint
+ run: lintr::lint_package()
+ shell: Rscript {0}
diff --git a/.github/workflows/phsopendata.yml b/.github/workflows/phsopendata.yml
deleted file mode 100644
index cea7ca2..0000000
--- a/.github/workflows/phsopendata.yml
+++ /dev/null
@@ -1,85 +0,0 @@
-# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
-on:
- push:
- branches:
- - main
- - master
- pull_request:
- branches:
- - main
- - master
-
-name: R-CMD-check
-
-jobs:
- R-CMD-check:
- runs-on: ${{ matrix.config.os }}
-
- name: ${{ matrix.config.os }} (${{ matrix.config.r }})
-
- strategy:
- fail-fast: false
- matrix:
- config:
- - {os: windows-latest, r: 'release'}
- - {os: macOS-latest, r: 'release'}
- - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
-
- env:
- R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
- RSPM: ${{ matrix.config.rspm }}
- GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-
- steps:
- - uses: actions/checkout@v2
-
- - uses: r-lib/actions/setup-r@v1
- with:
- r-version: ${{ matrix.config.r }}
-
- - uses: r-lib/actions/setup-pandoc@v1
-
- - name: Query dependencies
- run: |
- install.packages('remotes')
- saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
- writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
- shell: Rscript {0}
-
- - name: Restore R package cache
- uses: actions/cache@v2
- with:
- path: ${{ env.R_LIBS_USER }}
- key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
- restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
-
- - name: Install system dependencies
- if: runner.os == 'Linux'
- run: |
- while read -r cmd
- do
- eval sudo $cmd
- done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
-
- - name: Install dependencies
- run: |
- remotes::install_deps(dependencies = TRUE)
- remotes::install_cran("rcmdcheck")
- shell: Rscript {0}
-
- - name: Check
- env:
- _R_CHECK_CRAN_INCOMING_REMOTE_: false
- run: |
- options(crayon.enabled = TRUE)
- rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
- shell: Rscript {0}
-
- - name: Upload check results
- if: failure()
- uses: actions/upload-artifact@main
- with:
- name: ${{ runner.os }}-r${{ matrix.config.r }}-results
- path: check
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
new file mode 100644
index 0000000..a7276e8
--- /dev/null
+++ b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,48 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+ release:
+ types: [published]
+ workflow_dispatch:
+
+name: pkgdown
+
+jobs:
+ pkgdown:
+ runs-on: ubuntu-latest
+ # Only restrict concurrency for non-PR jobs
+ concurrency:
+ group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::pkgdown, local::.
+ needs: website
+
+ - name: Build site
+ run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+ shell: Rscript {0}
+
+ - name: Deploy to GitHub pages 🚀
+ if: github.event_name != 'pull_request'
+ uses: JamesIves/github-pages-deploy-action@v4.5.0
+ with:
+ clean: false
+ branch: gh-pages
+ folder: docs
diff --git a/.github/workflows/render-README.yml b/.github/workflows/render-README.yml
index 49b2b1e..2a343d6 100644
--- a/.github/workflows/render-README.yml
+++ b/.github/workflows/render-README.yml
@@ -6,18 +6,20 @@ on:
jobs:
render:
name: Render README
- runs-on: macOS-latest
+ runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- - uses: actions/checkout@v2
- - uses: r-lib/actions/setup-r@v1
- - uses: r-lib/actions/setup-pandoc@v1
+ - uses: actions/checkout@v4
+ - uses: r-lib/actions/setup-r@v2
+ - uses: r-lib/actions/setup-pandoc@v2
- name: Install rmarkdown
run: Rscript -e 'install.packages("rmarkdown")'
- name: Render README
run: Rscript -e 'rmarkdown::render("README.Rmd", output_format = "md_document")'
- name: Commit results
run: |
+ git config --local user.name "$GITHUB_ACTOR"
+ git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
git commit README.md -m 'Re-build README.Rmd' || echo "No changes to commit"
git push origin || echo "No changes to commit"
diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
new file mode 100644
index 0000000..6eea13f
--- /dev/null
+++ b/.github/workflows/style.yaml
@@ -0,0 +1,73 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"]
+
+name: Style
+
+jobs:
+ style:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Setup R
+ uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - name: Install dependencies
+ uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::styler, any::roxygen2
+ needs: styler
+
+ - name: Enable styler cache
+ run: styler::cache_activate()
+ shell: Rscript {0}
+
+ - name: Determine cache location
+ id: styler-location
+ run: |
+ cat(
+ "location=",
+ styler::cache_info(format = "tabular")$location,
+ "\n",
+ file = Sys.getenv("GITHUB_OUTPUT"),
+ append = TRUE,
+ sep = ""
+ )
+ shell: Rscript {0}
+
+ - name: Cache styler
+ uses: actions/cache@v3
+ with:
+ path: ${{ steps.styler-location.outputs.location }}
+ key: ${{ runner.os }}-styler-${{ github.sha }}
+ restore-keys: |
+ ${{ runner.os }}-styler-
+ ${{ runner.os }}-
+
+ - name: Style
+ run: styler::style_pkg()
+ shell: Rscript {0}
+
+ - name: Commit and push changes
+ run: |
+ if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \
+ | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$'))
+ then
+ git config --local user.name "$GITHUB_ACTOR"
+ git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+ git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)"
+ git pull --ff-only
+ git push origin
+ else
+ echo "No changes to commit."
+ fi
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
new file mode 100644
index 0000000..c931821
--- /dev/null
+++ b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: test-coverage
+
+jobs:
+ test-coverage:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::covr
+ needs: coverage
+
+ - name: Test coverage
+ run: |
+ covr::codecov(
+ quiet = FALSE,
+ clean = FALSE,
+ install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+ )
+ shell: Rscript {0}
+
+ - name: Show testthat output
+ if: always()
+ run: |
+ ## --------------------------------------------------------------------
+ find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+ shell: bash
+
+ - name: Upload test results
+ if: failure()
+ uses: actions/upload-artifact@v3
+ with:
+ name: coverage-test-failures
+ path: ${{ runner.temp }}/package
diff --git a/.gitignore b/.gitignore
index 807ea25..c2f15ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
.Rproj.user
.Rhistory
.RData
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index e76c493..02cf416 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: phsopendata
Title: Extract Open Data from opendata.nhs.scot
-Version: 0.0.1.000
+Version: 0.1.0
Authors@R:
c(person(given = "Csilla", family = "Scharle", role = c("aut", "cre"), email = "csilla.scharle2@phs.scot"),
person("James", "McMahon", email = "james.mcmahon@phs.scot", role = "aut"),
@@ -20,9 +20,13 @@ Imports:
cli,
xml2
Suggests:
+ covr,
testthat (>= 3.0.0)
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Config/testthat/edition: 3
+URL: https://github.com/Public-Health-Scotland/phsopendata,
+ https://public-health-scotland.github.io/phsopendata/
+BugReports: https://github.com/Public-Health-Scotland/phsopendata/issues
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..5245345
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,4 @@
+# phsopendata 0.1.0 (2021-07-22)
+
+- Initial package release.
+- `get_dataset()`, `get_resource()` and `get_resource_sql()` functions added.
diff --git a/R/check_dataset_name.R b/R/check_dataset_name.R
index eb9ae91..2e4744f 100644
--- a/R/check_dataset_name.R
+++ b/R/check_dataset_name.R
@@ -6,20 +6,20 @@
#' @param dataset_name a resource ID
#'
check_dataset_name <- function(dataset_name) {
-
# Starts and ends in a lowercase letter or number
# Has only lowercase alphanum or hyphens inbetween
dataset_name_regex <- "^[a-z0-9][a-z0-9\\-]+?[a-z0-9]$"
- if (!inherits(dataset_name, "character"))
+ if (!inherits(dataset_name, "character")) {
cli::cli_abort(c(
"The dataset name supplied {.var {dataset_name}} is invalid.",
"x" = "dataset_name must be of type character.",
"i" = "You supplied a {.cls {class(dataset_name)[0]}} value."
))
+ }
- if (!grepl(dataset_name_regex, dataset_name))
+ if (!grepl(dataset_name_regex, dataset_name)) {
cli::cli_abort(c(
"The dataset name supplied {.var {dataset_name}} is invalid",
"x" = "dataset_name must be in dash-case
@@ -27,5 +27,5 @@ check_dataset_name <- function(dataset_name) {
"i" = "You can find dataset names in the URL
of a dataset's page on {.url www.opendata.nhs.scot}."
))
-
+ }
}
diff --git a/R/check_res_id.R b/R/check_res_id.R
index c449fca..5b8dc1f 100644
--- a/R/check_res_id.R
+++ b/R/check_res_id.R
@@ -7,30 +7,31 @@
#'
#' @return TRUE / FALSE indicating the validity of the res_id
check_res_id <- function(res_id) {
-
# check res_id is single value
- if (length(res_id) > 1)
+ if (length(res_id) > 1) {
cli::cli_abort(c(
"Argument {.var res_id} must be of length 1.",
i = "You supplied a res_id with a length of {length(res_id)}",
x = "`get_resource` does not currently support
requests for multiple resources simultaneously."
))
+ }
# check res_id is character
- if (!inherits(res_id, "character"))
+ if (!inherits(res_id, "character")) {
cli::cli_abort(c(
"Argument {.var res_id} must be of type character",
i = "You supplied a {.var res_id} with type {.cls {class(res_id)[1]}}"
))
+ }
# check regex pattern
res_id_regex <-
"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
- if (!grepl(res_id_regex, res_id))
+ if (!grepl(res_id_regex, res_id)) {
cli::cli_abort(c(
"Argument {.var res_id} is in an invalid format.",
i = "You can find a resource's ID in the URL of it's page on {.url www.opendata.nhs.scot}."
))
-
+ }
}
diff --git a/R/dump_download.R b/R/dump_download.R
index 4c9d7e3..2058e2e 100644
--- a/R/dump_download.R
+++ b/R/dump_download.R
@@ -4,19 +4,18 @@
#' @return dataframe containing resource records
#'
dump_download <- function(res_id) {
-
# fetch the data
content <- suppressMessages(
phs_GET("dump", res_id)
)
# if content is a web page
- if ("xml_document" %in% class(content))
+ if ("xml_document" %in% class(content)) {
cli::cli_abort(c(
"Can't find resource with ID {.var {res_id}} in datastore."
))
+ }
# return data
return(content[, -1])
-
}
diff --git a/R/error_check.R b/R/error_check.R
index 6802376..cb670f8 100644
--- a/R/error_check.R
+++ b/R/error_check.R
@@ -3,20 +3,23 @@
#' @param content object produced by `httr::content`
#'
error_check <- function(content) {
-
# if content is not a list,
# stop for content (a string describing an error)
- if (!is.list(content)) cli::cli_abort(c(
- "API error",
- x = content
- ))
+ if (!is.list(content)) {
+ cli::cli_abort(c(
+ "API error",
+ x = content
+ ))
+ }
# if there is no error status/message in the content,
# break out of the function
is_error <- suppressWarnings(
!is.null(content$error)
)
- if (!is_error) return()
+ if (!is_error) {
+ return()
+ }
# generate error message and stop
error_text <- parse_error(content$error)
@@ -24,5 +27,4 @@ error_check <- function(content) {
"API error.",
x = error_text
))
-
}
diff --git a/R/get_dataset.R b/R/get_dataset.R
index 878fe71..0afd7fc 100644
--- a/R/get_dataset.R
+++ b/R/get_dataset.R
@@ -19,32 +19,82 @@
#' max_resources = 2, rows = 10
#' )
get_dataset <- function(dataset_name, max_resources = NULL, rows = NULL) {
-
# throw error if name type/format is invalid
check_dataset_name(dataset_name)
# define query and try API call
query <- paste0("id=", dataset_name)
content <- try(
- phs_GET("package_show", query), silent = TRUE
+ phs_GET("package_show", query),
+ silent = TRUE
)
# if content contains a 'Not Found Error'
# throw error with suggested dataset name
- if (grepl("Not Found Error", content[1]))
+ if (grepl("Not Found Error", content[1])) {
suggest_dataset_name(dataset_name)
+ }
# define list of resource IDs to get
- all_ids <- purrr::map_chr(content$result$resources, ~.x$id)
+ all_ids <- purrr::map_chr(content$result$resources, ~ .x$id)
n_res <- length(all_ids)
res_index <- 1:min(n_res, max_resources)
ids_selection <- all_ids[res_index]
- all_data <- purrr::map_dfr(
+ # get all resources
+ all_data <- purrr::map(
ids_selection,
get_resource,
rows = rows
)
- return(all_data)
+ # resolve class issues
+ types <- purrr::map(
+ all_data,
+ ~ unlist(lapply(.x, class))
+ )
+
+ # for each df, check if next df class matches
+ inconsistencies <- vector(length = length(types) - 1, mode = "list")
+ for (i in seq_along(types)) {
+ if (i == length(types)) break
+
+ this_types <- types[[i]]
+ next_types <- types[[i + 1]]
+
+ # find matching names
+ matching_names <- suppressWarnings(
+ names(this_types) == names(next_types)
+ )
+
+ # of matching name cols, find if types match too
+ inconsistent_index <- this_types[matching_names] != next_types[matching_names]
+ inconsistencies[[i]] <- this_types[matching_names][inconsistent_index]
+ }
+
+ # define which columns to coerce and warn
+ conflicts <- unlist(inconsistencies)
+ to_coerce <- unique(names(conflicts))
+
+ if (length(to_coerce) > 0) {
+ cli::cli_warn(c(
+ "Due to conflicts between column types across resources,
+ the following {cli::qty(to_coerce)} column{?s} ha{?s/ve} been coerced to type character:",
+ "{.val {to_coerce}}"
+ ))
+ }
+
+ # combine
+ combined <- purrr::map_df(
+ all_data,
+ ~ dplyr::mutate(
+ .x,
+ dplyr::across(
+ dplyr::any_of(to_coerce),
+ as.character
+ )
+ )
+ )
+
+ return(combined)
}
diff --git a/R/get_resource.R b/R/get_resource.R
index 204acda..55b06d1 100644
--- a/R/get_resource.R
+++ b/R/get_resource.R
@@ -22,7 +22,6 @@
#'
#' df <- get_resource(res_id = res_id, row_filters = filters, col_select = wanted_cols)
get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = NULL) {
-
# check res_id
check_res_id(res_id)
@@ -35,8 +34,9 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N
)
# if dump should be used, use it
- if (use_dump_check(query, rows))
+ if (use_dump_check(query, rows)) {
return(dump_download(res_id))
+ }
# if there is no row limit set
# set limit to CKAN max
@@ -55,7 +55,7 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N
# AND the user was not aware of this limit (`rows` defaulted to NULL)
# warn the user about this limit.
total_rows <- res_content$result$total
- if (is.null(rows) && query$limit < total_rows)
+ if (is.null(rows) && query$limit < total_rows) {
cli::cli_warn(c(
"Returning the first {query$limit}
results (rows) of your query.
@@ -63,14 +63,16 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N
i = "To get ALL matching rows you will need to download
the whole resource and apply filters/selections locally."
))
+ }
# if more rows were requested than received
# let the user know
- if (!is.null(rows) && query$limit > total_rows)
+ if (!is.null(rows) && query$limit > total_rows) {
cli::cli_alert_warning(c(
"You set {.var rows} to {query$limit} but
only {total_rows} rows matched your query."
))
+ }
# extract data from response content
data <- purrr::map_dfr(
diff --git a/R/get_resource_sql.R b/R/get_resource_sql.R
index d6c1341..fbc588e 100644
--- a/R/get_resource_sql.R
+++ b/R/get_resource_sql.R
@@ -45,42 +45,44 @@
#'
#' # This is equivalent to:
#' cols <- c(
-#' "TotalCancelled", "TotalOperations",
-#' "Hospital", "Month"
+#' "TotalCancelled", "TotalOperations",
+#' "Hospital", "Month"
#' )
#' row_filter <- c(Hospital = "D102H")
#'
#' df2 <- get_resource(
-#' "bcc860a4-49f4-4232-a76b-f559cf6eb885",
-#' col_select = cols,
-#' row_filters = row_filter
+#' "bcc860a4-49f4-4232-a76b-f559cf6eb885",
+#' col_select = cols,
+#' row_filters = row_filter
#' )
get_resource_sql <- function(sql) {
-
- if (length(sql) > 1)
+ if (length(sql) > 1) {
cli::cli_abort(c(
"SQL validation error.",
i = "{.var sql} must be of length 1",
x = "You entered an object of length {length(sql)}."
))
+ }
- if (!("character" %in% class(sql)))
+ if (!("character" %in% class(sql))) {
cli::cli_abort(c(
"SQL validation error.",
i = "{.var sql} must be of class {.cls character}",
x = "You entered an object of class {.cls {class(sql)[1]}}."
))
+ }
# remove spaces
sql <- gsub(" ", "", sql)
sql <- gsub("\n", "", sql)
# check query is a SELECT statement
- if (substr(sql, 1, 6) != "SELECT")
+ if (substr(sql, 1, 6) != "SELECT") {
cli::cli_abort(c(
"SQL validation error.",
i = "{.var sql} must start with SELECT"
))
+ }
# add query field prefix
query <- paste0("sql=", sql)
@@ -91,19 +93,19 @@ get_resource_sql <- function(sql) {
# get correct order of columns
order <- purrr::map_chr(
content$result$fields,
- ~.x$id
+ ~ .x$id
)
# extract the records (rows) from content
data <- purrr::map_dfr(
content$result$records,
- ~{
- # replace NULL with "" so tibble works
- is_null <- purrr::map_lgl(.x, is.null)
- .x[is_null] <- ""
+ ~ {
+ # replace NULL with "" so tibble works
+ is_null <- purrr::map_lgl(.x, is.null)
+ .x[is_null] <- ""
- tibble::as_tibble(.x)
- }
+ tibble::as_tibble(.x)
+ }
)
# select and reorder columns to reflect
@@ -123,6 +125,4 @@ get_resource_sql <- function(sql) {
}
return(cleaner)
-
}
-
diff --git a/R/parse_col_select.R b/R/parse_col_select.R
index c0980dd..82232b2 100644
--- a/R/parse_col_select.R
+++ b/R/parse_col_select.R
@@ -4,12 +4,11 @@
#' @param col_select a character vector identifying the columns to select.
#' @return a character string
parse_col_select <- function(col_select) {
-
- if (is.null(col_select))
+ if (is.null(col_select)) {
return(NULL)
+ }
return(
paste0(col_select, collapse = ",")
)
-
}
diff --git a/R/parse_error.R b/R/parse_error.R
index 1cc9f07..353d91a 100644
--- a/R/parse_error.R
+++ b/R/parse_error.R
@@ -14,13 +14,11 @@ parse_error <- function(error) {
# special case for validation errors
if (error_type == "Validation Error") {
-
error_output <- paste0(names(error[1][1]), ": ", error[1][[1]])
# translate message for package users
error_output <- sub("fields", "col_select", error_output)
error_output <- sub("q", "row_filters", error_output)
-
}
# special case for SQL validation errors
diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
index 1cc8ce4..059d5ef 100644
--- a/R/parse_row_filters.R
+++ b/R/parse_row_filters.R
@@ -3,20 +3,21 @@
#' @param row_filters list or named vectors matching fileds to values
#' @return a json as a character string
parse_row_filters <- function(row_filters) {
-
# exit function if no filters
- if (is.null(row_filters))
+ if (is.null(row_filters)) {
return(NULL)
+ }
# check if any filters in list have length > 1
too_many <- sapply(row_filters, length) > 1
- if (any(too_many))
+ if (any(too_many)) {
cli::cli_abort(c(
"Invalid input for {.var row_filters}",
i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ",
x = "The {.var row_filters} list must only contain vectors of length 1."
))
+ }
# check if any items in the list/vector have the same name
# find number of unique names
@@ -26,19 +27,20 @@ parse_row_filters <- function(row_filters) {
# if same, all names are unique
unique_names <- n_u_row_filters == n_row_filters
- if (!unique_names)
+ if (!unique_names) {
cli::cli_abort(c(
"Invalid input for {.var row_filters}",
x = "One or more elements in {.var row_filters} have the same name.",
i = "Only one filter per field is currently supported by `get_resource`."
))
+ }
filter_body <- paste0(
- '"', names(row_filters), '":"', row_filters, '"', collapse = ","
+ '"', names(row_filters), '":"', row_filters, '"',
+ collapse = ","
)
return(
- paste0('{', filter_body, '}')
+ paste0("{", filter_body, "}")
)
-
}
diff --git a/R/phs_GET.R b/R/phs_GET.R
index d7fd156..f5e588a 100644
--- a/R/phs_GET.R
+++ b/R/phs_GET.R
@@ -6,7 +6,6 @@
#' @return content of a httr::GET request
#'
phs_GET <- function(action, query, verbose = FALSE) {
-
# define URL
url <- request_url(action, query)
@@ -19,11 +18,12 @@ phs_GET <- function(action, query, verbose = FALSE) {
)
# Check for response from server
- if (!inherits(response, "response"))
+ if (!inherits(response, "response")) {
cli::cli_abort(c(
"Can't connect to the CKAN server.",
i = "Check your network/proxy settings."
))
+ }
# extract content from HTTP response
content <- httr::content(
@@ -35,5 +35,4 @@ phs_GET <- function(action, query, verbose = FALSE) {
if (verbose) cat("GET request successful.\n")
return(content)
-
}
diff --git a/R/request_url.R b/R/request_url.R
index 7aa65b0..0798ec9 100644
--- a/R/request_url.R
+++ b/R/request_url.R
@@ -5,26 +5,26 @@
#' @return a URL as a character string
#'
request_url <- function(action, query) {
-
# check action is valid
valid_actions <- c("datastore_search", "datastore_search_sql", "dump", "package_show", "package_list")
- if (!(action %in% valid_actions))
+ if (!(action %in% valid_actions)) {
cli::cli_abort(c(
"API call failed.",
x = "Invalid {.var action} argument in request."
))
+ }
# return dump URL
- if (action == "dump")
+ if (action == "dump") {
return(paste0(
"https://www.opendata.nhs.scot/datastore/dump/",
query, "?bom=true"
))
+ }
# return standard API endpoint (i.e., not dump)
return(paste0(
"https://www.opendata.nhs.scot/api/3/action/",
action, "?", query
))
-
}
diff --git a/R/suggest_dataset_name.R b/R/suggest_dataset_name.R
index f1dd34b..0a61cbe 100644
--- a/R/suggest_dataset_name.R
+++ b/R/suggest_dataset_name.R
@@ -4,7 +4,6 @@
#' @param dataset_name a string to be matched against valid dataset names
#'
suggest_dataset_name <- function(dataset_name) {
-
content <- phs_GET("package_list", "")
dataset_names <- unlist(content$result)
@@ -13,13 +12,14 @@ suggest_dataset_name <- function(dataset_name) {
string_distances <- stringdist::stringdist(dataset_name, dataset_names)
# if min distance is too big, abort
- if (min(string_distances) > 10)
+ if (min(string_distances) > 10) {
cli::cli_abort(c(
"Can't find the dataset name
{.var {dataset_name}}, or a close match.",
i = "Find a dataset's name in the URL
of it's page on {.url www.opendata.nhs.scot.}"
))
+ }
# find closet match
closest_match <- dataset_names[which.min(string_distances)]
@@ -29,5 +29,4 @@ suggest_dataset_name <- function(dataset_name) {
"Can't find the dataset name {.var {dataset_name}}.",
"i" = "Did you mean '{closest_match}'?"
))
-
}
diff --git a/R/use_dump_check.R b/R/use_dump_check.R
index 632d96b..15d9ba1 100644
--- a/R/use_dump_check.R
+++ b/R/use_dump_check.R
@@ -5,7 +5,6 @@
#' @return a logical value. TRUE indicates that the dump should be used
#'
use_dump_check <- function(query, rows) {
-
# if row input is > 99999 or NULL
# or all queries (inc. rows) are null
# then use GET datastore_dump
@@ -21,7 +20,7 @@ use_dump_check <- function(query, rows) {
# if user queried the data
queried <- !is.null(query$q) || !is.null(query$filter) || !is.null(query$fields)
- if (queried && use_dump)
+ if (queried && use_dump) {
cli::cli_warn(c(
"Invalid combination of {.var rows}, {.var row_filters}
and/or {.var col_select}.",
@@ -29,12 +28,13 @@ use_dump_check <- function(query, rows) {
AND query its rows/columns.",
i = "ALL rows and columns of the resource will be downloaded."
))
+ }
# warn users if they haven't queried
# the data but have requested rows > 99999
if (is.null(rows)) rows <- 0
- if (!queried && rows > 99999)
+ if (!queried && rows > 99999) {
cli::cli_warn(c(
"Getting all rows of resource.",
i = "All rows will be returned if you
@@ -42,8 +42,8 @@ use_dump_check <- function(query, rows) {
i = "You set {.var rows} to
{format(rows, big.mark = ',', scientific = FALSE)}"
))
+ }
return(use_dump)
}
-
diff --git a/README.Rmd b/README.Rmd
index a5083a8..5c03111 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -16,7 +16,9 @@ knitr::opts_chunk$set(
# phsopendata
-[![R build status](https://github.com/Public-Health-Scotland/phsopendata/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions)
+[![GitHub release (latest by date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsopendata)](https://github.com/Public-Health-Scotland/phsopendata/releases/latest)
+[![R-CMD-check](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml)
+[![Codecov test coverage](https://codecov.io/gh/Public-Health-Scotland/phsopendata/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsopendata?branch=master)
`phsopendata` contains functions to interact with open data from the [Scottish Health and Social Care Open Data platform](https://www.opendata.nhs.scot/) via the CKAN API.
@@ -87,14 +89,14 @@ In this example, we are downloading GP Practice Population Demographics from: [o
```{r example dataset, eval = FALSE}
-# if max_resources is not set, all resources will be returned by default.
+# if max_resources is not set, all resources will be returned by default.
# Here we pull 10 rows from the first 2 resources only
get_dataset("gp-practice-populations", max_resources = 2, rows = 10)
```
## Contributing to phsopendata
-At present, this package is maintained by [David Aikman](https://github.com/daikman).
+At present, this package is maintained by [Csilla Scharle](https://github.com/csillasch).
If you have requests or suggestions for additional functionality, please contact the package maintainer and/or the [PHS Open Data team](phs.opendata@phs.scot).
diff --git a/README.md b/README.md
index d2d8623..e8aad17 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,14 @@
-phsopendata
-===========
+# phsopendata
-[![R build
-status](https://github.com/Public-Health-Scotland/phsopendata/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions)
+[![GitHub release (latest by
+date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsopendata)](https://github.com/Public-Health-Scotland/phsopendata/releases/latest)
+[![R-CMD-check](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml)
+[![Codecov test
+coverage](https://codecov.io/gh/Public-Health-Scotland/phsopendata/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsopendata?branch=master)
`phsopendata` contains functions to interact with open data from the
@@ -26,8 +28,7 @@ For extracting metadata and search functionality, we recommend using the
RStudio. However, depending on firewall settings, proxy use may need to
be configured with `use_proxy()`.
-Installation
-------------
+## Installation
You need to install `phsopendata` from GitHub, which requires a package
like `remotes` or `devtools`.
@@ -38,16 +39,14 @@ Using `remotes` you run this to install the package:
upgrade = "never"
)
-Examples
---------
+## Examples
### Downloading a data table with `get_resource()`
To extract a specific resource, you will need it’s unique identifier -
resource id. This can be found in the dataset metadata, the URL of a
-resource’s page on
-https://www.opendata.nhs.scot/,
-or extracted using `ckanr::package_show`.
+resource’s page on , or extracted using
+`ckanr::package_show`.
library(phsopendata)
@@ -93,15 +92,14 @@ from:
[opendata.nhs.scot/dataset/*gp-practice-populations*](https://www.opendata.nhs.scot/dataset/gp-practice-populations),
so the dataset name will be gp-practice-populations.
- # if max_resources is not set, all resources will be returned by default.
+ # if max_resources is not set, all resources will be returned by default.
# Here we pull 10 rows from the first 2 resources only
get_dataset("gp-practice-populations", max_resources = 2, rows = 10)
-Contributing to phsopendata
----------------------------
+## Contributing to phsopendata
-At present, this package is maintained by the [PHS Open Data
-team](phs.opendata@phs.scot).
+At present, this package is maintained by [Csilla
+Scharle](https://github.com/csillasch).
If you have requests or suggestions for additional functionality, please
contact the package maintainer and/or the [PHS Open Data
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 0000000..0dcfc08
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,4 @@
+url: https://public-health-scotland.github.io/phsopendata/
+template:
+ bootstrap: 5
+
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..04c5585
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,14 @@
+comment: false
+
+coverage:
+ status:
+ project:
+ default:
+ target: auto
+ threshold: 1%
+ informational: true
+ patch:
+ default:
+ target: auto
+ threshold: 1%
+ informational: true
diff --git a/tests/testthat/test-dump_download.R b/tests/testthat/test-dump_download.R
index befa3a2..ebf1c75 100644
--- a/tests/testthat/test-dump_download.R
+++ b/tests/testthat/test-dump_download.R
@@ -1,17 +1,13 @@
test_that("throws error for non-existent res_ids", {
-
expect_error(
data <- phsopendata:::dump_download("not-real"),
regexp = "Can't find resource with ID"
)
-
})
test_that("downloads full resource", {
-
data <- phsopendata:::dump_download("a794d603-95ab-4309-8c92-b48970478c14")
expect_equal(nrow(data), 926)
expect_equal(ncol(data), 15)
-
})
diff --git a/tests/testthat/test-error_check.R b/tests/testthat/test-error_check.R
index 1b55343..745ed17 100644
--- a/tests/testthat/test-error_check.R
+++ b/tests/testthat/test-error_check.R
@@ -1,4 +1,4 @@
-test_that("returns nothing if no error", {
+test_that("returns nothing if no error", {
content <- httr::content(
httr::GET(
phsopendata:::request_url("package_list", "")
@@ -9,7 +9,7 @@ test_that("returns nothing if no error", {
)
})
-test_that("throws error if error in httr content", {
+test_that("throws error if error in httr content", {
content <- httr::content(
httr::GET(
phsopendata:::request_url("datastore_search", "id=doop")
diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R
index debcf35..cab33b0 100644
--- a/tests/testthat/test-get_dataset.R
+++ b/tests/testthat/test-get_dataset.R
@@ -12,11 +12,12 @@ test_that("returns data in the expected format", {
test_that("errors properly", {
expect_error(get_dataset("Mal-formed-name"),
- regexp = "The dataset name supplied `Mal-formed-name` is invalid"
+ regexp = "The dataset name supplied `Mal-formed-name` is invalid"
)
expect_error(get_dataset("dataset-name-with-no-close-match"),
- regexp = "Can't find the dataset name `dataset-name-with-no-close-match`"
+ regexp = "Can't find the dataset name `dataset-name-with-no-close-match`"
)
expect_error(get_dataset("gp-practice-population"),
- regexp = "Did you mean 'gp-practice-populations'?")
+ regexp = "Did you mean 'gp-practice-populations'?"
+ )
})
diff --git a/tests/testthat/test-get_resource.R b/tests/testthat/test-get_resource.R
index 469e775..bdb44f6 100644
--- a/tests/testthat/test-get_resource.R
+++ b/tests/testthat/test-get_resource.R
@@ -74,5 +74,4 @@ test_that("first 99999 rows returned if query matches > 99999 rows", {
)
expect_true(nrow(df) == 99999)
-
})
diff --git a/tests/testthat/test-get_resource_sql.R b/tests/testthat/test-get_resource_sql.R
index e0ba982..e3f7c38 100644
--- a/tests/testthat/test-get_resource_sql.R
+++ b/tests/testthat/test-get_resource_sql.R
@@ -1,5 +1,4 @@
test_that("throws errors on invalid sql argument", {
-
# wrong class
expect_error(
get_resource_sql(9000),
@@ -17,11 +16,9 @@ test_that("throws errors on invalid sql argument", {
get_resource_sql("this is wrong"),
regexp = "`sql` must start with SELECT"
)
-
})
test_that("gets expected data", {
-
sql <- "
SELECT
\"TotalCancelled\",\"TotalOperations\",\"Hospital\",\"Month\"
@@ -37,11 +34,9 @@ test_that("gets expected data", {
c("TotalCancelled", "TotalOperations", "Hospital", "Month"),
names(df)
)
-
})
test_that("SQL errors", {
-
# non-existent column in real table
expect_error(
get_resource_sql(
@@ -66,5 +61,4 @@ test_that("SQL errors", {
),
regexp = "syntax error at or near \"'donut'\""
)
-
})
diff --git a/tests/testthat/test-parse_error.R b/tests/testthat/test-parse_error.R
index 639ab69..941689a 100644
--- a/tests/testthat/test-parse_error.R
+++ b/tests/testthat/test-parse_error.R
@@ -1,4 +1,4 @@
-test_that("correctly extracts error from API response", {
+test_that("correctly extracts error from API response", {
content <- httr::content(
httr::GET(
phsopendata:::request_url("datastore_search", "id=doop")
@@ -18,5 +18,4 @@ test_that("correctly extracts error from API response", {
phsopendata:::parse_error(content$error),
"resource_id: Missing value"
)
-
})
diff --git a/tests/testthat/test-phs_GET.R b/tests/testthat/test-phs_GET.R
index 9490ea1..28b6e24 100644
--- a/tests/testthat/test-phs_GET.R
+++ b/tests/testthat/test-phs_GET.R
@@ -1,15 +1,11 @@
-
test_that("returns httr::content", {
-
x <- phsopendata:::phs_GET("package_list", "")
expect_true(
!is.null(x$help) && !is.null(x$success)
)
-
})
-test_that("error_check() works as expected", {
-
+test_that("error_check() works as expected", {
# no error for valid endpoint
expect_type(
phsopendata:::phs_GET("package_list", ""),
@@ -21,15 +17,12 @@ test_that("error_check() works as expected", {
phsopendata:::phs_GET("datastore_search", "id=doop"),
regexp = 'Resource "doop" was not found.'
)
-
})
-test_that("request_url() works as expected", {
-
+test_that("request_url() works as expected", {
# invalid action argument
expect_error(
phsopendata:::phs_GET("", ""),
regexp = "API call failed"
)
-
})
diff --git a/tests/testthat/test-request_url.R b/tests/testthat/test-request_url.R
index 28a4235..898c74a 100644
--- a/tests/testthat/test-request_url.R
+++ b/tests/testthat/test-request_url.R
@@ -1,5 +1,4 @@
test_that("returns correct URL format", {
-
expect_equal(
phsopendata:::request_url("datastore_search", "id=doop"),
"https://www.opendata.nhs.scot/api/3/action/datastore_search?id=doop"
@@ -9,7 +8,6 @@ test_that("returns correct URL format", {
phsopendata:::request_url("dump", "id=doop"),
"https://www.opendata.nhs.scot/datastore/dump/id=doop?bom=true"
)
-
})
test_that("rejects invalid actions", {
diff --git a/tests/testthat/test-use_dump_check.R b/tests/testthat/test-use_dump_check.R
index 97e9e6b..0bb1979 100644
--- a/tests/testthat/test-use_dump_check.R
+++ b/tests/testthat/test-use_dump_check.R
@@ -1,5 +1,4 @@
test_that("returns true as expected", {
-
# all are null
expect_true(
phsopendata:::use_dump_check(list(), NULL)
@@ -24,11 +23,9 @@ test_that("returns true as expected", {
phsopendata:::use_dump_check(list(q = 4), 100000)
)
)
-
})
test_that("returns false as expected", {
-
# rows is NULL and query list is not all NULL
expect_false(
phsopendata:::use_dump_check(list(fields = "Age"), NULL)
@@ -43,5 +40,4 @@ test_that("returns false as expected", {
expect_false(
phsopendata:::use_dump_check(list(), 100)
)
-
})