diff --git a/.Rbuildignore b/.Rbuildignore index 588683b..e87d948 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,8 @@ ^phsopendata\.Rproj$ ^\.Rproj\.user$ ^README\.Rmd$ +^\.github$ +^codecov\.yml$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..0f1a241 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + day: "sunday" diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..001c2e5 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,58 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + # Windows + - {os: windows-latest, r: 'release'} + # Use 3.6 to trigger usage of RTools35 + - {os: windows-latest, r: '3.6'} + # Linux + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + # Mac OS + - {os: macos-latest, r: 'release'} + + # Vesions in use on PHS PWB + - {os: ubuntu-latest, r: '4.0.2'} + - {os: ubuntu-latest, r: '4.1.2'} + - {os: ubuntu-latest, r: '3.6.1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/document.yaml b/.github/workflows/document.yaml new file mode 100644 index 0000000..8ac7bab --- /dev/null +++ b/.github/workflows/document.yaml @@ -0,0 +1,42 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + paths: ["R/**"] + +name: Document + +jobs: + document: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::roxygen2 + needs: roxygen2 + + - name: Document + run: roxygen2::roxygenise() + shell: Rscript {0} + + - name: Commit and push changes + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add man/\* NAMESPACE DESCRIPTION + git commit -m "Update documentation" || echo "No changes to commit" + git pull --ff-only + git push origin diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..5dea707 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,30 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} diff --git a/.github/workflows/phsopendata.yml b/.github/workflows/phsopendata.yml deleted file mode 100644 index cea7ca2..0000000 --- a/.github/workflows/phsopendata.yml +++ /dev/null @@ -1,85 +0,0 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: - - main - - master - pull_request: - branches: - - main - - master - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore R package cache - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - options(crayon.enabled = TRUE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..a7276e8 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/render-README.yml b/.github/workflows/render-README.yml index 49b2b1e..2a343d6 100644 --- a/.github/workflows/render-README.yml +++ b/.github/workflows/render-README.yml @@ -6,18 +6,20 @@ on: jobs: render: name: Render README - runs-on: macOS-latest + runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + - uses: r-lib/actions/setup-pandoc@v2 - name: Install rmarkdown run: Rscript -e 'install.packages("rmarkdown")' - name: Render README run: Rscript -e 'rmarkdown::render("README.Rmd", output_format = "md_document")' - name: Commit results run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" git commit README.md -m 'Re-build README.Rmd' || echo "No changes to commit" git push origin || echo "No changes to commit" diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml new file mode 100644 index 0000000..6eea13f --- /dev/null +++ b/.github/workflows/style.yaml @@ -0,0 +1,73 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] + +name: Style + +jobs: + style: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::styler, any::roxygen2 + needs: styler + + - name: Enable styler cache + run: styler::cache_activate() + shell: Rscript {0} + + - name: Determine cache location + id: styler-location + run: | + cat( + "location=", + styler::cache_info(format = "tabular")$location, + "\n", + file = Sys.getenv("GITHUB_OUTPUT"), + append = TRUE, + sep = "" + ) + shell: Rscript {0} + + - name: Cache styler + uses: actions/cache@v3 + with: + path: ${{ steps.styler-location.outputs.location }} + key: ${{ runner.os }}-styler-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-styler- + ${{ runner.os }}- + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: Commit and push changes + run: | + if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ + | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) + then + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" + git pull --ff-only + git push origin + else + echo "No changes to commit." + fi diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..c931821 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 807ea25..c2f15ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user .Rhistory .RData +docs diff --git a/DESCRIPTION b/DESCRIPTION index e76c493..02cf416 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phsopendata Title: Extract Open Data from opendata.nhs.scot -Version: 0.0.1.000 +Version: 0.1.0 Authors@R: c(person(given = "Csilla", family = "Scharle", role = c("aut", "cre"), email = "csilla.scharle2@phs.scot"), person("James", "McMahon", email = "james.mcmahon@phs.scot", role = "aut"), @@ -20,9 +20,13 @@ Imports: cli, xml2 Suggests: + covr, testthat (>= 3.0.0) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Config/testthat/edition: 3 +URL: https://github.com/Public-Health-Scotland/phsopendata, + https://public-health-scotland.github.io/phsopendata/ +BugReports: https://github.com/Public-Health-Scotland/phsopendata/issues diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..5245345 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,4 @@ +# phsopendata 0.1.0 (2021-07-22) + +- Initial package release. +- `get_dataset()`, `get_resource()` and `get_resource_sql()` functions added. diff --git a/R/check_dataset_name.R b/R/check_dataset_name.R index eb9ae91..2e4744f 100644 --- a/R/check_dataset_name.R +++ b/R/check_dataset_name.R @@ -6,20 +6,20 @@ #' @param dataset_name a resource ID #' check_dataset_name <- function(dataset_name) { - # Starts and ends in a lowercase letter or number # Has only lowercase alphanum or hyphens inbetween dataset_name_regex <- "^[a-z0-9][a-z0-9\\-]+?[a-z0-9]$" - if (!inherits(dataset_name, "character")) + if (!inherits(dataset_name, "character")) { cli::cli_abort(c( "The dataset name supplied {.var {dataset_name}} is invalid.", "x" = "dataset_name must be of type character.", "i" = "You supplied a {.cls {class(dataset_name)[0]}} value." )) + } - if (!grepl(dataset_name_regex, dataset_name)) + if (!grepl(dataset_name_regex, dataset_name)) { cli::cli_abort(c( "The dataset name supplied {.var {dataset_name}} is invalid", "x" = "dataset_name must be in dash-case @@ -27,5 +27,5 @@ check_dataset_name <- function(dataset_name) { "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." )) - + } } diff --git a/R/check_res_id.R b/R/check_res_id.R index c449fca..5b8dc1f 100644 --- a/R/check_res_id.R +++ b/R/check_res_id.R @@ -7,30 +7,31 @@ #' #' @return TRUE / FALSE indicating the validity of the res_id check_res_id <- function(res_id) { - # check res_id is single value - if (length(res_id) > 1) + if (length(res_id) > 1) { cli::cli_abort(c( "Argument {.var res_id} must be of length 1.", i = "You supplied a res_id with a length of {length(res_id)}", x = "`get_resource` does not currently support requests for multiple resources simultaneously." )) + } # check res_id is character - if (!inherits(res_id, "character")) + if (!inherits(res_id, "character")) { cli::cli_abort(c( "Argument {.var res_id} must be of type character", i = "You supplied a {.var res_id} with type {.cls {class(res_id)[1]}}" )) + } # check regex pattern res_id_regex <- "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" - if (!grepl(res_id_regex, res_id)) + if (!grepl(res_id_regex, res_id)) { cli::cli_abort(c( "Argument {.var res_id} is in an invalid format.", i = "You can find a resource's ID in the URL of it's page on {.url www.opendata.nhs.scot}." )) - + } } diff --git a/R/dump_download.R b/R/dump_download.R index 4c9d7e3..2058e2e 100644 --- a/R/dump_download.R +++ b/R/dump_download.R @@ -4,19 +4,18 @@ #' @return dataframe containing resource records #' dump_download <- function(res_id) { - # fetch the data content <- suppressMessages( phs_GET("dump", res_id) ) # if content is a web page - if ("xml_document" %in% class(content)) + if ("xml_document" %in% class(content)) { cli::cli_abort(c( "Can't find resource with ID {.var {res_id}} in datastore." )) + } # return data return(content[, -1]) - } diff --git a/R/error_check.R b/R/error_check.R index 6802376..cb670f8 100644 --- a/R/error_check.R +++ b/R/error_check.R @@ -3,20 +3,23 @@ #' @param content object produced by `httr::content` #' error_check <- function(content) { - # if content is not a list, # stop for content (a string describing an error) - if (!is.list(content)) cli::cli_abort(c( - "API error", - x = content - )) + if (!is.list(content)) { + cli::cli_abort(c( + "API error", + x = content + )) + } # if there is no error status/message in the content, # break out of the function is_error <- suppressWarnings( !is.null(content$error) ) - if (!is_error) return() + if (!is_error) { + return() + } # generate error message and stop error_text <- parse_error(content$error) @@ -24,5 +27,4 @@ error_check <- function(content) { "API error.", x = error_text )) - } diff --git a/R/get_dataset.R b/R/get_dataset.R index 878fe71..0afd7fc 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -19,32 +19,82 @@ #' max_resources = 2, rows = 10 #' ) get_dataset <- function(dataset_name, max_resources = NULL, rows = NULL) { - # throw error if name type/format is invalid check_dataset_name(dataset_name) # define query and try API call query <- paste0("id=", dataset_name) content <- try( - phs_GET("package_show", query), silent = TRUE + phs_GET("package_show", query), + silent = TRUE ) # if content contains a 'Not Found Error' # throw error with suggested dataset name - if (grepl("Not Found Error", content[1])) + if (grepl("Not Found Error", content[1])) { suggest_dataset_name(dataset_name) + } # define list of resource IDs to get - all_ids <- purrr::map_chr(content$result$resources, ~.x$id) + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) n_res <- length(all_ids) res_index <- 1:min(n_res, max_resources) ids_selection <- all_ids[res_index] - all_data <- purrr::map_dfr( + # get all resources + all_data <- purrr::map( ids_selection, get_resource, rows = rows ) - return(all_data) + # resolve class issues + types <- purrr::map( + all_data, + ~ unlist(lapply(.x, class)) + ) + + # for each df, check if next df class matches + inconsistencies <- vector(length = length(types) - 1, mode = "list") + for (i in seq_along(types)) { + if (i == length(types)) break + + this_types <- types[[i]] + next_types <- types[[i + 1]] + + # find matching names + matching_names <- suppressWarnings( + names(this_types) == names(next_types) + ) + + # of matching name cols, find if types match too + inconsistent_index <- this_types[matching_names] != next_types[matching_names] + inconsistencies[[i]] <- this_types[matching_names][inconsistent_index] + } + + # define which columns to coerce and warn + conflicts <- unlist(inconsistencies) + to_coerce <- unique(names(conflicts)) + + if (length(to_coerce) > 0) { + cli::cli_warn(c( + "Due to conflicts between column types across resources, + the following {cli::qty(to_coerce)} column{?s} ha{?s/ve} been coerced to type character:", + "{.val {to_coerce}}" + )) + } + + # combine + combined <- purrr::map_df( + all_data, + ~ dplyr::mutate( + .x, + dplyr::across( + dplyr::any_of(to_coerce), + as.character + ) + ) + ) + + return(combined) } diff --git a/R/get_resource.R b/R/get_resource.R index 204acda..55b06d1 100644 --- a/R/get_resource.R +++ b/R/get_resource.R @@ -22,7 +22,6 @@ #' #' df <- get_resource(res_id = res_id, row_filters = filters, col_select = wanted_cols) get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = NULL) { - # check res_id check_res_id(res_id) @@ -35,8 +34,9 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N ) # if dump should be used, use it - if (use_dump_check(query, rows)) + if (use_dump_check(query, rows)) { return(dump_download(res_id)) + } # if there is no row limit set # set limit to CKAN max @@ -55,7 +55,7 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N # AND the user was not aware of this limit (`rows` defaulted to NULL) # warn the user about this limit. total_rows <- res_content$result$total - if (is.null(rows) && query$limit < total_rows) + if (is.null(rows) && query$limit < total_rows) { cli::cli_warn(c( "Returning the first {query$limit} results (rows) of your query. @@ -63,14 +63,16 @@ get_resource <- function(res_id, rows = NULL, row_filters = NULL, col_select = N i = "To get ALL matching rows you will need to download the whole resource and apply filters/selections locally." )) + } # if more rows were requested than received # let the user know - if (!is.null(rows) && query$limit > total_rows) + if (!is.null(rows) && query$limit > total_rows) { cli::cli_alert_warning(c( "You set {.var rows} to {query$limit} but only {total_rows} rows matched your query." )) + } # extract data from response content data <- purrr::map_dfr( diff --git a/R/get_resource_sql.R b/R/get_resource_sql.R index d6c1341..fbc588e 100644 --- a/R/get_resource_sql.R +++ b/R/get_resource_sql.R @@ -45,42 +45,44 @@ #' #' # This is equivalent to: #' cols <- c( -#' "TotalCancelled", "TotalOperations", -#' "Hospital", "Month" +#' "TotalCancelled", "TotalOperations", +#' "Hospital", "Month" #' ) #' row_filter <- c(Hospital = "D102H") #' #' df2 <- get_resource( -#' "bcc860a4-49f4-4232-a76b-f559cf6eb885", -#' col_select = cols, -#' row_filters = row_filter +#' "bcc860a4-49f4-4232-a76b-f559cf6eb885", +#' col_select = cols, +#' row_filters = row_filter #' ) get_resource_sql <- function(sql) { - - if (length(sql) > 1) + if (length(sql) > 1) { cli::cli_abort(c( "SQL validation error.", i = "{.var sql} must be of length 1", x = "You entered an object of length {length(sql)}." )) + } - if (!("character" %in% class(sql))) + if (!("character" %in% class(sql))) { cli::cli_abort(c( "SQL validation error.", i = "{.var sql} must be of class {.cls character}", x = "You entered an object of class {.cls {class(sql)[1]}}." )) + } # remove spaces sql <- gsub(" ", "", sql) sql <- gsub("\n", "", sql) # check query is a SELECT statement - if (substr(sql, 1, 6) != "SELECT") + if (substr(sql, 1, 6) != "SELECT") { cli::cli_abort(c( "SQL validation error.", i = "{.var sql} must start with SELECT" )) + } # add query field prefix query <- paste0("sql=", sql) @@ -91,19 +93,19 @@ get_resource_sql <- function(sql) { # get correct order of columns order <- purrr::map_chr( content$result$fields, - ~.x$id + ~ .x$id ) # extract the records (rows) from content data <- purrr::map_dfr( content$result$records, - ~{ - # replace NULL with "" so tibble works - is_null <- purrr::map_lgl(.x, is.null) - .x[is_null] <- "" + ~ { + # replace NULL with "" so tibble works + is_null <- purrr::map_lgl(.x, is.null) + .x[is_null] <- "" - tibble::as_tibble(.x) - } + tibble::as_tibble(.x) + } ) # select and reorder columns to reflect @@ -123,6 +125,4 @@ get_resource_sql <- function(sql) { } return(cleaner) - } - diff --git a/R/parse_col_select.R b/R/parse_col_select.R index c0980dd..82232b2 100644 --- a/R/parse_col_select.R +++ b/R/parse_col_select.R @@ -4,12 +4,11 @@ #' @param col_select a character vector identifying the columns to select. #' @return a character string parse_col_select <- function(col_select) { - - if (is.null(col_select)) + if (is.null(col_select)) { return(NULL) + } return( paste0(col_select, collapse = ",") ) - } diff --git a/R/parse_error.R b/R/parse_error.R index 1cc9f07..353d91a 100644 --- a/R/parse_error.R +++ b/R/parse_error.R @@ -14,13 +14,11 @@ parse_error <- function(error) { # special case for validation errors if (error_type == "Validation Error") { - error_output <- paste0(names(error[1][1]), ": ", error[1][[1]]) # translate message for package users error_output <- sub("fields", "col_select", error_output) error_output <- sub("q", "row_filters", error_output) - } # special case for SQL validation errors diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 1cc8ce4..059d5ef 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -3,20 +3,21 @@ #' @param row_filters list or named vectors matching fileds to values #' @return a json as a character string parse_row_filters <- function(row_filters) { - # exit function if no filters - if (is.null(row_filters)) + if (is.null(row_filters)) { return(NULL) + } # check if any filters in list have length > 1 too_many <- sapply(row_filters, length) > 1 - if (any(too_many)) + if (any(too_many)) { cli::cli_abort(c( "Invalid input for {.var row_filters}", i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ", x = "The {.var row_filters} list must only contain vectors of length 1." )) + } # check if any items in the list/vector have the same name # find number of unique names @@ -26,19 +27,20 @@ parse_row_filters <- function(row_filters) { # if same, all names are unique unique_names <- n_u_row_filters == n_row_filters - if (!unique_names) + if (!unique_names) { cli::cli_abort(c( "Invalid input for {.var row_filters}", x = "One or more elements in {.var row_filters} have the same name.", i = "Only one filter per field is currently supported by `get_resource`." )) + } filter_body <- paste0( - '"', names(row_filters), '":"', row_filters, '"', collapse = "," + '"', names(row_filters), '":"', row_filters, '"', + collapse = "," ) return( - paste0('{', filter_body, '}') + paste0("{", filter_body, "}") ) - } diff --git a/R/phs_GET.R b/R/phs_GET.R index d7fd156..f5e588a 100644 --- a/R/phs_GET.R +++ b/R/phs_GET.R @@ -6,7 +6,6 @@ #' @return content of a httr::GET request #' phs_GET <- function(action, query, verbose = FALSE) { - # define URL url <- request_url(action, query) @@ -19,11 +18,12 @@ phs_GET <- function(action, query, verbose = FALSE) { ) # Check for response from server - if (!inherits(response, "response")) + if (!inherits(response, "response")) { cli::cli_abort(c( "Can't connect to the CKAN server.", i = "Check your network/proxy settings." )) + } # extract content from HTTP response content <- httr::content( @@ -35,5 +35,4 @@ phs_GET <- function(action, query, verbose = FALSE) { if (verbose) cat("GET request successful.\n") return(content) - } diff --git a/R/request_url.R b/R/request_url.R index 7aa65b0..0798ec9 100644 --- a/R/request_url.R +++ b/R/request_url.R @@ -5,26 +5,26 @@ #' @return a URL as a character string #' request_url <- function(action, query) { - # check action is valid valid_actions <- c("datastore_search", "datastore_search_sql", "dump", "package_show", "package_list") - if (!(action %in% valid_actions)) + if (!(action %in% valid_actions)) { cli::cli_abort(c( "API call failed.", x = "Invalid {.var action} argument in request." )) + } # return dump URL - if (action == "dump") + if (action == "dump") { return(paste0( "https://www.opendata.nhs.scot/datastore/dump/", query, "?bom=true" )) + } # return standard API endpoint (i.e., not dump) return(paste0( "https://www.opendata.nhs.scot/api/3/action/", action, "?", query )) - } diff --git a/R/suggest_dataset_name.R b/R/suggest_dataset_name.R index f1dd34b..0a61cbe 100644 --- a/R/suggest_dataset_name.R +++ b/R/suggest_dataset_name.R @@ -4,7 +4,6 @@ #' @param dataset_name a string to be matched against valid dataset names #' suggest_dataset_name <- function(dataset_name) { - content <- phs_GET("package_list", "") dataset_names <- unlist(content$result) @@ -13,13 +12,14 @@ suggest_dataset_name <- function(dataset_name) { string_distances <- stringdist::stringdist(dataset_name, dataset_names) # if min distance is too big, abort - if (min(string_distances) > 10) + if (min(string_distances) > 10) { cli::cli_abort(c( "Can't find the dataset name {.var {dataset_name}}, or a close match.", i = "Find a dataset's name in the URL of it's page on {.url www.opendata.nhs.scot.}" )) + } # find closet match closest_match <- dataset_names[which.min(string_distances)] @@ -29,5 +29,4 @@ suggest_dataset_name <- function(dataset_name) { "Can't find the dataset name {.var {dataset_name}}.", "i" = "Did you mean '{closest_match}'?" )) - } diff --git a/R/use_dump_check.R b/R/use_dump_check.R index 632d96b..15d9ba1 100644 --- a/R/use_dump_check.R +++ b/R/use_dump_check.R @@ -5,7 +5,6 @@ #' @return a logical value. TRUE indicates that the dump should be used #' use_dump_check <- function(query, rows) { - # if row input is > 99999 or NULL # or all queries (inc. rows) are null # then use GET datastore_dump @@ -21,7 +20,7 @@ use_dump_check <- function(query, rows) { # if user queried the data queried <- !is.null(query$q) || !is.null(query$filter) || !is.null(query$fields) - if (queried && use_dump) + if (queried && use_dump) { cli::cli_warn(c( "Invalid combination of {.var rows}, {.var row_filters} and/or {.var col_select}.", @@ -29,12 +28,13 @@ use_dump_check <- function(query, rows) { AND query its rows/columns.", i = "ALL rows and columns of the resource will be downloaded." )) + } # warn users if they haven't queried # the data but have requested rows > 99999 if (is.null(rows)) rows <- 0 - if (!queried && rows > 99999) + if (!queried && rows > 99999) { cli::cli_warn(c( "Getting all rows of resource.", i = "All rows will be returned if you @@ -42,8 +42,8 @@ use_dump_check <- function(query, rows) { i = "You set {.var rows} to {format(rows, big.mark = ',', scientific = FALSE)}" )) + } return(use_dump) } - diff --git a/README.Rmd b/README.Rmd index a5083a8..5c03111 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,9 @@ knitr::opts_chunk$set( # phsopendata -[![R build status](https://github.com/Public-Health-Scotland/phsopendata/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions) +[![GitHub release (latest by date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsopendata)](https://github.com/Public-Health-Scotland/phsopendata/releases/latest) +[![R-CMD-check](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/Public-Health-Scotland/phsopendata/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsopendata?branch=master) `phsopendata` contains functions to interact with open data from the [Scottish Health and Social Care Open Data platform](https://www.opendata.nhs.scot/) via the CKAN API. @@ -87,14 +89,14 @@ In this example, we are downloading GP Practice Population Demographics from: [o ```{r example dataset, eval = FALSE} -# if max_resources is not set, all resources will be returned by default. +# if max_resources is not set, all resources will be returned by default. # Here we pull 10 rows from the first 2 resources only get_dataset("gp-practice-populations", max_resources = 2, rows = 10) ``` ## Contributing to phsopendata -At present, this package is maintained by [David Aikman](https://github.com/daikman). +At present, this package is maintained by [Csilla Scharle](https://github.com/csillasch). If you have requests or suggestions for additional functionality, please contact the package maintainer and/or the [PHS Open Data team](phs.opendata@phs.scot). diff --git a/README.md b/README.md index d2d8623..e8aad17 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,14 @@ -phsopendata -=========== +# phsopendata -[![R build -status](https://github.com/Public-Health-Scotland/phsopendata/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions) +[![GitHub release (latest by +date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsopendata)](https://github.com/Public-Health-Scotland/phsopendata/releases/latest) +[![R-CMD-check](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Public-Health-Scotland/phsopendata/actions/workflows/R-CMD-check.yaml) +[![Codecov test +coverage](https://codecov.io/gh/Public-Health-Scotland/phsopendata/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsopendata?branch=master) `phsopendata` contains functions to interact with open data from the @@ -26,8 +28,7 @@ For extracting metadata and search functionality, we recommend using the RStudio. However, depending on firewall settings, proxy use may need to be configured with `use_proxy()`. -Installation ------------- +## Installation You need to install `phsopendata` from GitHub, which requires a package like `remotes` or `devtools`. @@ -38,16 +39,14 @@ Using `remotes` you run this to install the package: upgrade = "never" ) -Examples --------- +## Examples ### Downloading a data table with `get_resource()` To extract a specific resource, you will need it’s unique identifier - resource id. This can be found in the dataset metadata, the URL of a -resource’s page on -https://www.opendata.nhs.scot/, -or extracted using `ckanr::package_show`. +resource’s page on , or extracted using +`ckanr::package_show`. library(phsopendata) @@ -93,15 +92,14 @@ from: [opendata.nhs.scot/dataset/*gp-practice-populations*](https://www.opendata.nhs.scot/dataset/gp-practice-populations), so the dataset name will be gp-practice-populations. - # if max_resources is not set, all resources will be returned by default. + # if max_resources is not set, all resources will be returned by default. # Here we pull 10 rows from the first 2 resources only get_dataset("gp-practice-populations", max_resources = 2, rows = 10) -Contributing to phsopendata ---------------------------- +## Contributing to phsopendata -At present, this package is maintained by the [PHS Open Data -team](phs.opendata@phs.scot). +At present, this package is maintained by [Csilla +Scharle](https://github.com/csillasch). If you have requests or suggestions for additional functionality, please contact the package maintainer and/or the [PHS Open Data diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..0dcfc08 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://public-health-scotland.github.io/phsopendata/ +template: + bootstrap: 5 + diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..04c5585 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/tests/testthat/test-dump_download.R b/tests/testthat/test-dump_download.R index befa3a2..ebf1c75 100644 --- a/tests/testthat/test-dump_download.R +++ b/tests/testthat/test-dump_download.R @@ -1,17 +1,13 @@ test_that("throws error for non-existent res_ids", { - expect_error( data <- phsopendata:::dump_download("not-real"), regexp = "Can't find resource with ID" ) - }) test_that("downloads full resource", { - data <- phsopendata:::dump_download("a794d603-95ab-4309-8c92-b48970478c14") expect_equal(nrow(data), 926) expect_equal(ncol(data), 15) - }) diff --git a/tests/testthat/test-error_check.R b/tests/testthat/test-error_check.R index 1b55343..745ed17 100644 --- a/tests/testthat/test-error_check.R +++ b/tests/testthat/test-error_check.R @@ -1,4 +1,4 @@ -test_that("returns nothing if no error", { +test_that("returns nothing if no error", { content <- httr::content( httr::GET( phsopendata:::request_url("package_list", "") @@ -9,7 +9,7 @@ test_that("returns nothing if no error", { ) }) -test_that("throws error if error in httr content", { +test_that("throws error if error in httr content", { content <- httr::content( httr::GET( phsopendata:::request_url("datastore_search", "id=doop") diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R index debcf35..cab33b0 100644 --- a/tests/testthat/test-get_dataset.R +++ b/tests/testthat/test-get_dataset.R @@ -12,11 +12,12 @@ test_that("returns data in the expected format", { test_that("errors properly", { expect_error(get_dataset("Mal-formed-name"), - regexp = "The dataset name supplied `Mal-formed-name` is invalid" + regexp = "The dataset name supplied `Mal-formed-name` is invalid" ) expect_error(get_dataset("dataset-name-with-no-close-match"), - regexp = "Can't find the dataset name `dataset-name-with-no-close-match`" + regexp = "Can't find the dataset name `dataset-name-with-no-close-match`" ) expect_error(get_dataset("gp-practice-population"), - regexp = "Did you mean 'gp-practice-populations'?") + regexp = "Did you mean 'gp-practice-populations'?" + ) }) diff --git a/tests/testthat/test-get_resource.R b/tests/testthat/test-get_resource.R index 469e775..bdb44f6 100644 --- a/tests/testthat/test-get_resource.R +++ b/tests/testthat/test-get_resource.R @@ -74,5 +74,4 @@ test_that("first 99999 rows returned if query matches > 99999 rows", { ) expect_true(nrow(df) == 99999) - }) diff --git a/tests/testthat/test-get_resource_sql.R b/tests/testthat/test-get_resource_sql.R index e0ba982..e3f7c38 100644 --- a/tests/testthat/test-get_resource_sql.R +++ b/tests/testthat/test-get_resource_sql.R @@ -1,5 +1,4 @@ test_that("throws errors on invalid sql argument", { - # wrong class expect_error( get_resource_sql(9000), @@ -17,11 +16,9 @@ test_that("throws errors on invalid sql argument", { get_resource_sql("this is wrong"), regexp = "`sql` must start with SELECT" ) - }) test_that("gets expected data", { - sql <- " SELECT \"TotalCancelled\",\"TotalOperations\",\"Hospital\",\"Month\" @@ -37,11 +34,9 @@ test_that("gets expected data", { c("TotalCancelled", "TotalOperations", "Hospital", "Month"), names(df) ) - }) test_that("SQL errors", { - # non-existent column in real table expect_error( get_resource_sql( @@ -66,5 +61,4 @@ test_that("SQL errors", { ), regexp = "syntax error at or near \"'donut'\"" ) - }) diff --git a/tests/testthat/test-parse_error.R b/tests/testthat/test-parse_error.R index 639ab69..941689a 100644 --- a/tests/testthat/test-parse_error.R +++ b/tests/testthat/test-parse_error.R @@ -1,4 +1,4 @@ -test_that("correctly extracts error from API response", { +test_that("correctly extracts error from API response", { content <- httr::content( httr::GET( phsopendata:::request_url("datastore_search", "id=doop") @@ -18,5 +18,4 @@ test_that("correctly extracts error from API response", { phsopendata:::parse_error(content$error), "resource_id: Missing value" ) - }) diff --git a/tests/testthat/test-phs_GET.R b/tests/testthat/test-phs_GET.R index 9490ea1..28b6e24 100644 --- a/tests/testthat/test-phs_GET.R +++ b/tests/testthat/test-phs_GET.R @@ -1,15 +1,11 @@ - test_that("returns httr::content", { - x <- phsopendata:::phs_GET("package_list", "") expect_true( !is.null(x$help) && !is.null(x$success) ) - }) -test_that("error_check() works as expected", { - +test_that("error_check() works as expected", { # no error for valid endpoint expect_type( phsopendata:::phs_GET("package_list", ""), @@ -21,15 +17,12 @@ test_that("error_check() works as expected", { phsopendata:::phs_GET("datastore_search", "id=doop"), regexp = 'Resource "doop" was not found.' ) - }) -test_that("request_url() works as expected", { - +test_that("request_url() works as expected", { # invalid action argument expect_error( phsopendata:::phs_GET("", ""), regexp = "API call failed" ) - }) diff --git a/tests/testthat/test-request_url.R b/tests/testthat/test-request_url.R index 28a4235..898c74a 100644 --- a/tests/testthat/test-request_url.R +++ b/tests/testthat/test-request_url.R @@ -1,5 +1,4 @@ test_that("returns correct URL format", { - expect_equal( phsopendata:::request_url("datastore_search", "id=doop"), "https://www.opendata.nhs.scot/api/3/action/datastore_search?id=doop" @@ -9,7 +8,6 @@ test_that("returns correct URL format", { phsopendata:::request_url("dump", "id=doop"), "https://www.opendata.nhs.scot/datastore/dump/id=doop?bom=true" ) - }) test_that("rejects invalid actions", { diff --git a/tests/testthat/test-use_dump_check.R b/tests/testthat/test-use_dump_check.R index 97e9e6b..0bb1979 100644 --- a/tests/testthat/test-use_dump_check.R +++ b/tests/testthat/test-use_dump_check.R @@ -1,5 +1,4 @@ test_that("returns true as expected", { - # all are null expect_true( phsopendata:::use_dump_check(list(), NULL) @@ -24,11 +23,9 @@ test_that("returns true as expected", { phsopendata:::use_dump_check(list(q = 4), 100000) ) ) - }) test_that("returns false as expected", { - # rows is NULL and query list is not all NULL expect_false( phsopendata:::use_dump_check(list(fields = "Age"), NULL) @@ -43,5 +40,4 @@ test_that("returns false as expected", { expect_false( phsopendata:::use_dump_check(list(), 100) ) - })