Skip to content

Commit

Permalink
Merge branch 'master' of github.com:traitecoevo/APCalign
Browse files Browse the repository at this point in the history
  • Loading branch information
wcornwell committed Aug 26, 2024
2 parents fa592d7 + 391a058 commit 7d5a1a3
Show file tree
Hide file tree
Showing 23 changed files with 315 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
^cran-comments\.md$
^CRAN-SUBMISSION$
^revdep

^.DS_Store
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: APCalign
Title: Resolving Plant Taxon Names Using the Australian Plant Census
Version: 1.0.1
Version: 1.1.0
Authors@R: c(
person(given = "Daniel", family = "Falster", role = c("aut", "cre", "cph"), email = "[email protected]", comment = c(ORCID = "0000-0002-9814-092X")),
person(given = "Elizabeth", family = "Wenk", role = c("aut", "ctb"), email = "[email protected]", comment = c(ORCID = "0000-0001-5640-5910")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(align_taxa)
export(create_species_state_origin_matrix)
export(create_taxonomic_update_lookup)
export(default_version)
export(get_apc_genus_family_lookup)
export(load_taxonomic_resources)
export(native_anywhere_in_australia)
export(standardise_names)
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@

# APCalign 1.1.0

- Create a genus->family lookup from the specified APC release

# APCalign 1.0.2

Minor update with the following fixes:

- Deal with the vignette issues that emerged on CRAN
- Improve "graceful failing", based on issues that have come up on github CI
- minor formatting

# APCalign 1.0.1

First major release of APCalign. A preprint is available at
Expand Down
10 changes: 8 additions & 2 deletions R/align_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,11 @@
#'
#' @examples
#' \donttest{
#'
#' resources <- load_taxonomic_resources()
#'
#' # example 1
#' align_taxa(c("Poa annua", "Abies alba"), resources = resources)
#' align_taxa(c("Poa annua", "Abies alba"), resources=resources)
#'
#' # example 2
#' input <- c("Banksia serrata", "Banksia serrate", "Banksia cerrata",
Expand All @@ -169,7 +170,7 @@
#' original_name = input,
#' identifier = "APCalign test",
#' full = TRUE,
#' resources = resources
#' resources=resources
#' )
#'
#' }
Expand All @@ -195,6 +196,11 @@ align_taxa <- function(original_name,
APNI_matches = TRUE,
identifier = NA_character_) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

if(!quiet)
message("Checking alignments of ",
dplyr::n_distinct(original_name, na.rm = TRUE),
Expand Down
6 changes: 6 additions & 0 deletions R/create_species_state_origin_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
#'
#'
create_species_state_origin_matrix <- function(resources = load_taxonomic_resources()) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

apc_species <- filter_data_to_accepted_species(resources)
sep_state_data <- separate_states(apc_species)
apc_places <- identify_places(sep_state_data)
Expand Down
5 changes: 5 additions & 0 deletions R/create_taxonomic_update_lookup.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ create_taxonomic_update_lookup <- function(taxa,
resources = load_taxonomic_resources(),
quiet = FALSE,
output = NULL) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

validate_taxonomic_splits_input(taxonomic_splits)

Expand Down
12 changes: 8 additions & 4 deletions R/load_taxonomic_resources.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ load_taxonomic_resources <-
version = default_version(),
quiet = FALSE) {


if(is.null(version)){
message("No internet connection, please retry with stable connection or specify a local version of the data")
return(invisible(NULL))
}

taxonomic_resources <- dataset_access_function(
version = version,
Expand Down Expand Up @@ -312,7 +315,7 @@ dataset_access_function <-
network <- as.logical(Sys.getenv("NETWORK_UP", unset = TRUE))


if (!curl::has_internet() | !network) { # Simulate if network is down
if (!curl::has_internet() | !network| is.null(version)) { # Simulate if network is down
message("No internet connection, please retry with stable connection (dataset_access_function)")
return(invisible(NULL))
}
Expand Down Expand Up @@ -372,7 +375,8 @@ dataset_access_function <-
#' version is specified.
#'
#' @return A character string representing the default version for stable data.
#' @example default_version()
#' @examples
#' default_version()
#'
#' @export

Expand Down Expand Up @@ -420,7 +424,7 @@ dataset_get <- function(version = default_version(),
## Dummy variable to allow testing of network
network <- as.logical(Sys.getenv("NETWORK_UP", unset = TRUE))

if (!curl::has_internet() | !network) { # Simulate if network is down
if (!curl::has_internet() | !network | is.null(version)) { # Simulate if network is down
message("No internet connection, please retry with stable connection (dataset_get)")
return(invisible(NULL))
} else{
Expand Down
5 changes: 5 additions & 0 deletions R/match_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ match_taxa <- function(
identifier = NA_character_
) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

update_na_with <- function(current, new) {
ifelse(is.na(current), new, current)
}
Expand Down
5 changes: 5 additions & 0 deletions R/native_anywhere_in_australia.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ native_anywhere_in_australia <- function(species, resources = load_taxonomic_res
# Create lookup tables
full_lookup <- create_species_state_origin_matrix(resources = resources)

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

if (any(!species %in% full_lookup$species)) {
warning("At least one input not found in APC; make sure inputs are at the species level and consider using `create_taxonomic_update_lookup` first.")
}
Expand Down
135 changes: 68 additions & 67 deletions R/release.R
Original file line number Diff line number Diff line change
@@ -1,74 +1,75 @@
#' Download taxonomic resources for GitHub Release
#' Download taxonomic resources for GitHub Release
#'
#' @param version_name character string of version name, follow semantic versioning
#' @param path directory (including trailing slash) where the downloaded parquet and csv files are saved before upload
#' @keywords internal
#' @noRd

download_taxonomic_resources_for_release<- function(version_name = NULL, path = "ignore/"){

# TODO: Use gh package to release programmatically
# body <- paste0('{"tag_name":"',version_name,'","target_commitish":"master","name":"',version_name,'","body":"Download of taxonomic resources from APC and APNI as of ',Sys.Date(),'","draft":true,"prerelease":false,"generate_release_notes":false}')
#
# # Creating release via GH API
# gh::gh("POST /repos/{owner}/{repo}/releases",
# owner = "traitecoevo", repo = "APCalign",
# charToRaw(body),
# .send_headers = c(
# Accept = "application/vnd.github.switcheroo-preview+json",
# "Content-Type" = "application/json"
# )
# )

# Download APC
APC <-
readr::read_csv(
"https://biodiversity.org.au/nsl/services/export/taxonCsv",
col_types =
readr::cols(
.default = readr::col_character(),
proParte = readr::col_logical(),
taxonRankSortOrder = readr::col_double(),
created = readr::col_datetime(format = ""),
modified = readr::col_datetime(format = "")
)
)

# Save APC as parquet
arrow::write_parquet(APC, sink = paste0(path,"apc.parquet"))
# Save APC as tar.gz
readr::write_csv(APC, file = paste0(path,"apc.tar.gz"))

# Download APNI
APNI <-
readr::read_csv(
"https://biodiversity.org.au/nsl/services/export/namesCsv",
col_types =
readr::cols(
.default = readr::col_character(),
autonym = readr::col_logical(),
hybrid = readr::col_logical(),
cultivar = readr::col_logical(),
formula = readr::col_logical(),
scientific = readr::col_logical(),
nomInval = readr::col_logical(),
nomIlleg = readr::col_logical(),
namePublishedInYear = readr::col_double(),
taxonRankSortOrder = readr::col_double(),
created = readr::col_datetime(format = ""),
modified = readr::col_datetime(format = "")
)
)

# Exclude names that are in APC from APNI
APNI_cleaned <- APNI |>
dplyr::filter(!canonicalName %in% APC$canonicalName)

# Save APNI as parquet
arrow::write_parquet(APNI_cleaned, sink = paste0(path,"apni.parquet"))

# Save APNI as tar.gz
readr::write_csv(APNI_cleaned, file = paste0(path,"apni.tar.gz"))

download_taxonomic_resources_for_release <- function(version_name = NULL,
                                                     path = "ignore/") {
  # Downloads the current APC (taxonCsv) and APNI (namesCsv) exports and
  # writes each one to `path` twice: as a parquet file and via
  # readr::write_csv.
  #
  # version_name: currently only referenced by the commented-out TODO below.
  # path: output directory; file names are appended with paste0(), so it
  #   must end with a path separator (as the default "ignore/" does).

  # TODO: Use gh package to release programmatically
  # body <- paste0('{"tag_name":"',version_name,'","target_commitish":"master","name":"',version_name,'","body":"Download of taxonomic resources from APC and APNI as of ',Sys.Date(),'","draft":true,"prerelease":false,"generate_release_notes":false}')
  #
  # # Creating release via GH API
  # gh::gh("POST /repos/{owner}/{repo}/releases",
  #        owner = "traitecoevo", repo = "APCalign",
  #        charToRaw(body),
  #        .send_headers = c(
  #          Accept = "application/vnd.github.switcheroo-preview+json",
  #          "Content-Type" = "application/json"
  #        )
  # )

  # Create the directory that is actually written to. Previously "ignore/"
  # was hard-coded here, so a non-default `path` was never created and the
  # first write below failed.
  dir.create(path, showWarnings = FALSE, recursive = TRUE)

  # Download APC
  APC <-
    readr::read_csv(
      "https://biodiversity.org.au/nsl/services/export/taxonCsv",
      col_types =
        readr::cols(
          .default = readr::col_character(),
          proParte = readr::col_logical(),
          taxonRankSortOrder = readr::col_double(),
          created = readr::col_datetime(format = ""),
          modified = readr::col_datetime(format = "")
        )
    )

  # Save APC as parquet
  arrow::write_parquet(APC, sink = paste0(path, "apc.parquet"))
  # Save APC as tar.gz
  # NOTE(review): this is written with write_csv, so the content is CSV under
  # a .tar.gz name (not a tar archive) -- confirm this is the intended format.
  readr::write_csv(APC, file = paste0(path, "apc.tar.gz"))

  # Download APNI
  APNI <-
    readr::read_csv(
      "https://biodiversity.org.au/nsl/services/export/namesCsv",
      col_types =
        readr::cols(
          .default = readr::col_character(),
          autonym = readr::col_logical(),
          hybrid = readr::col_logical(),
          cultivar = readr::col_logical(),
          formula = readr::col_logical(),
          scientific = readr::col_logical(),
          nomInval = readr::col_logical(),
          nomIlleg = readr::col_logical(),
          namePublishedInYear = readr::col_double(),
          taxonRankSortOrder = readr::col_double(),
          created = readr::col_datetime(format = ""),
          modified = readr::col_datetime(format = "")
        )
    )

  # Exclude names that are in APC from APNI
  APNI_cleaned <- APNI |>
    dplyr::filter(!canonicalName %in% APC$canonicalName)

  # Save APNI as parquet
  arrow::write_parquet(APNI_cleaned, sink = paste0(path, "apni.parquet"))

  # Save APNI as tar.gz (same CSV-content caveat as for the APC file above)
  readr::write_csv(APNI_cleaned, file = paste0(path, "apni.tar.gz"))

}

58 changes: 48 additions & 10 deletions R/state_diversity_counts.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' @title State- and territory-level diversity
#'
#'
#' @description
#' For Australian states and territories, use geographic distribution data from
#' the APC to calculate state-level diversity for native, introduced,
#' For Australian states and territories, use geographic distribution data from
#' the APC to calculate state-level diversity for native, introduced,
#' and more complicated species origins
#'
#' @family diversity methods
Expand All @@ -26,8 +26,15 @@
#'
#' @examples
#' \donttest{state_diversity_counts(state = "NSW")}

state_diversity_counts <- function(state,
resources = load_taxonomic_resources()) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

valid_inputs <- c(
"NSW",
"NT",
Expand Down Expand Up @@ -71,14 +78,45 @@ state_diversity_counts <- function(state,


#' @noRd
get_apc_genus_family_lookup <-
function(resources = load_taxonomic_resources()) {
apc_s <- dplyr::filter(resources$APC,
taxon_rank == "species")
dplyr::tibble(genus = word(apc_s$scientific_name, 1, 1),
family = apc_s$family) %>%
create_apc_genus_family_lookup <-
  function(resources) {
    # Build the distinct genus/family pairs found among the species-rank
    # rows of resources$APC.
    species_rows <- dplyr::filter(resources$APC, taxon_rank == "species")

    # Genus is taken as the first word of accepted_name_usage.
    # NOTE(review): `word` is unqualified; presumably stringr::word -- confirm
    # it is imported by the package.
    genus_family <- dplyr::distinct(
      dplyr::tibble(
        genus = word(species_rows$accepted_name_usage, 1, 1),
        family = species_rows$family
      )
    )

    return(genus_family)
  }


#' @title Lookup Family by Genus from APC
#'
#' @description
#' Match each supplied genus to its family using the genus-to-family table
#' built from the Australian Plant Census (APC) taxonomic resources.
#'
#' @param genus A character vector of genus names to look up.
#' @param resources The taxonomic resources used to build the lookup.
#'  Loading these can be slow, so call \code{\link{load_taxonomic_resources}}
#'  once beforehand and pass the result in to speed up this function.
#'
#' @return A tibble with a "genus" column holding the supplied names and a
#'  "family" column holding the matched APC family (\code{NA} where no match
#'  was found, in which case a warning is also raised). Returns \code{NULL},
#'  with a message, when \code{resources} is \code{NULL}.
#'
#' @seealso \code{\link{load_taxonomic_resources}}
#'
#' @export
#'
#' @examples
#' \donttest{get_apc_genus_family_lookup(genus = c("Acacia", "Eucalyptus"))}
get_apc_genus_family_lookup <-
  function(genus, resources = load_taxonomic_resources()) {
    # Nothing to match against when the resources could not be loaded
    if (is.null(resources)) {
      message("Not finding taxonomic resources; check internet connection?")
      return(NULL)
    }

    genus_to_family <- create_apc_genus_family_lookup(resources = resources)

    # Keep every requested genus; unmatched ones get an NA family
    matched <- dplyr::left_join(
      dplyr::tibble(genus = genus),
      genus_to_family,
      by = "genus"
    )

    if (any(is.na(matched$family))) {
      warning("some non-matches with the APC accepted genus list, check the formatting of your genus vector.")
    }

    return(matched)
  }
5 changes: 5 additions & 0 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ update_taxonomy <- function(aligned_data,
output = NULL,
resources = load_taxonomic_resources()) {

if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
return(NULL)
}

aligned_data <-
aligned_data %>%
dplyr::select(original_name, aligned_name, taxon_rank, taxonomic_dataset, aligned_reason) %>%
Expand Down
Loading

0 comments on commit 7d5a1a3

Please sign in to comment.