Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Algorithm fixes #174

Merged
merged 8 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(native_anywhere_in_australia)
export(standardise_names)
export(state_diversity_counts)
export(strip_names)
export(strip_names_2)
export(update_taxonomy)
import(dplyr)
import(stringr)
Expand Down
20 changes: 4 additions & 16 deletions R/match_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes '--' (double dash) indicating an intergrade between two taxa, but exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down Expand Up @@ -724,10 +721,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes '/' (slash) indicating an uncertain species identification but an accepted genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down Expand Up @@ -1032,10 +1026,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes 'affinis' or 'aff' indicating an unknown taxon that bears an affinity to a different taxon in the same genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI ",
Sys.Date(),
Expand Down Expand Up @@ -1299,10 +1290,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " x [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes ' x ' indicating a hybrid taxon and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down
2 changes: 1 addition & 1 deletion R/strip_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ strip_names <- function(taxon_names) {
#' "Acacia sp.",
#' "Lepidium sp. Tanguin Hill (K.R.Newbey 10501)"))
#'
#' @noRd
#' @export
strip_names_2 <- function(taxon_names) {
taxon_names %>%
stringr::str_replace_all("\\.", "") %>%
Expand Down
12 changes: 6 additions & 6 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,14 @@ update_taxonomy <- function(aligned_data,
dplyr::bind_rows(taxa_blank) %>%
dplyr::mutate(
suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name),
suggested_name = ifelse(is.na(suggested_name), original_name, suggested_name),
#suggested_name = ifelse(is.na(suggested_name), original_name, suggested_name), # TODO: remove this disabled fallback to `original_name`
ehwenk marked this conversation as resolved.
Show resolved Hide resolved
update_reason = ifelse(taxonomic_status_aligned == "accepted", "aligned name accepted by APC", update_reason),
taxonomic_status = ifelse(is.na(taxonomic_status), "unknown", taxonomic_status),
taxonomic_dataset = ifelse(stringr::str_detect(taxonomic_dataset, "APC"), "APC", taxonomic_dataset),
## `genus` was the first word of the `aligned_name` in the input table; now needs to be set to NA for unknown taxa
genus = ifelse(taxonomic_status == "unknown", NA_character_, genus),
taxon_rank = ifelse(taxonomic_status == "unknown", NA_character_, taxon_rank),
# the next line makes everythign incosistent. If we want low, should do on loading APC
# the next line makes everything inconsistent. If we want low, should do on loading APC
taxon_rank = stringr::str_to_lower(taxon_rank),
canonical_name = suggested_name,
taxonomic_status_aligned = ifelse(is.na(taxonomic_status_aligned), NA_character_, taxonomic_status_aligned)
Expand Down Expand Up @@ -244,14 +244,14 @@ relevel_taxonomic_status_preferred_order <- function(taxonomic_status) {
update_taxonomy_APC_genus <- function(data, resources) {

if(is.null(data)) return(NULL)

data %>%
# merge in columns from APC, at the genus-level
dplyr::left_join(
by = "genus",
resources$genera_all %>%
dplyr::filter(stringr::str_detect(taxonomic_dataset, "APC")) %>%
dplyr::arrange(canonical_name, taxonomic_status) %>% ### how do I specify that I want to arrange by `preferred order`
dplyr::arrange(canonical_name, taxonomic_status) %>%
dplyr::distinct(canonical_name, .keep_all = TRUE) %>%
dplyr::mutate(
genus = canonical_name,
Expand All @@ -276,8 +276,8 @@ update_taxonomy_APC_genus <- function(data, resources) {
taxon_ID_genus = resources$genera_all$taxon_ID[match(accepted_name_usage_ID, resources$genera_all$accepted_name_usage_ID)],
# genus names in `aligned_name` that are not APC-accepted need to be updated to their current name in `suggested_name`
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, extract_genus(aligned_name), "")),
suggested_name = ifelse(taxonomic_status == "accepted", paste0(genus_accepted, aligned_minus_genus), NA),
suggested_name = ifelse(taxonomic_status != "accepted", aligned_name, suggested_name),
suggested_name = ifelse(taxonomic_status == "genus accepted", paste0(genus_accepted, aligned_minus_genus), NA),
suggested_name = ifelse(taxonomic_status != "genus accepted", aligned_name, suggested_name),
ehwenk marked this conversation as resolved.
Show resolved Hide resolved
# indicate taxonomic_status of the genus name in `aligned_name` and why it needed to be updated for the `suggested_name`
genus_update_reason = as.character(my_order),
genus = genus_accepted,
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ reference:
- update_taxonomy
- standardise_names
- strip_names
- strip_names_2
- subtitle: Established status across states/territories
- contents:
- create_species_state_origin_matrix
Expand Down
29 changes: 29 additions & 0 deletions man/strip_names_2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions tests/testthat/benchmarks/test_matches_alignments_updates.csv
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ Aporuelliaa abc--def,match_03c,Aporuellia sp. [Aporuelliaa abc--def; test_all_ma
Driandra abc--def,match_03c,Dryandra sp. [Driandra abc--def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium abc--def,match_03d,Xystidium sp. [Xyystidium abc--def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc--def,match_03d,Zygia sp. [Zygiaa abc--def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh -- ijk,match_03e,Abcde sp. [Abcde fgh -- ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc--def,match_03e,Ryandra sp. [Ryandra abc--def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh -- ijk,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc--def,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA
Abildgaardia odontocarpa / Abildgaardia oxystachya,match_04a,Abildgaardia sp. [Abildgaardia odontocarpa / Abildgaardia oxystachya; test_all_matches_TRUE],APC,genus,Abildgaardia,FALSE,https://id.biodiversity.org.au/node/apni/2905759,https://id.biodiversity.org.au/name/apni/55984,Abildgaardia Vahl
Acanthocarpus fimbriatus / Acanthocarpus mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / Acanthocarpus mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm.
Acanthocarpus fimbriatus / mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm.
Expand All @@ -51,8 +51,8 @@ Aporuelliaa abc / def,match_04c,Aporuellia sp. [Aporuelliaa abc / def; test_all_
Drrandra abc / def,match_04c,Dryandra sp. [Drrandra abc / def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium abc/def,match_04d,Xystidium sp. [Xyystidium abc/def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc / def,match_04d,Zygia sp. [Zygiaa abc / def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh / ijk,match_04e,Abcde sp. [Abcde fgh / ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc / def,match_04e,Ryandra sp. [Ryandra abc / def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh / ijk,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc / def,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA
Cycas candida K.D.Hill,match_05a,Cycas candida,APC,species,Cycas candida,TRUE,https://id.biodiversity.org.au/node/apni/2893335,https://id.biodiversity.org.au/name/apni/188177,Cycas candida K.D.Hill
Eremophila papillata Chinnock,match_05a,Eremophila papillata,APC,species,Eremophila papillata,TRUE,https://id.biodiversity.org.au/node/apni/2910890,https://id.biodiversity.org.au/name/apni/207453,Eremophila papillata Chinnock
Acalypha indica var. australis F.M.Bailey,match_05b,Acalypha indica var. australis,APC,variety,Acalypha lanceolata,TRUE,https://id.biodiversity.org.au/instance/apni/889946,https://id.biodiversity.org.au/name/apni/72588,Acalypha indica var. australis F.M.Bailey
Expand Down Expand Up @@ -125,8 +125,8 @@ Aporuelliaa aff def,match_09c,Aporuellia sp. [Aporuelliaa aff. def; test_all_mat
Drrandra affinis def,match_09c,Dryandra sp. [Drrandra aff. def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium aff. abc,match_09d,Xystidium sp. [Xyystidium aff. abc; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa aff. abc,match_09d,Zygia sp. [Zygiaa aff. abc; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde affinis fgh,match_09e,Abcde sp. [Abcde aff. fgh; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Rryandra aff def,match_09e,Rryandra sp. [Rryandra aff. def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde affinis fgh,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA
Rryandra aff def,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA
Aceeena x ovinaaa,match_10a,Acaena x ovina,APC,species,Acaena x ovina,FALSE,https://id.biodiversity.org.au/taxon/apni/51446291,https://id.biodiversity.org.au/name/apni/72209,Acaena x ovina A.Cunn.
Banksiia serrratte,match_10a,Banksia serrata,APC,species,Banksia serrata,TRUE,https://id.biodiversity.org.au/taxon/apni/51293610,https://id.biodiversity.org.au/name/apni/109014,Banksia serrata L.f.
Eremoophila opppositifolia ssp. rubraaa,match_10a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Expand Down Expand Up @@ -154,8 +154,8 @@ Aporuelliaa abc x def,match_11c,Aporuellia x [Aporuelliaa abc x def; test_all_ma
Drrandra x def,match_11c,Dryandra x [Drrandra x def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium x def,match_11d,Xystidium x [Xyystidium x def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc x Zygia def,match_11d,Zygia x [Zygiaa abc x Zygia def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh x ijk,match_11e,Abcde x [Abcde fgh x ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc x def,match_11e,Ryandra x [Ryandra abc x def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh x ijk,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc x def,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA
Baeckea sp. murchison river,match_12a,Baeckea sp. Murchison River (M.E.Trudgen 12009),APC,species,Baeckea sp. Murchison River (M.E.Trudgen 12009),TRUE,https://id.biodiversity.org.au/node/apni/2888052,https://id.biodiversity.org.au/name/apni/191267,Baeckea sp. Murchison River (M.E.Trudgen 12009) WA Herbarium
Eremophila oppositifolia rubra (needle leaves),match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Eremophila oppositifolia rubra early collection,match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Expand Down Expand Up @@ -220,5 +220,5 @@ Achneriia crevifoliaa,match_22b,Achneria sp. [Achneriia crevifoliaa; test_all_ma
Actinocarpos,match_22b,Actinocarpus sp. [Actinocarpos; test_all_matches_TRUE],APC,genus,Damasonium,FALSE,https://id.biodiversity.org.au/instance/apni/884226,https://id.biodiversity.org.au/name/apni/74816,Actinocarpus R.Br.
Drryandra,match_22b,Dryandra sp. [Drryandra; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Dryandraa,match_22b,Dryandra sp. [Dryandraa; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Actiniladum sp.,NA,NA,NA,NA,NA,FALSE,NA,NA,NA
Ecalypha indica australis,NA,NA,NA,NA,NA,FALSE,NA,NA,NA
Actiniladum sp.,NA,NA,NA,NA,NA,TRUE,NA,NA,NA
Ecalypha indica australis,NA,NA,NA,NA,NA,TRUE,NA,NA,NA
Loading