diff --git a/NAMESPACE b/NAMESPACE index 8306631b..0b9d2cd4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(native_anywhere_in_australia) export(standardise_names) export(state_diversity_counts) export(strip_names) +export(strip_names_2) export(update_taxonomy) import(dplyr) import(stringr) diff --git a/R/match_taxa.R b/R/match_taxa.R index cf117f6c..b7852eec 100644 --- a/R/match_taxa.R +++ b/R/match_taxa.R @@ -519,10 +519,7 @@ match_taxa <- function( taxonomic_dataset = NA_character_, taxon_rank = "genus", aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name), - aligned_name = ifelse(is.na(identifier_string2), - paste0(aligned_name_tmp, "]"), - paste0(aligned_name_tmp, identifier_string2, "]") - ), + aligned_name = NA, aligned_reason = paste0( "Taxon name includes '--' (double dash) indicating an intergrade between two taxa, but exact and fuzzy matches fail to align to a genus in the APC or APNI (", Sys.Date(), @@ -724,10 +721,7 @@ match_taxa <- function( taxonomic_dataset = NA_character_, taxon_rank = "genus", aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name), - aligned_name = ifelse(is.na(identifier_string2), - paste0(aligned_name_tmp, "]"), - paste0(aligned_name_tmp, identifier_string2, "]") - ), + aligned_name = NA, aligned_reason = paste0( "Taxon name includes '/' (slash) indicating an uncertain species identification but an accepted genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (", Sys.Date(), @@ -1032,10 +1026,7 @@ match_taxa <- function( taxonomic_dataset = NA_character_, taxon_rank = "genus", aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name), - aligned_name = ifelse(is.na(identifier_string2), - paste0(aligned_name_tmp, "]"), - paste0(aligned_name_tmp, identifier_string2, "]") - ), + aligned_name = NA, aligned_reason = paste0( "Taxon name includes 'affinis' or 'aff' indicating an unknown taxon that bears an affinity to a different taxon in the same genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI ", Sys.Date(), @@ -1299,10 +1290,7 @@ match_taxa <- function( taxonomic_dataset = NA_character_, taxon_rank = "genus", aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " x [", cleaned_name), - aligned_name = ifelse(is.na(identifier_string2), - paste0(aligned_name_tmp, "]"), - paste0(aligned_name_tmp, identifier_string2, "]") - ), + aligned_name = NA, aligned_reason = paste0( "Taxon name includes ' x ' indicating a hybrid taxon and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (", Sys.Date(), diff --git a/R/strip_names.R b/R/strip_names.R index 07d53ec3..5c811e82 100644 --- a/R/strip_names.R +++ b/R/strip_names.R @@ -37,7 +37,7 @@ strip_names <- function(taxon_names) { #' Strip taxonomic names of subtaxa designations, filled words and special characters #' #' Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), -#' additional filler words and characters (" x " [hybrid taxa], "sp.", "cf"), +#' additional filler words and characters (" x " for hybrid taxa, "sp.", "cf"), #' special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector #' of names is also converted to lowercase. #' @@ -54,7 +54,7 @@ strip_names <- function(taxon_names) { #' "Acacia sp.", #' "Lepidium sp. Tanguin Hill (K.R.Newbey 10501)")) #' -#' @noRd +#' @export strip_names_2 <- function(taxon_names) { taxon_names %>% stringr::str_replace_all("\\.", "") %>% diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R index 64196e33..501e6590 100644 --- a/R/update_taxonomy.R +++ b/R/update_taxonomy.R @@ -148,14 +148,13 @@ update_taxonomy <- function(aligned_data, dplyr::bind_rows(taxa_blank) %>% dplyr::mutate( suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name), - suggested_name = ifelse(is.na(suggested_name), original_name, suggested_name), update_reason = ifelse(taxonomic_status_aligned == "accepted", "aligned name accepted by APC", update_reason), taxonomic_status = ifelse(is.na(taxonomic_status), "unknown", taxonomic_status), taxonomic_dataset = ifelse(stringr::str_detect(taxonomic_dataset, "APC"), "APC", taxonomic_dataset), ## `genus` was the first word of the `aligned_name` in the input table; now needs to be set to NA for unknown taxa genus = ifelse(taxonomic_status == "unknown", NA_character_, genus), taxon_rank = ifelse(taxonomic_status == "unknown", NA_character_, taxon_rank), - # the next line makes everythign incosistent. If we want low, should do on loading APC + # the next line makes everything inconsistent. If we want low, should do on loading APC taxon_rank = stringr::str_to_lower(taxon_rank), canonical_name = suggested_name, taxonomic_status_aligned = ifelse(is.na(taxonomic_status_aligned), NA_character_, taxonomic_status_aligned) @@ -244,14 +243,14 @@ relevel_taxonomic_status_preferred_order <- function(taxonomic_status) { update_taxonomy_APC_genus <- function(data, resources) { if(is.null(data)) return(NULL) - + data %>% # merge in columns from APC, at the genus-level dplyr::left_join( by = "genus", resources$genera_all %>% dplyr::filter(stringr::str_detect(taxonomic_dataset, "APC")) %>% - dplyr::arrange(canonical_name, taxonomic_status) %>% ### how do I specify that I want to arrange by `preferred order` + dplyr::arrange(canonical_name, taxonomic_status) %>% dplyr::distinct(canonical_name, .keep_all = TRUE) %>% dplyr::mutate( genus = canonical_name, @@ -276,8 +275,8 @@ update_taxonomy_APC_genus <- function(data, resources) { taxon_ID_genus = resources$genera_all$taxon_ID[match(accepted_name_usage_ID, resources$genera_all$accepted_name_usage_ID)], # genus names in `aligned_name` that are not APC-accepted need to be updated to their current name in `suggested_name` aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, extract_genus(aligned_name), "")), - suggested_name = ifelse(taxonomic_status == "accepted", paste0(genus_accepted, aligned_minus_genus), NA), - suggested_name = ifelse(taxonomic_status != "accepted", aligned_name, suggested_name), + # if there is an APC-accepted genus, replace whatever the initial genus was with the accepted genus, otherwise the suggested name is the aligned name + suggested_name = ifelse(taxonomic_status == "genus accepted", paste0(genus_accepted, aligned_minus_genus), aligned_name), # indicate taxonomic_status of the genus name in `aligned_name` and why it needed to be updated for the `suggested_name` genus_update_reason = as.character(my_order), genus = genus_accepted, diff --git a/_pkgdown.yml b/_pkgdown.yml index b084bffd..c82eaa02 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -36,6 +36,7 @@ reference: - update_taxonomy - standardise_names - strip_names + - strip_names_2 - subtitle: Established status across states/territories - contents: - create_species_state_origin_matrix diff --git a/man/strip_names_2.Rd b/man/strip_names_2.Rd new file mode 100644 index 00000000..9b1d2dfa --- /dev/null +++ b/man/strip_names_2.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/strip_names.R +\name{strip_names_2} +\alias{strip_names_2} +\title{Strip taxonomic names of subtaxa designations, filled words and special characters} +\usage{ +strip_names_2(taxon_names) +} +\arguments{ +\item{taxon_names}{A character vector of taxonomic names to be stripped.} +} +\value{ +A character vector of stripped taxonomic names, with subtaxa designations, special +characters, additional filler words and extra whitespace removed, and all letters converted to lowercase. +} +\description{ +Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), +additional filler words and characters (" x " for hybrid taxa, "sp.", "cf"), +special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector +of names is also converted to lowercase. +} +\examples{ +strip_names_2(c("Abies lasiocarpa subsp. lasiocarpa", + "Quercus kelloggii", + "Pinus contorta var. latifolia", + "Acacia sp.", + "Lepidium sp. Tanguin Hill (K.R.Newbey 10501)")) + +} diff --git a/tests/testthat/benchmarks/test_matches_alignments_updates.csv b/tests/testthat/benchmarks/test_matches_alignments_updates.csv index 5c19db7a..5029abf0 100644 --- a/tests/testthat/benchmarks/test_matches_alignments_updates.csv +++ b/tests/testthat/benchmarks/test_matches_alignments_updates.csv @@ -31,8 +31,8 @@ Aporuelliaa abc--def,match_03c,Aporuellia sp. [Aporuelliaa abc--def; test_all_ma Driandra abc--def,match_03c,Dryandra sp. [Driandra abc--def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. Xyystidium abc--def,match_03d,Xystidium sp. [Xyystidium abc--def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin. Zygiaa abc--def,match_03d,Zygia sp. [Zygiaa abc--def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne -Abcde fgh -- ijk,match_03e,Abcde sp. [Abcde fgh -- ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA -Ryandra abc--def,match_03e,Ryandra sp. [Ryandra abc--def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA +Abcde fgh -- ijk,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA +Ryandra abc--def,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA Abildgaardia odontocarpa / Abildgaardia oxystachya,match_04a,Abildgaardia sp. [Abildgaardia odontocarpa / Abildgaardia oxystachya; test_all_matches_TRUE],APC,genus,Abildgaardia,FALSE,https://id.biodiversity.org.au/node/apni/2905759,https://id.biodiversity.org.au/name/apni/55984,Abildgaardia Vahl Acanthocarpus fimbriatus / Acanthocarpus mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / Acanthocarpus mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm. Acanthocarpus fimbriatus / mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm. @@ -51,8 +51,8 @@ Aporuelliaa abc / def,match_04c,Aporuellia sp. [Aporuelliaa abc / def; test_all_ Drrandra abc / def,match_04c,Dryandra sp. [Drrandra abc / def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. Xyystidium abc/def,match_04d,Xystidium sp. [Xyystidium abc/def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin. Zygiaa abc / def,match_04d,Zygia sp. [Zygiaa abc / def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne -Abcde fgh / ijk,match_04e,Abcde sp. [Abcde fgh / ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA -Ryandra abc / def,match_04e,Ryandra sp. [Ryandra abc / def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA +Abcde fgh / ijk,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA +Ryandra abc / def,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA Cycas candida K.D.Hill,match_05a,Cycas candida,APC,species,Cycas candida,TRUE,https://id.biodiversity.org.au/node/apni/2893335,https://id.biodiversity.org.au/name/apni/188177,Cycas candida K.D.Hill Eremophila papillata Chinnock,match_05a,Eremophila papillata,APC,species,Eremophila papillata,TRUE,https://id.biodiversity.org.au/node/apni/2910890,https://id.biodiversity.org.au/name/apni/207453,Eremophila papillata Chinnock Acalypha indica var. australis F.M.Bailey,match_05b,Acalypha indica var. australis,APC,variety,Acalypha lanceolata,TRUE,https://id.biodiversity.org.au/instance/apni/889946,https://id.biodiversity.org.au/name/apni/72588,Acalypha indica var. australis F.M.Bailey @@ -125,8 +125,8 @@ Aporuelliaa aff def,match_09c,Aporuellia sp. [Aporuelliaa aff. def; test_all_mat Drrandra affinis def,match_09c,Dryandra sp. [Drrandra aff. def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. Xyystidium aff. abc,match_09d,Xystidium sp. [Xyystidium aff. abc; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin. Zygiaa aff. abc,match_09d,Zygia sp. [Zygiaa aff. abc; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne -Abcde affinis fgh,match_09e,Abcde sp. [Abcde aff. fgh; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA -Rryandra aff def,match_09e,Rryandra sp. [Rryandra aff. def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA +Abcde affinis fgh,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA +Rryandra aff def,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA Aceeena x ovinaaa,match_10a,Acaena x ovina,APC,species,Acaena x ovina,FALSE,https://id.biodiversity.org.au/taxon/apni/51446291,https://id.biodiversity.org.au/name/apni/72209,Acaena x ovina A.Cunn. Banksiia serrratte,match_10a,Banksia serrata,APC,species,Banksia serrata,TRUE,https://id.biodiversity.org.au/taxon/apni/51293610,https://id.biodiversity.org.au/name/apni/109014,Banksia serrata L.f. Eremoophila opppositifolia ssp. rubraaa,match_10a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock @@ -154,8 +154,8 @@ Aporuelliaa abc x def,match_11c,Aporuellia x [Aporuelliaa abc x def; test_all_ma Drrandra x def,match_11c,Dryandra x [Drrandra x def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. Xyystidium x def,match_11d,Xystidium x [Xyystidium x def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin. Zygiaa abc x Zygia def,match_11d,Zygia x [Zygiaa abc x Zygia def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne -Abcde fgh x ijk,match_11e,Abcde x [Abcde fgh x ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA -Ryandra abc x def,match_11e,Ryandra x [Ryandra abc x def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA +Abcde fgh x ijk,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA +Ryandra abc x def,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA Baeckea sp. murchison river,match_12a,Baeckea sp. Murchison River (M.E.Trudgen 12009),APC,species,Baeckea sp. Murchison River (M.E.Trudgen 12009),TRUE,https://id.biodiversity.org.au/node/apni/2888052,https://id.biodiversity.org.au/name/apni/191267,Baeckea sp. Murchison River (M.E.Trudgen 12009) WA Herbarium Eremophila oppositifolia rubra (needle leaves),match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock Eremophila oppositifolia rubra early collection,match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock @@ -220,5 +220,5 @@ Achneriia crevifoliaa,match_22b,Achneria sp. [Achneriia crevifoliaa; test_all_ma Actinocarpos,match_22b,Actinocarpus sp. [Actinocarpos; test_all_matches_TRUE],APC,genus,Damasonium,FALSE,https://id.biodiversity.org.au/instance/apni/884226,https://id.biodiversity.org.au/name/apni/74816,Actinocarpus R.Br. Drryandra,match_22b,Dryandra sp. [Drryandra; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. Dryandraa,match_22b,Dryandra sp. [Dryandraa; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br. -Actiniladum sp.,NA,NA,NA,NA,NA,FALSE,NA,NA,NA -Ecalypha indica australis,NA,NA,NA,NA,NA,FALSE,NA,NA,NA +Actiniladum sp.,NA,NA,NA,NA,NA,TRUE,NA,NA,NA +Ecalypha indica australis,NA,NA,NA,NA,NA,TRUE,NA,NA,NA