Merge branch 'develop' (preparing for 4.1.0)

traitecoevo · Jan 30, 2023 · ad077bb · ad077bb
2 parents b9e5dad + adc5c21
commit ad077bb
Show file tree

Hide file tree

Showing 155 changed files with 1,683 additions and 1,877 deletions.
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,7 @@ temp
 .cache/
 .local/
 .config/
+.vs/
 *.Rproj
 tmp*
 reports

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -43,7 +43,6 @@ Imports:
 Suggests:
     austraits,
     leaflet,
-    ggbeeswarm,
     bibtex,
     knitr,
     bench,

diff --git a/R/process.R b/R/process.R
@@ -1201,14 +1201,15 @@ process_format_methods <- function(metadata, dataset_id, sources, contributors)
       type = str_replace_all(.data$source_key, "_[:digit:]+", ""),
       source_id = metadata$source %>%
         util_list_to_df2() %>%
-        dplyr::select(.data$key)
-    ) 
+        purrr::pluck("key")
+    )
 
   source_primary_key <- metadata$source$primary$key
-  source_secondary_keys <- citation_types %>% dplyr::filter(.data$type == "secondary") %>% dplyr::select(.data$source_id) %>% as.vector() 
-  source_secondary_keys <- source_secondary_keys$source_id$key %>% as.vector()
-  source_original_dataset_keys <- citation_types %>% dplyr::filter(.data$type == "original") %>% dplyr::select(.data$source_id) %>% as.vector()
-  source_original_dataset_keys <- source_original_dataset_keys$source_id$key %>% as.vector()
+  source_secondary_keys <- citation_types %>%
+    dplyr::filter(.data$type == "secondary") %>%
+    purrr::pluck("source_id")
+
+  source_original_dataset_keys <- citation_types %>% dplyr::filter(.data$type == "original") %>% purrr::pluck("source_id")
 
   # combine collectors to add into the methods table
   collectors_tmp <-
@@ -1247,12 +1248,12 @@ process_format_methods <- function(metadata, dataset_id, sources, contributors)
           source_primary_citation = bib_print(sources[[source_primary_key]]),
           source_secondary_key = source_secondary_keys %>% paste(collapse = "; "),
           source_secondary_citation = ifelse(length(source_secondary_keys) == 0, NA_character_,
-            purrr::map_chr(sources[source_secondary_keys], bib_print) %>% paste(collapse = "; ") %>%
+            purrr::map_chr(source_secondary_keys, ~sources[[.x]] %>% bib_print) %>% paste(collapse = "; ") %>%
               stringr::str_replace_all("\\.;", ";")
             ),                    
           source_original_dataset_key = source_original_dataset_keys %>% paste(collapse = "; "),
           source_original_dataset_citation = ifelse(length(source_original_dataset_keys) == 0, NA_character_,
-            purrr::map_chr(sources[source_original_dataset_keys], bib_print) %>% paste(collapse = "; ") %>%
+            purrr::map_chr(source_original_dataset_keys, ~sources[[.x]] %>% bib_print) %>% paste(collapse = "; ") %>%
             stringr::str_replace_all("\\.;", ";")
           )
         )
@@ -1458,21 +1459,28 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
     austraits_raw$taxonomic_updates %>%
     dplyr::left_join(by = "cleaned_name",
               taxa %>% dplyr::select(.data$cleaned_name, .data$cleaned_scientific_name_id, .data$cleaned_name_taxonomic_status,
-                                      .data$cleaned_name_alternative_taxonomic_status, .data$taxon_id, .data$taxon_name)
+                                      .data$cleaned_name_alternative_taxonomic_status, .data$taxon_id, .data$taxon_name, .data$taxon_rank)
               ) %>%
+    dplyr::mutate(
+      taxonomic_resolution = ifelse(!is.na(.data$taxonomic_resolution) & .data$taxonomic_resolution != .data$taxon_rank, .data$taxon_rank, .data$taxonomic_resolution),
+      taxonomic_resolution = ifelse(is.na(.data$taxonomic_resolution), .data$taxon_rank, .data$taxonomic_resolution)
+    ) %>%
     dplyr::distinct() %>%
+    dplyr::select(-.data$taxon_rank) %>%
     dplyr::arrange(.data$cleaned_name)
 
 
   austraits_raw$traits <-
     austraits_raw$traits %>%
     dplyr::rename(cleaned_name = .data$taxon_name) %>%
     dplyr::left_join(by = "cleaned_name",
-              taxa %>% dplyr::select(.data$cleaned_name, .data$taxon_name)
+              taxa %>% dplyr::select(.data$cleaned_name, .data$taxon_name, .data$taxon_rank)
               ) %>%
     dplyr::select(.data$dataset_id, .data$taxon_name, dplyr::everything()) %>%
-    dplyr::mutate(taxon_name = ifelse(is.na(.data$taxon_name), .data$cleaned_name, .data$taxon_name)) %>%
-    dplyr::mutate(taxon_name = ifelse(stringr::str_detect(.data$cleaned_name, "\\["), .data$cleaned_name, .data$taxon_name)) %>%
+    dplyr::mutate(
+      taxon_name = ifelse(is.na(.data$taxon_name), .data$cleaned_name, .data$taxon_name),
+      taxon_name = ifelse(stringr::str_detect(.data$cleaned_name, "\\["), .data$cleaned_name, .data$taxon_name)
+    ) %>%
     dplyr::select(-.data$cleaned_name)
 
 # names, identifiers for all genera
@@ -1501,31 +1509,33 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
                      taxa %>% dplyr::select(.data$taxon_name, .data$taxon_rank, .data$family) %>% dplyr::distinct() %>% util_df_convert_character()
     ) 
 
+
   species_tmp <- species_tmp %>%  
-    dplyr::mutate(    
+    dplyr::mutate(
       # if no taxonomic resolution is specified, then the name's taxonomic resolution is the taxon_rank for the taxon name
-      taxon_rank = ifelse(!is.na(.data$taxonomic_resolution), .data$taxonomic_resolution, .data$taxon_rank),
+      taxonomic_resolution = ifelse(.data$taxon_name %in% taxa$cleaned_name, taxa$taxon_rank[match(.data$taxon_name, taxa$cleaned_name)], taxonomic_resolution),
+      taxon_rank = ifelse(!is.na(.data$taxon_rank), .data$taxonomic_resolution, .data$taxon_rank),
       # field trinomial is only filled in if taxonomic resolution is an infraspecific name 
-      trinomial = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas"),             
+      trinomial = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas"),             
                         stringr::str_split_fixed(.data$taxon_name, "\\[",2)[,1] %>% stringr::str_trim(), NA),
       # field binomial is filled in if taxonomic resolution is an infraspecific name or a binomial
       # all taxon names that have "extra" information (beyond the actual name) have been formatted to have that information in square brackets '[]',
       # so these can be used as a delimiter to extract the actual name
-      binomial = ifelse(.data$taxon_rank %in% c("binomial", "Species"), 
+      binomial = ifelse(.data$taxon_rank %in% c("Species"), 
                         stringr::str_split_fixed(.data$taxon_name, "\\[",2)[,1] %>% stringr::str_trim(), NA),
-      binomial = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas", "Series"), 
+      binomial = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas", "Series"), 
                         stringr::word(.data$taxon_name, start = 1, end = 2), .data$binomial),
       binomial = stringr::str_trim(.data$binomial),
       # genus filled in for all names that have a taxonomic resolution of genus or more detailed
       genus = ifelse(!.data$taxon_rank %in% c("Familia", "family"), ifelse(stringr::word(.data$taxon_name, 1) == "x", stringr::word(.data$taxon_name, start = 1, end = 2), stringr::word(.data$taxon_name, 1)), NA),
       family = ifelse(.data$taxon_rank %in% c("Familia", "family"), stringr::word(.data$taxon_name, 1), .data$family),
       # identify which name is to be matched to the various identifiers, distribution information, etc. in the taxa file
-      name_to_match_to = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas"), .data$trinomial, NA),
-      name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("binomial", "Species"), .data$binomial, .data$name_to_match_to),
+      name_to_match_to = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas"), .data$trinomial, NA),
+      name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("Species"), .data$binomial, .data$name_to_match_to),
       name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("genus", "Genus"), .data$genus, .data$name_to_match_to),
       name_to_match_to = ifelse(is.na(.data$name_to_match_to) & is.na(.data$taxon_rank), .data$genus, .data$name_to_match_to),
       name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("family", "Familia"), .data$family, .data$name_to_match_to)
-      ) %>%
+      ) %>% 
       # remove family, taxon_rank; they are about to be merged back in, but matches will now be possible to more rows
       select(-.data$taxon_rank, - .data$taxonomic_resolution) %>%
       rename(family_tmp = .data$family) %>%
@@ -1558,18 +1568,17 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
                     .data$taxonomic_status, .data$scientific_name, .data$scientific_name_authorship, .data$taxon_id, 
                     .data$scientific_name_id)
 
-
   austraits_raw$taxa <-
     species_tmp %>%
     dplyr::bind_rows() %>%
     dplyr::arrange(.data$taxon_name) %>%
     dplyr::distinct(.data$taxon_name, .keep_all = TRUE)
 
   # only now, at the very end, can `taxonomic_resolution` be removed from the traits table
-  
+
   austraits_raw$traits <-
     austraits_raw$traits %>%
-      dplyr::select(-.data$taxonomic_resolution)
+      dplyr::select(-.data$taxonomic_resolution, -.data$taxon_rank)
 
   austraits_raw$excluded_data <-
     austraits_raw$excluded_data %>%

diff --git a/config/metadata.yml b/config/metadata.yml
@@ -1,7 +1,7 @@
 metadata:
   title: 'AusTraits: a curated plant trait database for the Australian flora'
   description: "AusTraits is a transformative database, containing measurements on the traits of Australia's plant taxa, standardised from hundreds of disconnected primary sources. While this repository contains the harmonised data, the raw data and code used to build the resource are also available on the project's GitHub repository, http://traitecoevo.github.io/austraits.build. Further information on the project is available in the associated publication and at the project website https://austraits.org."
-  version: "4.0.0"
+  version: "4.1.0"
   doi: 10.5281/zenodo.3568417
   structure_URI: https://github.com/traitecoevo/austraits.build
   geo_location:
@@ -10,15 +10,20 @@ metadata:
   language: en
   related_identifiers:
   - related_identifier_type: url 
-    identifier: https://github.com/traitecoevo/austraits.build/tree/v4.0.0
+    identifier: https://github.com/traitecoevo/austraits.build/tree/v4.1.0
     relation_type: isCompiledBy
     resource_type: dataset
   - related_identifier_type: doi
     identifier: 10.1038/s41597-021-01006-6
     relation_type: isCitedBy
-    resource_type: publication-article
+    resource_type: publication-article  
+  - related_identifier_type: doi
+    identifier: 10.5281/zenodo.3568417
+    relation_type: isVersionOf
+    resource_type: dataset
   references: "Falster, Gallagher et al (2021) *AusTraits, a curated plant trait database for the Australian flora*. Scientific Data 8: 254, https://doi.org/10.1038/s41597-021-01006-6"
   publisher: Zenodo
+  publication_year: 2022
   publication_date: 27 Nov 2022
   license:
     rights: CC-BY-4.0
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,6 +12,7 @@ temp @@
     .cache/
     .local/
     .config/
+    .vs/
     *.Rproj
     tmp*
     reports
@@ Expand Down @@