Skip to content

Commit

Permalink
Merge branch 'develop' (preparing for 4.1.0)
Browse files Browse the repository at this point in the history
  • Loading branch information
dfalster committed Jan 30, 2023
2 parents b9e5dad + adc5c21 commit ad077bb
Show file tree
Hide file tree
Showing 155 changed files with 1,683 additions and 1,877 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ temp
.cache/
.local/
.config/
.vs/
*.Rproj
tmp*
reports
Expand Down
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ Imports:
Suggests:
austraits,
leaflet,
ggbeeswarm,
bibtex,
knitr,
bench,
Expand Down
55 changes: 32 additions & 23 deletions R/process.R
Original file line number Diff line number Diff line change
Expand Up @@ -1201,14 +1201,15 @@ process_format_methods <- function(metadata, dataset_id, sources, contributors)
type = str_replace_all(.data$source_key, "_[:digit:]+", ""),
source_id = metadata$source %>%
util_list_to_df2() %>%
dplyr::select(.data$key)
)
purrr::pluck("key")
)

source_primary_key <- metadata$source$primary$key
source_secondary_keys <- citation_types %>% dplyr::filter(.data$type == "secondary") %>% dplyr::select(.data$source_id) %>% as.vector()
source_secondary_keys <- source_secondary_keys$source_id$key %>% as.vector()
source_original_dataset_keys <- citation_types %>% dplyr::filter(.data$type == "original") %>% dplyr::select(.data$source_id) %>% as.vector()
source_original_dataset_keys <- source_original_dataset_keys$source_id$key %>% as.vector()
source_secondary_keys <- citation_types %>%
dplyr::filter(.data$type == "secondary") %>%
purrr::pluck("source_id")

source_original_dataset_keys <- citation_types %>% dplyr::filter(.data$type == "original") %>% purrr::pluck("source_id")

# combine collectors to add into the methods table
collectors_tmp <-
Expand Down Expand Up @@ -1247,12 +1248,12 @@ process_format_methods <- function(metadata, dataset_id, sources, contributors)
source_primary_citation = bib_print(sources[[source_primary_key]]),
source_secondary_key = source_secondary_keys %>% paste(collapse = "; "),
source_secondary_citation = ifelse(length(source_secondary_keys) == 0, NA_character_,
purrr::map_chr(sources[source_secondary_keys], bib_print) %>% paste(collapse = "; ") %>%
purrr::map_chr(source_secondary_keys, ~sources[[.x]] %>% bib_print) %>% paste(collapse = "; ") %>%
stringr::str_replace_all("\\.;", ";")
),
source_original_dataset_key = source_original_dataset_keys %>% paste(collapse = "; "),
source_original_dataset_citation = ifelse(length(source_original_dataset_keys) == 0, NA_character_,
purrr::map_chr(sources[source_original_dataset_keys], bib_print) %>% paste(collapse = "; ") %>%
purrr::map_chr(source_original_dataset_keys, ~sources[[.x]] %>% bib_print) %>% paste(collapse = "; ") %>%
stringr::str_replace_all("\\.;", ";")
)
)
Expand Down Expand Up @@ -1458,21 +1459,28 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
austraits_raw$taxonomic_updates %>%
dplyr::left_join(by = "cleaned_name",
taxa %>% dplyr::select(.data$cleaned_name, .data$cleaned_scientific_name_id, .data$cleaned_name_taxonomic_status,
.data$cleaned_name_alternative_taxonomic_status, .data$taxon_id, .data$taxon_name)
.data$cleaned_name_alternative_taxonomic_status, .data$taxon_id, .data$taxon_name, .data$taxon_rank)
) %>%
dplyr::mutate(
taxonomic_resolution = ifelse(!is.na(.data$taxonomic_resolution) & .data$taxonomic_resolution != .data$taxon_rank, .data$taxon_rank, .data$taxonomic_resolution),
taxonomic_resolution = ifelse(is.na(.data$taxonomic_resolution), .data$taxon_rank, .data$taxonomic_resolution)
) %>%
dplyr::distinct() %>%
dplyr::select(-.data$taxon_rank) %>%
dplyr::arrange(.data$cleaned_name)


austraits_raw$traits <-
austraits_raw$traits %>%
dplyr::rename(cleaned_name = .data$taxon_name) %>%
dplyr::left_join(by = "cleaned_name",
taxa %>% dplyr::select(.data$cleaned_name, .data$taxon_name)
taxa %>% dplyr::select(.data$cleaned_name, .data$taxon_name, .data$taxon_rank)
) %>%
dplyr::select(.data$dataset_id, .data$taxon_name, dplyr::everything()) %>%
dplyr::mutate(taxon_name = ifelse(is.na(.data$taxon_name), .data$cleaned_name, .data$taxon_name)) %>%
dplyr::mutate(taxon_name = ifelse(stringr::str_detect(.data$cleaned_name, "\\["), .data$cleaned_name, .data$taxon_name)) %>%
dplyr::mutate(
taxon_name = ifelse(is.na(.data$taxon_name), .data$cleaned_name, .data$taxon_name),
taxon_name = ifelse(stringr::str_detect(.data$cleaned_name, "\\["), .data$cleaned_name, .data$taxon_name)
) %>%
dplyr::select(-.data$cleaned_name)

# names, identifiers for all genera
Expand Down Expand Up @@ -1501,31 +1509,33 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
taxa %>% dplyr::select(.data$taxon_name, .data$taxon_rank, .data$family) %>% dplyr::distinct() %>% util_df_convert_character()
)


species_tmp <- species_tmp %>%
dplyr::mutate(
dplyr::mutate(
# if no taxonomic resolution is specified, then the name's taxonomic resolution is the taxon_rank for the taxon name
taxon_rank = ifelse(!is.na(.data$taxonomic_resolution), .data$taxonomic_resolution, .data$taxon_rank),
taxonomic_resolution = ifelse(.data$taxon_name %in% taxa$cleaned_name, taxa$taxon_rank[match(.data$taxon_name, taxa$cleaned_name)], taxonomic_resolution),
taxon_rank = ifelse(!is.na(.data$taxon_rank), .data$taxonomic_resolution, .data$taxon_rank),
# field trinomial is only filled in if taxonomic resolution is an infraspecific name
trinomial = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas"),
trinomial = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas"),
stringr::str_split_fixed(.data$taxon_name, "\\[",2)[,1] %>% stringr::str_trim(), NA),
# field binomial is filled in if taxonomic resolution is an infraspecific name or a binomial
# all taxon names that have "extra" information (beyond the actual name) have been formatted to have that information in square brackets '[]',
# so these can be used as a delimiter to extract the actual name
binomial = ifelse(.data$taxon_rank %in% c("binomial", "Species"),
binomial = ifelse(.data$taxon_rank %in% c("Species"),
stringr::str_split_fixed(.data$taxon_name, "\\[",2)[,1] %>% stringr::str_trim(), NA),
binomial = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas", "Series"),
binomial = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas", "Series"),
stringr::word(.data$taxon_name, start = 1, end = 2), .data$binomial),
binomial = stringr::str_trim(.data$binomial),
# genus filled in for all names that have a taxonomic resolution of genus or more detailed
genus = ifelse(!.data$taxon_rank %in% c("Familia", "family"), ifelse(stringr::word(.data$taxon_name, 1) == "x", stringr::word(.data$taxon_name, start = 1, end = 2), stringr::word(.data$taxon_name, 1)), NA),
family = ifelse(.data$taxon_rank %in% c("Familia", "family"), stringr::word(.data$taxon_name, 1), .data$family),
# identify which name is to be matched to the various identifiers, distribution information, etc. in the taxa file
name_to_match_to = ifelse(.data$taxon_rank %in% c("trinomial", "Subspecies", "Forma", "Varietas"), .data$trinomial, NA),
name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("binomial", "Species"), .data$binomial, .data$name_to_match_to),
name_to_match_to = ifelse(.data$taxon_rank %in% c("Subspecies", "Forma", "Varietas"), .data$trinomial, NA),
name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("Species"), .data$binomial, .data$name_to_match_to),
name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("genus", "Genus"), .data$genus, .data$name_to_match_to),
name_to_match_to = ifelse(is.na(.data$name_to_match_to) & is.na(.data$taxon_rank), .data$genus, .data$name_to_match_to),
name_to_match_to = ifelse(is.na(.data$name_to_match_to) & .data$taxon_rank %in% c("family", "Familia"), .data$family, .data$name_to_match_to)
) %>%
) %>%
# remove family, taxon_rank; they are about to be merged back in, but matches will now be possible to more rows
select(-.data$taxon_rank, - .data$taxonomic_resolution) %>%
rename(family_tmp = .data$family) %>%
Expand Down Expand Up @@ -1558,18 +1568,17 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
.data$taxonomic_status, .data$scientific_name, .data$scientific_name_authorship, .data$taxon_id,
.data$scientific_name_id)


austraits_raw$taxa <-
species_tmp %>%
dplyr::bind_rows() %>%
dplyr::arrange(.data$taxon_name) %>%
dplyr::distinct(.data$taxon_name, .keep_all = TRUE)

# only now, at the very end, can `taxonomic_resolution` be removed from the traits table

austraits_raw$traits <-
austraits_raw$traits %>%
dplyr::select(-.data$taxonomic_resolution)
dplyr::select(-.data$taxonomic_resolution, -.data$taxon_rank)

austraits_raw$excluded_data <-
austraits_raw$excluded_data %>%
Expand Down
11 changes: 8 additions & 3 deletions config/metadata.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
metadata:
title: 'AusTraits: a curated plant trait database for the Australian flora'
description: "AusTraits is a transformative database, containing measurements on the traits of Australia's plant taxa, standardised from hundreds of disconnected primary sources. While this repository contains the harmonised data, the raw data and code used to build the resource are also available on the project's GitHub repository, http://traitecoevo.github.io/austraits.build. Further information on the project is available in the associated publication and at the project website https://austraits.org."
version: "4.0.0"
version: "4.1.0"
doi: 10.5281/zenodo.3568417
structure_URI: https://github.com/traitecoevo/austraits.build
geo_location:
Expand All @@ -10,15 +10,20 @@ metadata:
language: en
related_identifiers:
- related_identifier_type: url
identifier: https://github.com/traitecoevo/austraits.build/tree/v4.0.0
identifier: https://github.com/traitecoevo/austraits.build/tree/v4.1.0
relation_type: isCompiledBy
resource_type: dataset
- related_identifier_type: doi
identifier: 10.1038/s41597-021-01006-6
relation_type: isCitedBy
resource_type: publication-article
resource_type: publication-article
- related_identifier_type: doi
identifier: 10.5281/zenodo.3568417
relation_type: isVersionOf
resource_type: dataset
references: "Falster, Gallagher et al (2021) *AusTraits, a curated plant trait database for the Australian flora*. Scientific Data 8: 254, https://doi.org/10.1038/s41597-021-01006-6"
publisher: Zenodo
publication_year: 2022
publication_date: 27 Nov 2022
license:
rights: CC-BY-4.0
Expand Down
Loading

0 comments on commit ad077bb

Please sign in to comment.