Skip to content

Commit

Permalink
Collapse duplicate categorical values within a row (#125)
Browse files: browse the repository at this point in the history
- Closes #120
  • Loading branch information
yangsophieee authored Nov 15, 2023
1 parent 906713c commit d0b132e
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
16 changes: 8 additions & 8 deletions R/process.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ dataset_process <- function(filename_data_raw,

# Record methods
methods <- process_format_methods(metadata, dataset_id, sources, contributors)

# Retrieve taxonomic details for known species
taxonomic_updates <-
traits %>%
Expand All @@ -204,7 +204,7 @@ dataset_process <- function(filename_data_raw,

# Taxon names explicitly excluded in metadata also excluded from taxonomic updates table.
if (!is.na(metadata[["exclude_observations"]][1])) {
taxa_to_exclude <-
taxa_to_exclude <-
metadata[["exclude_observations"]] %>%
traits.build::util_list_to_df2() %>%
dplyr::mutate(
Expand All @@ -213,7 +213,7 @@ dataset_process <- function(filename_data_raw,
tidyr::unnest_longer(.data$find) %>%
dplyr::filter(.data$variable == "taxon_name")

taxonomic_updates <-
taxonomic_updates <-
taxonomic_updates %>%
dplyr::filter(!.data$aligned_name %in% taxa_to_exclude$find)
}
Expand Down Expand Up @@ -1863,10 +1863,10 @@ build_combine <- function(..., d = list(...)) {
build_update_taxonomy <- function(austraits_raw, taxa) {

columns_in_taxon_list <- names(taxa)

# incoming table from austraits_raw is a list of all taxa for the study
# `original_name` and `aligned_name` will be different if
# there were taxonomic_updates specified in metadata file
# `original_name` and `aligned_name` will be different if
# there were taxonomic_updates specified in metadata file
austraits_raw$taxonomic_updates <-
austraits_raw$taxonomic_updates %>%
dplyr::left_join(
Expand Down Expand Up @@ -1896,7 +1896,7 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
dplyr::distinct() %>%
util_df_convert_character() %>%
dplyr::mutate(
# If no taxonomic resolution is specified from taxonomic_updates,
# If no taxonomic resolution is specified from taxonomic_updates,
# then the name's taxonomic resolution is the taxon_rank for the taxon name.
taxonomic_resolution = ifelse(
.data$taxon_name %in% taxa$aligned_name,
Expand All @@ -1907,7 +1907,7 @@ build_update_taxonomy <- function(austraits_raw, taxa) {
# Create variable `name_to_match_to` which specifies the part of the taxon name to which matches can be made.
# This step requires taxon_rank.
name_to_match_to = stringr::str_replace(.data$taxon_name, " \\[.+",""),
name_to_match_to = ifelse(!.data$taxon_rank %in% c("species", "subspecies", "series", "variety", "form"),
name_to_match_to = ifelse(!.data$taxon_rank %in% c("species", "subspecies", "series", "variety", "form"),
stringr::word(.data$taxon_name,1), .data$name_to_match_to)
) %>%
# Remove taxon_rank, as it is about to be merged back in, but matches will now be possible to more rows.
Expand Down
3 changes: 2 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,10 @@ util_separate_and_sort <- function(x, sep = " ") {
# For those cells, split, sort then combine
x[i] <- x[i] %>%
stringr::str_split(" ") %>%
lapply(function(xi) xi %>% sort() %>% paste(collapse = " ")) %>%
lapply(function(xi) xi %>% sort() %>% unique() %>% paste(collapse = " ")) %>%
unlist()
x

}

#' Convert dataframe to list
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/examples/Test_2023_1/output/traits.csv
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Test_2023_1,Alphitonia petriei,034,leaf_mass_per_area,149,g/m2,population,mean,m
Test_2023_1,Alphitonia petriei,034,seed_dry_mass,27.2,mg,population,mean,measurement,3,field,adult,05,,,02,,01,14,03,02,2004/2004,from Falster_2005_1,01,,Alphitonia petriei
Test_2023_1,Alphitonia petriei,034,seed_dry_mass,3--4,mg,population,bin,measurement,10,field,adult,05,,,02,,01,14,03,02,2004/2004,from Falster_2005_1,02,,Alphitonia petriei
Test_2023_1,Alphitonia petriei,034,wood_density,0.413,mg/mm3,population,mean,measurement,3,field,adult,05,,,02,,01,14,03,02,2004/2004,from Falster_2005_1,01,,Alphitonia petriei
Test_2023_1,Alphitonia petriei,035,plant_growth_form,mallee mallee shrub tree tree,,species,mode,expert_score,3,field,adult,,,,02,,,14,03,02,2004/2004,from Falster_2005_1,01,,Alphitonia petriei
Test_2023_1,Alphitonia petriei,035,plant_growth_form,mallee shrub tree,,species,mode,expert_score,3,field,adult,,,,02,,,14,03,02,2004/2004,from Falster_2005_1,01,,Alphitonia petriei
Test_2023_1,Alstonia scholaris,037,huber_value,0.000254323499491353,mm2{sapwood}/mm2{leaf},individual,mean,model_derived,5,field,adult,04,02,,,,02,11,,,2002/2002,made-up measurement remarks,05,,Alstonia scholaris
Test_2023_1,Alstonia scholaris,038,leaf_length,40,mm,individual,mean,measurement,3,field,adult,04,02,,,,02,11,,,2004/2004,from Falster_2005_1,01,,Alstonia scholaris
Test_2023_1,Alstonia scholaris,039,huber_value,0.000254323499491353,mm2{sapwood}/mm2{leaf},individual,raw,model_derived,5,field,adult,04,02,,,,02,11,,,,made-up measurement remarks,04,01,Alstonia scholaris
Expand Down

0 comments on commit d0b132e

Please sign in to comment.