From d43eae0eb3250b59d2f36cc25050c7d80d00fdef Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 23 Nov 2023 18:13:15 +1100 Subject: [PATCH 01/10] Update testdata.R --- R/testdata.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/testdata.R b/R/testdata.R index a9170c56..e698ecee 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -682,6 +682,7 @@ dataset_test_worker <- } ## Traits + expect_list_elements_contains_names( metadata[["traits"]], c("var_in", "unit_in", "trait_name", "value_type", "basis_of_value"), @@ -707,7 +708,10 @@ dataset_test_worker <- label = "`trait_name`'s" ) - # Check no duplicate `var_in`'s + # Check units are found in `unit_conversions.csv` #TODO + + # Check no duplicate `var_in`'s #TODO + # For both traits and contexts # Now that traits loaded, check details of contexts match if (nrow(contexts > 0)) { @@ -1061,9 +1065,8 @@ dataset_test_worker <- } else { # For wide datasets, expect variables in traits are headers in the data - values <- names(data) expect_is_in( - traits[["var_in"]], values, + traits[["var_in"]], names(data), info = paste0(red(files[2]), "\ttraits"), label = "`var_in`" ) @@ -1083,10 +1086,10 @@ dataset_test_worker <- ## Check traits are not only NAs expect_false( - nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) == 0, + nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) == 0, info = paste0(red(f), "\ttraits - only contain NA `trait_name`'s")) - if (nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) > 0) { + if (nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) > 0) { # Test build dataset expect_no_error( From 9da197baaf907fa7eced62881ecc311f2fbc00bb Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 23 Nov 2023 18:20:35 +1100 Subject: [PATCH 02/10] Add test for checking units --- R/testdata.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/testdata.R b/R/testdata.R index e698ecee..de5aeaf4 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -709,6 +709,12 @@ dataset_test_worker <- ) # Check units are found in `unit_conversions.csv` #TODO + units <- read_csv("config/unit_conversions.csv") + expect_is_in( + traits$unit_in, units$unit_from, + info = paste0(red(f), "\ttraits"), + label = "`unit_in`'s" + ) # Check no duplicate `var_in`'s #TODO # For both traits and contexts From 51e5406893ae204ca062f5138b4e78cc8ab6a946 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 23 Nov 2023 18:46:55 +1100 Subject: [PATCH 03/10] Add tests for checking no duplicate `var_in` or `context_property` in contexts and no duplicate `var_in`'s in traits --- R/testdata.R | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/R/testdata.R b/R/testdata.R index de5aeaf4..06dcaba2 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -590,6 +590,27 @@ dataset_test_worker <- process_format_contexts(dataset_id, data) ) + # Check that there are no duplicate `var_in` or `context_property` fields + context_properties <- sapply(metadata[["contexts"]], "[[", "context_property") + context_vars_in <- sapply(metadata[["contexts"]], "[[", "var_in") + + expect_equal( + context_properties |> duplicated() |> sum(), + 0, + info = sprintf( + "%s\tcontexts - duplicate `context_property` values detected: '%s'", + red(f), + paste(context_properties[duplicated(context_properties)], collapse = "', '")) + ) + expect_equal( + context_vars_in |> duplicated() |> sum(), + 0, + info = sprintf( + "%s\tcontexts - duplicate `var_in` values detected: '%s'", + red(f), + paste(context_vars_in[duplicated(context_vars_in)], collapse = "', '")) + ) + # Check context details load if (nrow(contexts) > 0) { @@ -599,6 +620,7 @@ dataset_test_worker <- info = paste0(red(f), "\tcontexts"), label = "field names" ) + # Check that unique context `value`'s only have one unique description expect_equal( contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>% @@ -708,7 +730,7 @@ dataset_test_worker <- label = "`trait_name`'s" ) - # Check units are found in `unit_conversions.csv` #TODO + # Check units are found in `unit_conversions.csv` units <- read_csv("config/unit_conversions.csv") expect_is_in( traits$unit_in, units$unit_from, @@ -716,8 +738,22 @@ dataset_test_worker <- label = "`unit_in`'s" ) - # Check no duplicate `var_in`'s #TODO - # For both traits and contexts + # Check no duplicate `var_in`'s + + expect_equal( + traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>% + filter(.data$n > 1) %>% nrow(), + 0, + info = sprintf( + "%s\ttraits - duplicate `var_in` values detected: '%s'", + red(f), + paste( + traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>% + filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(), + collapse = "', '") + ) + ) + # Now that traits loaded, check details of contexts match if (nrow(contexts > 0)) { @@ -862,13 +898,14 @@ dataset_test_worker <- # First check no duplicate combinations of `find` expect_equal( - x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(), + x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% + filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\tsubstitutions - duplicate `find` values detected: '%s'", red(f), paste( - x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% - dplyr::pull(.data$find) %>% unique(), + x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% + filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), collapse = "', '") ) ) From 22cef1352503175aa2a1cc99218fa03c306ac599 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Fri, 24 Nov 2023 12:33:51 +1100 Subject: [PATCH 04/10] Add prefixes to filter() --- R/process.R | 2 +- R/setup.R | 12 ++++++------ R/testdata.R | 26 +++++++++++++------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/process.R b/R/process.R index 8e85e67c..d774e177 100644 --- a/R/process.R +++ b/R/process.R @@ -2050,7 +2050,7 @@ check_pivot_duplicates <- function( # Check for duplicates database_object$traits %>% - filter(.data$dataset_id %in% dataset_ids) %>% + dplyr::filter(.data$dataset_id %in% dataset_ids) %>% select( # `taxon_name` and `original_name` are not needed for pivoting but are included for informative purposes dplyr::all_of( diff --git a/R/setup.R b/R/setup.R index 0786365b..c4b1debd 100644 --- a/R/setup.R +++ b/R/setup.R @@ -900,15 +900,15 @@ metadata_add_taxonomic_change <- function(dataset_id, find, replace, reason, tax } else { message(sprintf(red("Existing substitution will be overwritten for ") %+% green("'%s'"), find)) data <- data %>% - filter(.data$find != to_add$find) %>% - dplyr::bind_rows(to_add) %>% - filter(!.data$find == replace) %>% - arrange(.data$find) + dplyr::filter(.data$find != to_add$find) %>% + dplyr::bind_rows(to_add) %>% + dplyr::filter(!.data$find == replace) %>% + arrange(.data$find) } } else { data <- dplyr::bind_rows(data, to_add) %>% - filter(!.data$find == replace) %>% - arrange(.data$find) + dplyr::filter(!.data$find == replace) %>% + arrange(.data$find) } } diff --git a/R/testdata.R b/R/testdata.R index 06dcaba2..8e1bd10b 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -624,26 +624,26 @@ dataset_test_worker <- # Check that unique context `value`'s only have one unique description expect_equal( contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>% - filter(.data$n > 1) %>% nrow(), + dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\tcontexts - `value`'s should only have one unique description each: '%s'", red(f), paste( contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>% - filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(), + dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(), collapse = "', '") ) ) # Check that there are no duplicate `find` fields expect_equal( - contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% + contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\tcontexts - duplicate `find` values detected: '%s'", red(f), paste( - contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% + contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), collapse = "', '") ) @@ -742,14 +742,14 @@ dataset_test_worker <- expect_equal( traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>% - filter(.data$n > 1) %>% nrow(), + dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\ttraits - duplicate `var_in` values detected: '%s'", red(f), paste( traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>% - filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(), + dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(), collapse = "', '") ) ) @@ -769,7 +769,7 @@ dataset_test_worker <- for (j in unique(contexts[["var_in"]])) { - contextsub <- contexts %>% filter(var_in == j) + contextsub <- contexts %>% dplyr::filter(var_in == j) # Context values align either with a column of data or a column of traits table if (is.null(data[[j]])) { @@ -899,13 +899,13 @@ dataset_test_worker <- # First check no duplicate combinations of `find` expect_equal( x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% - filter(.data$n > 1) %>% nrow(), + dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\tsubstitutions - duplicate `find` values detected: '%s'", red(f), paste( x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% - filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), + dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), collapse = "', '") ) ) @@ -945,12 +945,12 @@ dataset_test_worker <- # Check no duplicate `find` values expect_equal( - x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(), + x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\ttaxonomic_updates - duplicate `find` values detected: '%s'", red(f), paste( - x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% + x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), collapse = "', '") ) @@ -1052,13 +1052,13 @@ dataset_test_worker <- # Check no duplicate `find` values expect_equal( x %>% dplyr::group_by(.data$variable, .data$find) %>% - dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(), + dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(), 0, info = sprintf( "%s\texclude_observations - duplicate `find` values detected: '%s'", red(f), paste( x %>% dplyr::group_by(.data$variable, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% - filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), + dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(), collapse = "', '") ) ) From f39674d82c81371d9988f1cb2d985e8c6843690d Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Fri, 24 Nov 2023 17:51:28 +1100 Subject: [PATCH 05/10] Add `dplyr::` prefixes --- R/pivot.R | 2 +- R/process.R | 6 +++--- R/testdata.R | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pivot.R b/R/pivot.R index cd742750..0e834856 100644 --- a/R/pivot.R +++ b/R/pivot.R @@ -20,7 +20,7 @@ check_pivot_wider <- function(dataset) { dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates", "observation_id", "value_type")), everything() ) %>% - filter(.data$number_of_duplicates > 1) %>% + dplyr::filter(.data$number_of_duplicates > 1) %>% nrow() if (duplicates == 0) { diff --git a/R/process.R b/R/process.R index d774e177..28a6ef1e 100644 --- a/R/process.R +++ b/R/process.R @@ -561,7 +561,7 @@ process_create_observation_id <- function(data, metadata) { if (!is.null(traits_table[["repeat_measurements_id"]])) { to_add_id <- traits_table %>% - filter(.data$repeat_measurements_id == TRUE) %>% + dplyr::filter(.data$repeat_measurements_id == TRUE) %>% dplyr::pull(.data$trait_name) i <- !is.na(data$value) & data$trait_name %in% to_add_id & @@ -2060,10 +2060,10 @@ check_pivot_duplicates <- function( tidyr::pivot_wider(names_from = "trait_name", values_from = "value", values_fn = length) %>% tidyr::pivot_longer(cols = 9:ncol(.)) %>% dplyr::rename(dplyr::all_of(c("trait_name" = "name", "number_of_duplicates" = "value"))) %>% - select( + dplyr::select( dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates", "taxon_name", "original_name", "observation_id", "value_type")), everything() ) %>% - filter(.data$number_of_duplicates > 1) + dplyr::filter(.data$number_of_duplicates > 1) } diff --git a/R/testdata.R b/R/testdata.R index 8e1bd10b..cdb26467 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -1077,7 +1077,7 @@ dataset_test_worker <- expect_is_in( find_values, # Extract values from the data for that variable - parsed_data %>% filter(.data$trait_name == variable) %>% dplyr::pull(.data$value) %>% unique(), + parsed_data %>% dplyr::filter(.data$trait_name == variable) %>% dplyr::pull(.data$value) %>% unique(), info = paste0(red(f), "\texclude_observations"), label = sprintf("variable '%s'", variable) ) From e41e336578e3e5028e1bf0d2f21ece99d48208fd Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Wed, 29 Nov 2023 14:36:36 +1100 Subject: [PATCH 06/10] Fix `write_metadata` adding spaces to `custom_R_code` --- R/utils.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 819b000a..50157204 100644 --- a/R/utils.R +++ b/R/utils.R @@ -268,12 +268,13 @@ write_metadata <- function(data, path, style_code = FALSE) { if (!is.na(data$dataset$custom_R_code)) { code <- data$dataset$custom_R_code + code <- stringr::str_trim(code, side = "left") if (style_code) code <- code %>% suppressWarnings(styler::style_text(transformers = .data$tidyverse_style(strict = TRUE))) txt <- gsub("custom_R_code: .na", code %>% paste(collapse = "\n") %>% - paste0("custom_R_code:", .), txt, fixed = TRUE) + paste0("custom_R_code: ", .), txt, fixed = TRUE) } if (!stringr::str_sub(txt, nchar(txt)) == "\n") From 228ba100ca19d26db32a997490548c5c6bc18a67 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 30 Nov 2023 14:35:10 +1100 Subject: [PATCH 07/10] Fix comment --- R/testdata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/testdata.R b/R/testdata.R index cdb26467..fcd90058 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -1127,7 +1127,7 @@ dataset_test_worker <- - ## Check traits are not only NAs + ## Check that not all trait names are NAs expect_false( nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) == 0, info = paste0(red(f), "\ttraits - only contain NA `trait_name`'s")) From 9a996f005de7ac03082487050a4f0b030aaf9868 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 30 Nov 2023 14:45:55 +1100 Subject: [PATCH 08/10] Add tidyverse prefixes --- R/pivot.R | 2 +- R/process.R | 8 ++++---- R/setup.R | 6 +++--- inst/support/report_dataset.Rmd | 6 +++--- tests/testthat/examples/Test_2023_3/metadata.yml | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/pivot.R b/R/pivot.R index 0e834856..09474612 100644 --- a/R/pivot.R +++ b/R/pivot.R @@ -68,7 +68,7 @@ db_traits_pivot_wider <- function(traits) { .data$dataset_id, .data$observation_id, .data$method_id, .data$method_context_id, .data$repeat_measurements_id) %>% dplyr::summarise(n_value_type = length(unique(.data$value_type))) %>% - arrange(.data$observation_id) %>% + dplyr::arrange(.data$observation_id) %>% dplyr::filter(.data$n_value_type > 1) if (nrow(check_value_type) > 1) { diff --git a/R/process.R b/R/process.R index 1a54f6c7..49218571 100644 --- a/R/process.R +++ b/R/process.R @@ -803,12 +803,12 @@ process_create_context_ids <- function(data, contexts) { contexts_finished <- contexts %>% - filter(!is.na(.data$value)) %>% + dplyr::filter(!is.na(.data$value)) %>% dplyr::left_join( id_link %>% dplyr::bind_rows(), by = c("context_property", "category", "value") ) %>% - distinct(dplyr::across(-dplyr::any_of("find"))) + dplyr::distinct(dplyr::across(-dplyr::any_of("find"))) list( contexts = contexts_finished %>% util_df_convert_character(), @@ -1866,8 +1866,8 @@ build_combine <- function(..., d = list(...)) { metadata[["contributors"]] <- contributors %>% dplyr::select(-dplyr::any_of(c("dataset_id", "additional_role"))) %>% - distinct() %>% - arrange(.data$last_name, .data$given_name) %>% + dplyr::distinct() %>% + dplyr::arrange(.data$last_name, .data$given_name) %>% util_df_to_list() ret <- list(traits = combine("traits", d), diff --git a/R/setup.R b/R/setup.R index c4b1debd..ad5d6fa0 100644 --- a/R/setup.R +++ b/R/setup.R @@ -903,12 +903,12 @@ metadata_add_taxonomic_change <- function(dataset_id, find, replace, reason, tax dplyr::filter(.data$find != to_add$find) %>% dplyr::bind_rows(to_add) %>% dplyr::filter(!.data$find == replace) %>% - arrange(.data$find) + dplyr::arrange(.data$find) } } else { data <- dplyr::bind_rows(data, to_add) %>% dplyr::filter(!.data$find == replace) %>% - arrange(.data$find) + dplyr::arrange(.data$find) } } @@ -967,7 +967,7 @@ metadata_add_taxonomic_changes_list <- function(dataset_id, taxonomic_updates) { )) } # Write new taxonomic updates to metadata - metadata$taxonomic_updates <- existing_updates %>% dplyr::arrange(.data$find) %>% filter(!.data$find == .data$replace) + metadata$taxonomic_updates <- existing_updates %>% dplyr::arrange(.data$find) %>% dplyr::filter(!.data$find == .data$replace) } else { # Read in dataframe of taxonomic changes, split into single-row lists, and add to metadata file diff --git a/inst/support/report_dataset.Rmd b/inst/support/report_dataset.Rmd index c9bb0888..f42db845 100644 --- a/inst/support/report_dataset.Rmd +++ b/inst/support/report_dataset.Rmd @@ -255,9 +255,9 @@ new_question("(section `dataset`) Can you provide more detailed information for ```{r, results='asis', echo=FALSE} missing <- metadata$dataset %>% - util_list_to_df1() %>% - filter(value == "unknown") %>% - pull(key) + util_list_to_df1() %>% + filter(value == "unknown") %>% + pull(key) for (v in missing) { sprintf("(section `dataset`) Can you provide missing details for the variable `%s`?\n", v) %>% diff --git a/tests/testthat/examples/Test_2023_3/metadata.yml b/tests/testthat/examples/Test_2023_3/metadata.yml index 6211a1ff..2ffa033a 100644 --- a/tests/testthat/examples/Test_2023_3/metadata.yml +++ b/tests/testthat/examples/Test_2023_3/metadata.yml @@ -91,7 +91,7 @@ dataset: arrange(category) %>% distinct() %>% mutate(entity_measured = ifelse(is.na(entity_measured), "unknown", entity_measured)) - ' + ' collection_date: unknown/2022 taxon_name: taxon_name trait_name: trait_name From fe4eb5b60be0c9c1b5a570f290304c8cf88b1d10 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 30 Nov 2023 14:49:11 +1100 Subject: [PATCH 09/10] Add more prefixes --- R/pivot.R | 6 +++--- R/process.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pivot.R b/R/pivot.R index 09474612..cfd1c87d 100644 --- a/R/pivot.R +++ b/R/pivot.R @@ -9,14 +9,14 @@ check_pivot_wider <- function(dataset) { duplicates <- dataset$traits %>% - select( + dplyr::select( dplyr::all_of(c("dataset_id", "trait_name", "value", "observation_id", "value_type", "repeat_measurements_id", "method_id", "method_context_id")) ) %>% tidyr::pivot_wider(names_from = "trait_name", values_from = "value", values_fn = length) %>% tidyr::pivot_longer(cols = 7:ncol(.)) %>% dplyr::rename(dplyr::all_of(c("trait_name" = "name", "number_of_duplicates" = "value"))) %>% - select( + dplyr::select( dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates", "observation_id", "value_type")), everything() ) %>% @@ -61,7 +61,7 @@ db_traits_pivot_wider <- function(traits) { # A check for if there are more than 1 value_type for a given taxon_name, observation_id and method check_value_type <- traits %>% - select(dplyr::all_of(c( + dplyr::select(dplyr::all_of(c( "trait_name", "value", "dataset_id", "observation_id", "method_id", "method_context_id", "repeat_measurements_id", "value_type"))) %>% dplyr::group_by( diff --git a/R/process.R b/R/process.R index 49218571..7b0bf096 100644 --- a/R/process.R +++ b/R/process.R @@ -1954,7 +1954,7 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) { stringr::word(.data$taxon_name, 1), .data$name_to_match_to) ) %>% # Remove `taxon_rank`, as it is about to be merged back in, but matches will now be possible to more rows - select(-dplyr::any_of(c("taxon_rank", "taxonomic_resolution"))) %>% + dplyr::select(-dplyr::any_of(c("taxon_rank", "taxonomic_resolution"))) %>% util_df_convert_character() %>% # Merge in all data from taxa dplyr::left_join(by = c("taxon_name"), @@ -2051,7 +2051,7 @@ check_pivot_duplicates <- function( # Check for duplicates database_object$traits %>% dplyr::filter(.data$dataset_id %in% dataset_ids) %>% - select( + dplyr::select( # `taxon_name` and `original_name` are not needed for pivoting but are included for informative purposes dplyr::all_of( c("dataset_id", "trait_name", "value", "taxon_name", "original_name", "observation_id", From 988717b91e4d1fea416a83140f9479dfb2d015e5 Mon Sep 17 00:00:00 2001 From: yangsophieee Date: Thu, 30 Nov 2023 14:51:56 +1100 Subject: [PATCH 10/10] Add more prefixes --- R/process.R | 10 ++++------ R/setup.R | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/R/process.R b/R/process.R index 7b0bf096..aecb374a 100644 --- a/R/process.R +++ b/R/process.R @@ -126,7 +126,7 @@ dataset_process <- function(filename_data_raw, ) traits <- traits %>% - mutate( + dplyr::mutate( location_id = ifelse(.data$entity_type == "species", NA_character_, .data$location_id) ) } @@ -147,7 +147,7 @@ dataset_process <- function(filename_data_raw, by = "location_id", locations %>% tidyr::pivot_wider(names_from = "location_property", values_from = "value") %>% - mutate(col_tmp = .data[[v]]) %>% + dplyr::mutate(col_tmp = .data[[v]]) %>% dplyr::select(dplyr::any_of(c("location_id", "col_tmp"))) %>% stats::na.omit() ) @@ -185,13 +185,11 @@ dataset_process <- function(filename_data_raw, process_format_contributors(dataset_id, schema) # Record sources - sources <- metadata$source %>% - lapply(util_list_to_bib) %>% purrr::reduce(c) + sources <- metadata$source %>% lapply(util_list_to_bib) %>% purrr::reduce(c) # Record methods methods <- process_format_methods(metadata, dataset_id, sources, contributors) - # Retrieve taxonomic details for known species taxonomic_updates <- traits %>% @@ -985,7 +983,7 @@ util_check_disallowed_chars <- function(object) { process_flag_unsupported_characters <- function(data) { data <- data %>% - mutate( + dplyr::mutate( error = ifelse(is.na(.data$error) & util_check_disallowed_chars(.data$value), "Value contains unsupported characters", .data$error) ) diff --git a/R/setup.R b/R/setup.R index ad5d6fa0..df350424 100644 --- a/R/setup.R +++ b/R/setup.R @@ -377,7 +377,7 @@ metadata_add_locations <- function(dataset_id, location_data, user_responses = N # Save and notify location_data <- location_data %>% dplyr::select(dplyr::all_of(c(location_name, keep))) %>% - distinct() + dplyr::distinct() # If user didn't select any variables to keep, so add defaults if (is.na(keep[1])) {