From 96b6abcf178f87d56b75b90cba9ba4ab89542f1a Mon Sep 17 00:00:00 2001 From: Sophie Yang <78716301+yangsophieee@users.noreply.github.com> Date: Thu, 12 Oct 2023 18:02:24 +1100 Subject: [PATCH] Fix time data types in contexts getting reformatted (#88) - Fixes #49 - Add test for substituting a time value - Update context categories for `metadata_add_contexts` - Convert context values to character type, preventing time data types from converting weirdly when converted to yaml in `write_metadata` - Remove "time" values from `dataset_test` checks of allowable categorical values - Add a note to the user when time data types get reformatted - Partial solution to #87 where substitutions for time variables (e.g. `flowering_time`, `fruiting_time`) fail `dataset_test` --- R/process.R | 18 ++++++++++-------- R/setup.R | 6 ++++-- R/testdata.R | 18 +++++++++--------- tests/testthat/examples/Test_2023_1/data.csv | 2 +- .../testthat/examples/Test_2023_1/metadata.yml | 3 +++ .../examples/Test_2023_1/output/traits.csv | 1 + 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/R/process.R b/R/process.R index c6496d8f..0683eccf 100644 --- a/R/process.R +++ b/R/process.R @@ -687,9 +687,9 @@ process_create_context_ids <- function(data, contexts) { dplyr::select(dplyr::all_of(c("context_property", "category", "value"))) %>% dplyr::distinct() - categories <- c( - "plot_context", "treatment_context", "entity_context", - "temporal_context", "method_context") %>% + categories <- + c("plot_context", "treatment_context", "entity_context", + "temporal_context", "method_context") %>% subset(., . %in% tmp$category) ids <- dplyr::tibble(.rows = nrow(context_cols)) @@ -1031,12 +1031,12 @@ process_flag_unsupported_values <- function(data, definitions) { ii <- data[["trait_name"]] == trait # Only Y,N - i <- ii & is.na(data[["error"]]) & !grepl("^[YyNn]+$", data[["value"]]) + i <- ii & is.na(data[["error"]]) & !grepl("^[YyNn]+$", data[["value"]]) data <- data %>% dplyr::mutate(error = ifelse(i, "Time can only contain Y & Ns", .data$error)) # Must be length 12 - i <- ii & is.na(data[["error"]]) & stringr::str_length(data[["value"]]) != 12 + i <- ii & is.na(data[["error"]]) & stringr::str_length(data[["value"]]) != 12 data <- data %>% dplyr::mutate(error = ifelse(i, "Times must be length 12", .data$error)) } @@ -1482,15 +1482,17 @@ process_parse_data <- function(data, dataset_id, metadata, contexts, schema) { # Implement any value changes as per substitutions if (!is.na(metadata[["substitutions"]][1])) { - substitutions_table <- util_list_to_df2(metadata[["substitutions"]]) %>% + substitutions_table <- util_list_to_df2(metadata[["substitutions"]]) %>% dplyr::mutate( find = tolower(.data$find), replace = tolower(.data$replace) ) for (i in seq_len(nrow(substitutions_table))) { - j <- which(out[["trait_name"]] == substitutions_table[["trait_name"]][i] & - out[["value"]] == substitutions_table[["find"]][i]) + j <- which( + out[["trait_name"]] == substitutions_table[["trait_name"]][i] & + out[["value"]] == substitutions_table[["find"]][i] + ) if (length(j) > 0) { out[["value"]][j] <- substitutions_table[["replace"]][i] diff --git a/R/setup.R b/R/setup.R index add26453..ff82cad4 100644 --- a/R/setup.R +++ b/R/setup.R @@ -470,14 +470,16 @@ metadata_add_contexts <- function(dataset_id, overwrite = FALSE, user_responses var_in <- metadata_user_select_names( paste("Indicate all columns that contain additional contextual data for ", dataset_id), v) - categories <- c("treatment", "plot", "temporal", "method", "entity_context") + categories <- + c("treatment_context", "plot_context", "temporal_context", + "method_context", "entity_context") for (i in seq_along(var_in)) { ii <- n_existing + i category <- metadata_user_select_names( paste("What category does context", var_in[i], "fit in?"), categories) - context_values <- data[[var_in[i]]] %>% unique() %>% na.omit() + context_values <- data[[var_in[i]]] %>% unique() %>% na.omit() %>% as.character() message(sprintf("\tThe following values exist for this context: %s", context_values %>% paste(collapse = ", "))) diff --git a/R/testdata.R b/R/testdata.R index b89797db..4cbb72f8 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -598,12 +598,13 @@ dataset_test_worker <- # Look for context values in `find` column i <- v %in% contextsub[["find"]] - expect_true(all(i), - info = paste0( - f, - " - context names from data file not present in metadata contexts: ", - v[!i] - ) + expect_true( + all(i), + info = ifelse( + "hms" %in% class(v), + sprintf("%s - context names from data file not present in metadata contexts: %s\n\n'%s' has been detected as a time data type and reformatted\n\t-> Please make sure context metadata matches reformatting", f, v[!i], j), + sprintf("%s - context names from data file not present in metadata contexts: %s", f, v[!i]) + ) ) } } @@ -661,9 +662,8 @@ dataset_test_worker <- if (!is.null(definitions$elements[[trait]]) && definitions$elements[[trait]]$type == "categorical") { to_check <- x[[trait]]$replace %>% unique() - allowable <- - c(definitions$elements[[trait]]$allowed_values_levels %>% names(), - NA) + to_check <- to_check[!(grepl("^[YyNn]+$", to_check) & stringr::str_length(to_check) == 12)] + allowable <- c(definitions$elements[[trait]]$allowed_values_levels %>% names(), NA) failing <- to_check[!( is.na(to_check) | to_check %in% allowable | diff --git a/tests/testthat/examples/Test_2023_1/data.csv b/tests/testthat/examples/Test_2023_1/data.csv index 4955f521..016324a7 100644 --- a/tests/testthat/examples/Test_2023_1/data.csv +++ b/tests/testthat/examples/Test_2023_1/data.csv @@ -30,7 +30,7 @@ Melicope elleryana,melell,Atherton,0.075,0.0268,6955,0.346,0.58,1.61,,4201,2860, Neolitsea dealbata,neodea,Atherton,0.093,0.0164,5228,0.352,0.58,176.1,1686,2747,3049,7504,,,,geophyte,,bottom slope,,9:00:00,instrument 3,,,4,cm,,,,,, Polyscias australiana,polaus,Atherton,0.079,0.0154,6806,0.397,0.42,8.35,,6008,,6700,,,nnnnnyyyynnnnnnnnnn,tree,female,dune crest,,9:00:00,instrument 3,,4,4,cm,,,,,, Psychotria sp Utchee Creek,psyspp,Atherton,0.108,0.0179,11157,0.582,0,23.09,,4977,2302,2037,,,,trees,female,dune crest,,,,10,6,4,cm,,,,,, -Rhodomyrtus trineura,rhotri,Atherton,0.129,0.0111,3401,0.763,0.34,,4321,3803,6088,6877,,,,tree,male,,,,,,9,4,cm,,,,,, +Rhodomyrtus trineura,rhotri,Atherton,0.129,0.0111,3401,0.763,0.34,,4321,3803,6088,6877,,,7:00:00,tree,male,,,,,,9,4,cm,,,,,, Acmena graveolens,acmgra,Cape Tribulation,0.151,0.0155,5246,0.599,0.67,,,3057,2667,7156,,,,,male,,,,,1,10,4,cm,,,,,, Aleurites rockinghamensis,aleroc,Cape Tribulation,0.113,0.0184,73984,0.28,0.02,7077,,4194,,6209,,,summer,,male,,,,,1,2,4,cm,,,,,, Alstonia scholaris,alssch,Cape Tribulation,0.107,0.0223,6182,0.361,0.46,1.53,,1854,1682,3932,,,not observed,herbs,male,,,,,1,2,4,cm,,,,,, diff --git a/tests/testthat/examples/Test_2023_1/metadata.yml b/tests/testthat/examples/Test_2023_1/metadata.yml index e2d4b04a..d864bfd9 100644 --- a/tests/testthat/examples/Test_2023_1/metadata.yml +++ b/tests/testthat/examples/Test_2023_1/metadata.yml @@ -531,6 +531,9 @@ substitutions: - trait_name: flowering_time find: not observed replace: .na +- trait_name: flowering_time + find: 7:00:00 + replace: nnnnnnnnnnny taxonomic_updates: - find: Homalanthus novoguineensis replace: Homalanthus novo-guineensis diff --git a/tests/testthat/examples/Test_2023_1/output/traits.csv b/tests/testthat/examples/Test_2023_1/output/traits.csv index bf16c016..ae57dfe1 100644 --- a/tests/testthat/examples/Test_2023_1/output/traits.csv +++ b/tests/testthat/examples/Test_2023_1/output/traits.csv @@ -483,6 +483,7 @@ Test_2023_1,Quassia baileyana,130,leaf_N_per_dry_mass,23,mg/g,population,mean,me Test_2023_1,Quassia baileyana,130,leaf_area,9624,mm2,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana Test_2023_1,Quassia baileyana,130,leaf_mass_per_area,86,g/m2,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana Test_2023_1,Quassia baileyana,130,wood_density,0.501,mg/mm3,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana +Test_2023_1,Rhodomyrtus trineura,132,flowering_time,nnnnnnnnnnny,,individual,mode,expert_score,3,field,adult,02,,,,,01,15,,,2004/2004,from Falster_2005_1,01,,Rhodomyrtus trineura Test_2023_1,Rhodomyrtus trineura,132,huber_value,0.00014541224371092,mm2{sapwood}/mm2{leaf},individual,raw,model_derived,5,field,adult,02,,,,,01,15,,,2002/2002,made-up measurement remarks,04,01,Rhodomyrtus trineura Test_2023_1,Rhodomyrtus trineura,132,huber_value,0.00014541224371092,mm2{sapwood}/mm2{leaf},individual,mean,model_derived,5,field,adult,02,,,,,01,15,,,2002/2002,made-up measurement remarks,05,,Rhodomyrtus trineura Test_2023_1,Rhodomyrtus trineura,132,leaf_length,40,mm,individual,mean,measurement,3,field,adult,02,,,,,01,15,,,2004/2004,from Falster_2005_1,01,,Rhodomyrtus trineura