Add dataset tests #143

Merged: 12 commits, Nov 30, 2023
2 changes: 1 addition & 1 deletion R/process.R
@@ -2050,7 +2050,7 @@ check_pivot_duplicates <- function(

# Check for duplicates
database_object$traits %>%
filter(.data$dataset_id %in% dataset_ids) %>%
dplyr::filter(.data$dataset_id %in% dataset_ids) %>%
select(
# `taxon_name` and `original_name` are not needed for pivoting but are included for informative purposes
dplyr::all_of(
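Note on the change above: the single edit in this file qualifies the bare filter() call as dplyr::filter(). A minimal sketch of why the explicit namespace matters, illustrative only and not taken from this PR: in package code that does not import the dplyr verb, an unqualified filter() can resolve to stats::filter(), which performs time-series filtering rather than row subsetting.

# Illustrative sketch (toy data, not part of this PR)
library(dplyr)
df <- data.frame(dataset_id = c("A", "B"), value = 1:2)
stats::filter(1:5, rep(1, 2))          # moving-sum filter on a numeric vector
dplyr::filter(df, dataset_id == "A")   # row subsetting on a data frame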
12 changes: 6 additions & 6 deletions R/setup.R
@@ -900,15 +900,15 @@ metadata_add_taxonomic_change <- function(dataset_id, find, replace, reason, tax
} else {
message(sprintf(red("Existing substitution will be overwritten for ") %+% green("'%s'"), find))
data <- data %>%
filter(.data$find != to_add$find) %>%
dplyr::bind_rows(to_add) %>%
filter(!.data$find == replace) %>%
arrange(.data$find)
dplyr::filter(.data$find != to_add$find) %>%
dplyr::bind_rows(to_add) %>%
dplyr::filter(!.data$find == replace) %>%
arrange(.data$find)
}
} else {
data <- dplyr::bind_rows(data, to_add) %>%
filter(!.data$find == replace) %>%
arrange(.data$find)
dplyr::filter(!.data$find == replace) %>%
arrange(.data$find)
}
}

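The rewritten pipeline in metadata_add_taxonomic_change() now namespaces its dplyr calls as well: it drops any existing row whose find matches the incoming substitution, binds the replacement row, removes self-referencing entries, and re-sorts by find. A toy sketch of the same pattern with hypothetical inputs (data, to_add and replace below are stand-ins, not values from the package):

# Hypothetical stand-ins, for illustration only
library(dplyr)
data <- tibble::tibble(
  find = c("Acacia sp.", "Banksia sp."),
  replace = c("Acacia", "Banksia")
)
to_add <- tibble::tibble(find = "Acacia sp.", replace = "Acacia dealbata")
replace <- to_add$replace

data <- data %>%
  dplyr::filter(.data$find != to_add$find) %>% # drop the row being overwritten
  dplyr::bind_rows(to_add) %>%                 # append the replacement row
  dplyr::filter(!.data$find == replace) %>%    # drop self-referencing entries
  dplyr::arrange(.data$find)                   # keep substitutions sorted by `find`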
78 changes: 62 additions & 16 deletions R/testdata.R
@@ -590,6 +590,27 @@ dataset_test_worker <-
process_format_contexts(dataset_id, data)
)

# Check that there are no duplicate `var_in` or `context_property` fields
context_properties <- sapply(metadata[["contexts"]], "[[", "context_property")
context_vars_in <- sapply(metadata[["contexts"]], "[[", "var_in")

expect_equal(
context_properties |> duplicated() |> sum(),
0,
info = sprintf(
"%s\tcontexts - duplicate `context_property` values detected: '%s'",
red(f),
paste(context_properties[duplicated(context_properties)], collapse = "', '"))
)
expect_equal(
context_vars_in |> duplicated() |> sum(),
0,
info = sprintf(
"%s\tcontexts - duplicate `var_in` values detected: '%s'",
red(f),
paste(context_vars_in[duplicated(context_vars_in)], collapse = "', '"))
)
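# Illustrative aside, not part of this PR: the two checks above extract one
# field from each context entry with sapply(..., "[[", ...) and count repeats
# with duplicated(). On a hypothetical metadata list the pattern looks like:
contexts_example <- list(
  list(context_property = "temperature", var_in = "temp"),
  list(context_property = "temperature", var_in = "temp_2")
)
sapply(contexts_example, "[[", "context_property") |> duplicated() |> sum() # 1 duplicate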

# Check context details load
if (nrow(contexts) > 0) {

@@ -599,29 +620,30 @@ dataset_test_worker <-
info = paste0(red(f), "\tcontexts"), label = "field names"
)


# Check that unique context `value`'s only have one unique description
expect_equal(
contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>%
filter(.data$n > 1) %>% nrow(),
dplyr::filter(.data$n > 1) %>% nrow(),
0, info = sprintf(
"%s\tcontexts - `value`'s should only have one unique description each: '%s'",
red(f),
paste(
contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>%
filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(),
dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(),
collapse = "', '")
)
)

# Check that there are no duplicate `find` fields
expect_equal(
contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
nrow(),
0, info = sprintf(
"%s\tcontexts - duplicate `find` values detected: '%s'",
red(f),
paste(
contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
dplyr::pull(.data$find) %>% unique(),
collapse = "', '")
)
@@ -682,6 +704,7 @@ dataset_test_worker <-
}

## Traits

expect_list_elements_contains_names(
metadata[["traits"]],
c("var_in", "unit_in", "trait_name", "value_type", "basis_of_value"),
@@ -707,8 +730,31 @@ dataset_test_worker <-
label = "`trait_name`'s"
)

# Check units are found in `unit_conversions.csv`
units <- read_csv("config/unit_conversions.csv")
expect_is_in(
traits$unit_in, units$unit_from,
info = paste0(red(f), "\ttraits"),
label = "`unit_in`'s"
)
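# Hedged aside, not part of this PR: expect_is_in() is a helper defined
# elsewhere in this package; assuming it reduces to a set-membership check,
# a minimal sketch of such a helper could look like this:
expect_is_in_sketch <- function(x, y, info = "", label = "values") {
  missing_values <- setdiff(x, y)
  testthat::expect(
    length(missing_values) == 0,
    sprintf("%s not found: '%s' (%s)", label,
            paste(missing_values, collapse = "', '"), info)
  )
}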

# Check no duplicate `var_in`'s

expect_equal(
traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
dplyr::filter(.data$n > 1) %>% nrow(),
0,
info = sprintf(
"%s\ttraits - duplicate `var_in` values detected: '%s'",
red(f),
paste(
traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(),
collapse = "', '")
)
)


# Now that traits loaded, check details of contexts match
if (nrow(contexts > 0)) {

@@ -723,7 +769,7 @@ dataset_test_worker <-

for (j in unique(contexts[["var_in"]])) {

contextsub <- contexts %>% filter(var_in == j)
contextsub <- contexts %>% dplyr::filter(var_in == j)

# Context values align either with a column of data or a column of traits table
if (is.null(data[[j]])) {
@@ -852,13 +898,14 @@ dataset_test_worker <-

# First check no duplicate combinations of `find`
expect_equal(
x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
dplyr::filter(.data$n > 1) %>% nrow(),
0, info = sprintf(
"%s\tsubstitutions - duplicate `find` values detected: '%s'",
red(f),
paste(
x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
dplyr::pull(.data$find) %>% unique(),
x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
collapse = "', '")
)
)
@@ -898,12 +945,12 @@ dataset_test_worker <-

# Check no duplicate `find` values
expect_equal(
x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(),
0, info = sprintf(
"%s\ttaxonomic_updates - duplicate `find` values detected: '%s'",
red(f),
paste(
x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
dplyr::pull(.data$find) %>% unique(),
collapse = "', '")
)
@@ -1005,13 +1052,13 @@ dataset_test_worker <-
# Check no duplicate `find` values
expect_equal(
x %>% dplyr::group_by(.data$variable, .data$find) %>%
dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(),
0, info = sprintf(
"%s\texclude_observations - duplicate `find` values detected: '%s'",
red(f),
paste(
x %>% dplyr::group_by(.data$variable, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
collapse = "', '")
)
)
@@ -1061,9 +1108,8 @@ dataset_test_worker <-
} else {

# For wide datasets, expect variables in traits are headers in the data
values <- names(data)
expect_is_in(
traits[["var_in"]], values,
traits[["var_in"]], names(data),
info = paste0(red(files[2]), "\ttraits"), label = "`var_in`"
)

@@ -1083,10 +1129,10 @@ dataset_test_worker <-

## Check traits are not only NAs
expect_false(
nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) == 0,
nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) == 0,
info = paste0(red(f), "\ttraits - only contain NA `trait_name`'s"))

if (nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) > 0) {
if (nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) > 0) {

# Test build dataset
expect_no_error(
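Most of the new tests in this PR share one dedupe pattern: group by the key fields, count rows per group, and expect zero groups with more than one row. A standalone sketch of that pattern on hypothetical data (illustrative only, not code from the package):

# Toy data with one deliberate duplicate of `find`
library(dplyr)
library(testthat)

x <- tibble::tibble(find = c("a", "b", "b"), replace = c("A", "B", "B2"))

duplicates <- x %>%
  dplyr::group_by(.data$find) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  dplyr::filter(.data$n > 1)

# The dataset tests expect nrow(duplicates) to be 0; here it is 1 because "b" repeats
expect_equal(nrow(duplicates), 1)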