From d43eae0eb3250b59d2f36cc25050c7d80d00fdef Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 23 Nov 2023 18:13:15 +1100
Subject: [PATCH 01/10] Simplify trait checks in testdata.R and add TODO notes

---
 R/testdata.R | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/R/testdata.R b/R/testdata.R
index a9170c56..e698ecee 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -682,6 +682,7 @@ dataset_test_worker <-
         }
 
         ## Traits
+
         expect_list_elements_contains_names(
           metadata[["traits"]],
           c("var_in", "unit_in", "trait_name", "value_type", "basis_of_value"),
@@ -707,7 +708,10 @@ dataset_test_worker <-
           label = "`trait_name`'s"
         )
 
-        # Check no duplicate `var_in`'s
+        # Check units are found in `unit_conversions.csv` #TODO
+
+        # Check no duplicate `var_in`'s #TODO
+        # For both traits and contexts
 
         # Now that traits loaded, check details of contexts match
         if (nrow(contexts > 0)) {
@@ -1061,9 +1065,8 @@ dataset_test_worker <-
         } else {
 
           # For wide datasets, expect variables in traits are headers in the data
-          values <- names(data)
           expect_is_in(
-            traits[["var_in"]], values,
+            traits[["var_in"]], names(data),
             info = paste0(red(files[2]), "\ttraits"), label = "`var_in`"
           )
 
@@ -1083,10 +1086,10 @@ dataset_test_worker <-
 
         ## Check traits are not only NAs
         expect_false(
-          nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) == 0,
+          nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) == 0,
           info = paste0(red(f), "\ttraits - only contain NA `trait_name`'s"))
 
-        if (nrow(metadata[["traits"]] %>% util_list_to_df2() %>% dplyr::filter(!is.na(.data$trait_name))) > 0) {
+        if (nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) > 0) {
 
           # Test build dataset
           expect_no_error(

From 9da197baaf907fa7eced62881ecc311f2fbc00bb Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 23 Nov 2023 18:20:35 +1100
Subject: [PATCH 02/10] Add test for checking units

---
 R/testdata.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/testdata.R b/R/testdata.R
index e698ecee..de5aeaf4 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -709,6 +709,12 @@ dataset_test_worker <-
         )
 
         # Check units are found in `unit_conversions.csv` #TODO
+        units <- read_csv("config/unit_conversions.csv")
+        expect_is_in(
+          traits$unit_in, units$unit_from,
+          info = paste0(red(f), "\ttraits"),
+          label = "`unit_in`'s"
+        )
 
         # Check no duplicate `var_in`'s #TODO
         # For both traits and contexts

From 51e5406893ae204ca062f5138b4e78cc8ab6a946 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 23 Nov 2023 18:46:55 +1100
Subject: [PATCH 03/10] Add tests for checking no duplicate `var_in` or
 `context_property` in contexts and no duplicate `var_in`'s in traits

---
 R/testdata.R | 49 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/R/testdata.R b/R/testdata.R
index de5aeaf4..06dcaba2 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -590,6 +590,27 @@ dataset_test_worker <-
             process_format_contexts(dataset_id, data)
         )
 
+        # Check that there are no duplicate `var_in` or `context_property` fields
+        context_properties <- sapply(metadata[["contexts"]], "[[", "context_property")
+        context_vars_in <- sapply(metadata[["contexts"]], "[[", "var_in")
+
+        expect_equal(
+          context_properties |> duplicated() |> sum(),
+          0,
+          info = sprintf(
+            "%s\tcontexts - duplicate `context_property` values detected: '%s'",
+            red(f),
+            paste(context_properties[duplicated(context_properties)], collapse = "', '"))
+        )
+        expect_equal(
+          context_vars_in |> duplicated() |> sum(),
+          0,
+          info =  sprintf(
+            "%s\tcontexts - duplicate `var_in` values detected: '%s'",
+            red(f),
+            paste(context_vars_in[duplicated(context_vars_in)], collapse = "', '"))
+        )
+
         # Check context details load
         if (nrow(contexts) > 0) {
 
@@ -599,6 +620,7 @@ dataset_test_worker <-
             info = paste0(red(f), "\tcontexts"), label = "field names"
           )
 
+
           # Check that unique context `value`'s only have one unique description
           expect_equal(
             contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>%
@@ -708,7 +730,7 @@ dataset_test_worker <-
           label = "`trait_name`'s"
         )
 
-        # Check units are found in `unit_conversions.csv` #TODO
+        # Check units are found in `unit_conversions.csv`
         units <- read_csv("config/unit_conversions.csv")
         expect_is_in(
           traits$unit_in, units$unit_from,
@@ -716,8 +738,22 @@ dataset_test_worker <-
           label = "`unit_in`'s"
         )
 
-        # Check no duplicate `var_in`'s #TODO
-        # For both traits and contexts
+        # Check no duplicate `var_in`'s
+
+        expect_equal(
+          traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
+            filter(.data$n > 1) %>% nrow(),
+          0,
+          info = sprintf(
+            "%s\ttraits - duplicate `var_in` values detected: '%s'",
+            red(f),
+            paste(
+              traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
+                filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(),
+              collapse = "', '")
+          )
+        )
+
 
         # Now that traits loaded, check details of contexts match
         if (nrow(contexts > 0)) {
@@ -862,13 +898,14 @@ dataset_test_worker <-
 
             # First check no duplicate combinations of `find`
             expect_equal(
-              x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
+              x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
+                filter(.data$n > 1) %>% nrow(),
               0, info = sprintf(
                 "%s\tsubstitutions - duplicate `find` values detected: '%s'",
                 red(f),
                 paste(
-                  x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
-                    dplyr::pull(.data$find) %>% unique(),
+                  x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
+                    filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
                   collapse = "', '")
               )
             )

From 22cef1352503175aa2a1cc99218fa03c306ac599 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Fri, 24 Nov 2023 12:33:51 +1100
Subject: [PATCH 04/10] Add prefixes to filter()

---
 R/process.R  |  2 +-
 R/setup.R    | 12 ++++++------
 R/testdata.R | 26 +++++++++++++-------------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/R/process.R b/R/process.R
index 8e85e67c..d774e177 100644
--- a/R/process.R
+++ b/R/process.R
@@ -2050,7 +2050,7 @@ check_pivot_duplicates <- function(
 
   # Check for duplicates
   database_object$traits %>%
-    filter(.data$dataset_id %in% dataset_ids) %>%
+    dplyr::filter(.data$dataset_id %in% dataset_ids) %>%
     select(
       # `taxon_name` and `original_name` are not needed for pivoting but are included for informative purposes
       dplyr::all_of(
diff --git a/R/setup.R b/R/setup.R
index 0786365b..c4b1debd 100644
--- a/R/setup.R
+++ b/R/setup.R
@@ -900,15 +900,15 @@ metadata_add_taxonomic_change <- function(dataset_id, find, replace, reason, tax
       } else {
         message(sprintf(red("Existing substitution will be overwritten for ") %+% green("'%s'"), find))
         data <- data %>%
-                  filter(.data$find != to_add$find) %>%
-                  dplyr::bind_rows(to_add) %>%
-                  filter(!.data$find == replace) %>%
-                  arrange(.data$find)
+          dplyr::filter(.data$find != to_add$find) %>%
+          dplyr::bind_rows(to_add) %>%
+          dplyr::filter(!.data$find == replace) %>%
+          arrange(.data$find)
       }
     } else {
       data <- dplyr::bind_rows(data, to_add) %>%
-            filter(!.data$find == replace) %>%
-            arrange(.data$find)
+        dplyr::filter(!.data$find == replace) %>%
+        arrange(.data$find)
     }
   }
 
diff --git a/R/testdata.R b/R/testdata.R
index 06dcaba2..8e1bd10b 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -624,26 +624,26 @@ dataset_test_worker <-
           # Check that unique context `value`'s only have one unique description
           expect_equal(
             contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>%
-              filter(.data$n > 1) %>% nrow(),
+              dplyr::filter(.data$n > 1) %>% nrow(),
             0, info = sprintf(
               "%s\tcontexts - `value`'s should only have one unique description each: '%s'",
               red(f),
               paste(
                 contexts %>% dplyr::group_by(.data$context_property, .data$value) %>% dplyr::summarise(n = dplyr::n_distinct(.data$description)) %>%
-                  filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(),
+                  dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$value) %>% unique(),
                 collapse = "', '")
             )
           )
 
           # Check that there are no duplicate `find` fields
           expect_equal(
-            contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
+            contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
               nrow(),
             0, info = sprintf(
               "%s\tcontexts - duplicate `find` values detected: '%s'",
               red(f),
               paste(
-                contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
+                contexts %>% dplyr::group_by(.data$context_property, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
                   dplyr::pull(.data$find) %>% unique(),
                 collapse = "', '")
             )
@@ -742,14 +742,14 @@ dataset_test_worker <-
 
         expect_equal(
           traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
-            filter(.data$n > 1) %>% nrow(),
+            dplyr::filter(.data$n > 1) %>% nrow(),
           0,
           info = sprintf(
             "%s\ttraits - duplicate `var_in` values detected: '%s'",
             red(f),
             paste(
               traits %>% dplyr::group_by(.data$var_in) %>% dplyr::summarise(n = dplyr::n()) %>%
-                filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(),
+                dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$var_in) %>% unique(),
               collapse = "', '")
           )
         )
@@ -769,7 +769,7 @@ dataset_test_worker <-
 
           for (j in unique(contexts[["var_in"]])) {
 
-            contextsub <- contexts %>% filter(var_in == j)
+            contextsub <- contexts %>% dplyr::filter(var_in == j)
 
             # Context values align either with a column of data or a column of traits table
             if (is.null(data[[j]])) {
@@ -899,13 +899,13 @@ dataset_test_worker <-
             # First check no duplicate combinations of `find`
             expect_equal(
               x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
-                filter(.data$n > 1) %>% nrow(),
+                dplyr::filter(.data$n > 1) %>% nrow(),
               0, info = sprintf(
                 "%s\tsubstitutions - duplicate `find` values detected: '%s'",
                 red(f),
                 paste(
                   x[[trait]] %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
-                    filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
+                    dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
                   collapse = "', '")
               )
             )
@@ -945,12 +945,12 @@ dataset_test_worker <-
 
           # Check no duplicate `find` values
           expect_equal(
-            x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
+            x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(),
             0, info = sprintf(
               "%s\ttaxonomic_updates - duplicate `find` values detected: '%s'",
               red(f),
               paste(
-                x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>%
+                x %>% dplyr::group_by(.data$find) %>% dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>%
                   dplyr::pull(.data$find) %>% unique(),
                 collapse = "', '")
             )
@@ -1052,13 +1052,13 @@ dataset_test_worker <-
           # Check no duplicate `find` values
           expect_equal(
             x %>% dplyr::group_by(.data$variable, .data$find) %>%
-              dplyr::summarise(n = dplyr::n()) %>% filter(.data$n > 1) %>% nrow(),
+              dplyr::summarise(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow(),
             0, info = sprintf(
               "%s\texclude_observations - duplicate `find` values detected: '%s'",
               red(f),
               paste(
                 x %>% dplyr::group_by(.data$variable, .data$find) %>% dplyr::summarise(n = dplyr::n()) %>%
-                  filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
+                  dplyr::filter(.data$n > 1) %>% dplyr::pull(.data$find) %>% unique(),
                 collapse = "', '")
             )
           )

From f39674d82c81371d9988f1cb2d985e8c6843690d Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Fri, 24 Nov 2023 17:51:28 +1100
Subject: [PATCH 05/10] Add `dplyr::` prefixes

---
 R/pivot.R    | 2 +-
 R/process.R  | 6 +++---
 R/testdata.R | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/pivot.R b/R/pivot.R
index cd742750..0e834856 100644
--- a/R/pivot.R
+++ b/R/pivot.R
@@ -20,7 +20,7 @@ check_pivot_wider <- function(dataset) {
       dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates", "observation_id",
       "value_type")), everything()
     ) %>%
-    filter(.data$number_of_duplicates > 1) %>%
+    dplyr::filter(.data$number_of_duplicates > 1) %>%
     nrow()
 
   if (duplicates == 0) {
diff --git a/R/process.R b/R/process.R
index d774e177..28a6ef1e 100644
--- a/R/process.R
+++ b/R/process.R
@@ -561,7 +561,7 @@ process_create_observation_id <- function(data, metadata) {
   if (!is.null(traits_table[["repeat_measurements_id"]])) {
 
     to_add_id <- traits_table %>%
-      filter(.data$repeat_measurements_id == TRUE) %>%
+      dplyr::filter(.data$repeat_measurements_id == TRUE) %>%
       dplyr::pull(.data$trait_name)
 
     i <- !is.na(data$value) & data$trait_name %in% to_add_id &
@@ -2060,10 +2060,10 @@ check_pivot_duplicates <- function(
     tidyr::pivot_wider(names_from = "trait_name", values_from = "value", values_fn = length) %>%
     tidyr::pivot_longer(cols = 9:ncol(.)) %>%
     dplyr::rename(dplyr::all_of(c("trait_name" = "name", "number_of_duplicates" = "value"))) %>%
-    select(
+    dplyr::select(
       dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates",
       "taxon_name", "original_name", "observation_id", "value_type")), everything()
     ) %>%
-    filter(.data$number_of_duplicates > 1)
+    dplyr::filter(.data$number_of_duplicates > 1)
 
 }
diff --git a/R/testdata.R b/R/testdata.R
index 8e1bd10b..cdb26467 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -1077,7 +1077,7 @@ dataset_test_worker <-
               expect_is_in(
                 find_values,
                 # Extract values from the data for that variable
-                parsed_data %>% filter(.data$trait_name == variable) %>% dplyr::pull(.data$value) %>% unique(),
+                parsed_data %>% dplyr::filter(.data$trait_name == variable) %>% dplyr::pull(.data$value) %>% unique(),
                 info = paste0(red(f), "\texclude_observations"), label = sprintf("variable '%s'", variable)
               )
 

From e41e336578e3e5028e1bf0d2f21ece99d48208fd Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Wed, 29 Nov 2023 14:36:36 +1100
Subject: [PATCH 06/10] Fix `write_metadata` adding spaces to `custom_R_code`

---
 R/utils.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index 819b000a..50157204 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -268,12 +268,13 @@ write_metadata <- function(data, path, style_code = FALSE) {
   if (!is.na(data$dataset$custom_R_code)) {
 
     code <- data$dataset$custom_R_code
+    code <- stringr::str_trim(code, side = "left")
 
     if (style_code)
       code <- code %>% suppressWarnings(styler::style_text(transformers = .data$tidyverse_style(strict = TRUE)))
 
     txt <- gsub("custom_R_code: .na", code %>% paste(collapse = "\n") %>%
-                  paste0("custom_R_code:", .), txt, fixed = TRUE)
+                  paste0("custom_R_code: ", .), txt, fixed = TRUE)
   }
 
   if (!stringr::str_sub(txt, nchar(txt)) == "\n")

From 228ba100ca19d26db32a997490548c5c6bc18a67 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 30 Nov 2023 14:35:10 +1100
Subject: [PATCH 07/10] Fix comment

---
 R/testdata.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/testdata.R b/R/testdata.R
index cdb26467..fcd90058 100644
--- a/R/testdata.R
+++ b/R/testdata.R
@@ -1127,7 +1127,7 @@ dataset_test_worker <-
 
 
 
-        ## Check traits are not only NAs
+        ## Check that not all trait names are NAs
         expect_false(
           nrow(traits %>% dplyr::filter(!is.na(.data$trait_name))) == 0,
           info = paste0(red(f), "\ttraits - only contain NA `trait_name`'s"))

From 9a996f005de7ac03082487050a4f0b030aaf9868 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 30 Nov 2023 14:45:55 +1100
Subject: [PATCH 08/10] Add `dplyr::` prefixes to `arrange()`, `filter()` and `distinct()`

---
 R/pivot.R                                        | 2 +-
 R/process.R                                      | 8 ++++----
 R/setup.R                                        | 6 +++---
 inst/support/report_dataset.Rmd                  | 6 +++---
 tests/testthat/examples/Test_2023_3/metadata.yml | 2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/R/pivot.R b/R/pivot.R
index 0e834856..09474612 100644
--- a/R/pivot.R
+++ b/R/pivot.R
@@ -68,7 +68,7 @@ db_traits_pivot_wider <- function(traits) {
       .data$dataset_id, .data$observation_id, .data$method_id,
       .data$method_context_id, .data$repeat_measurements_id) %>%
     dplyr::summarise(n_value_type = length(unique(.data$value_type))) %>%
-    arrange(.data$observation_id) %>%
+    dplyr::arrange(.data$observation_id) %>%
     dplyr::filter(.data$n_value_type > 1)
 
   if (nrow(check_value_type) > 1) {
diff --git a/R/process.R b/R/process.R
index 1a54f6c7..49218571 100644
--- a/R/process.R
+++ b/R/process.R
@@ -803,12 +803,12 @@ process_create_context_ids <- function(data, contexts) {
 
   contexts_finished <-
     contexts %>%
-    filter(!is.na(.data$value)) %>%
+    dplyr::filter(!is.na(.data$value)) %>%
     dplyr::left_join(
       id_link %>% dplyr::bind_rows(),
       by = c("context_property", "category", "value")
     ) %>%
-    distinct(dplyr::across(-dplyr::any_of("find")))
+    dplyr::distinct(dplyr::across(-dplyr::any_of("find")))
 
   list(
     contexts = contexts_finished %>% util_df_convert_character(),
@@ -1866,8 +1866,8 @@ build_combine <- function(..., d = list(...)) {
   metadata[["contributors"]] <-
     contributors %>%
     dplyr::select(-dplyr::any_of(c("dataset_id", "additional_role"))) %>%
-    distinct() %>%
-    arrange(.data$last_name, .data$given_name) %>%
+    dplyr::distinct() %>%
+    dplyr::arrange(.data$last_name, .data$given_name) %>%
     util_df_to_list()
 
   ret <- list(traits = combine("traits", d),
diff --git a/R/setup.R b/R/setup.R
index c4b1debd..ad5d6fa0 100644
--- a/R/setup.R
+++ b/R/setup.R
@@ -903,12 +903,12 @@ metadata_add_taxonomic_change <- function(dataset_id, find, replace, reason, tax
           dplyr::filter(.data$find != to_add$find) %>%
           dplyr::bind_rows(to_add) %>%
           dplyr::filter(!.data$find == replace) %>%
-          arrange(.data$find)
+          dplyr::arrange(.data$find)
       }
     } else {
       data <- dplyr::bind_rows(data, to_add) %>%
         dplyr::filter(!.data$find == replace) %>%
-        arrange(.data$find)
+        dplyr::arrange(.data$find)
     }
   }
 
@@ -967,7 +967,7 @@ metadata_add_taxonomic_changes_list <- function(dataset_id, taxonomic_updates) {
       ))
     }
     # Write new taxonomic updates to metadata
-    metadata$taxonomic_updates <- existing_updates %>% dplyr::arrange(.data$find) %>% filter(!.data$find == .data$replace)
+    metadata$taxonomic_updates <- existing_updates %>% dplyr::arrange(.data$find) %>% dplyr::filter(!.data$find == .data$replace)
   } else {
 
     # Read in dataframe of taxonomic changes, split into single-row lists, and add to metadata file
diff --git a/inst/support/report_dataset.Rmd b/inst/support/report_dataset.Rmd
index c9bb0888..f42db845 100644
--- a/inst/support/report_dataset.Rmd
+++ b/inst/support/report_dataset.Rmd
@@ -255,9 +255,9 @@ new_question("(section `dataset`) Can you provide more detailed information for
 
 ```{r, results='asis', echo=FALSE}
 missing <- metadata$dataset %>%
- util_list_to_df1() %>%
- filter(value == "unknown") %>%
- pull(key)
+  util_list_to_df1() %>%
+  filter(value == "unknown") %>%
+  pull(key)
 
 for (v in missing) {
   sprintf("(section `dataset`) Can you provide missing details for the variable `%s`?\n", v) %>%
diff --git a/tests/testthat/examples/Test_2023_3/metadata.yml b/tests/testthat/examples/Test_2023_3/metadata.yml
index 6211a1ff..2ffa033a 100644
--- a/tests/testthat/examples/Test_2023_3/metadata.yml
+++ b/tests/testthat/examples/Test_2023_3/metadata.yml
@@ -91,7 +91,7 @@ dataset:
       arrange(category) %>%
       distinct() %>%
       mutate(entity_measured = ifelse(is.na(entity_measured), "unknown", entity_measured))
-  '
+    '
   collection_date: unknown/2022
   taxon_name: taxon_name
   trait_name: trait_name

From fe4eb5b60be0c9c1b5a570f290304c8cf88b1d10 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 30 Nov 2023 14:49:11 +1100
Subject: [PATCH 09/10] Add `dplyr::` prefixes to `select()` calls

---
 R/pivot.R   | 6 +++---
 R/process.R | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/pivot.R b/R/pivot.R
index 09474612..cfd1c87d 100644
--- a/R/pivot.R
+++ b/R/pivot.R
@@ -9,14 +9,14 @@
 check_pivot_wider <- function(dataset) {
 
   duplicates <- dataset$traits %>%
-    select(
+    dplyr::select(
       dplyr::all_of(c("dataset_id", "trait_name", "value", "observation_id", "value_type",
       "repeat_measurements_id", "method_id", "method_context_id"))
     ) %>%
     tidyr::pivot_wider(names_from = "trait_name", values_from = "value", values_fn = length) %>%
     tidyr::pivot_longer(cols = 7:ncol(.)) %>%
     dplyr::rename(dplyr::all_of(c("trait_name" = "name", "number_of_duplicates" = "value"))) %>%
-    select(
+    dplyr::select(
       dplyr::all_of(c("dataset_id", "trait_name", "number_of_duplicates", "observation_id",
       "value_type")), everything()
     ) %>%
@@ -61,7 +61,7 @@ db_traits_pivot_wider <- function(traits) {
 
   # A check for if there are more than 1 value_type for a given taxon_name, observation_id and method
   check_value_type <- traits %>%
-    select(dplyr::all_of(c(
+    dplyr::select(dplyr::all_of(c(
       "trait_name", "value", "dataset_id", "observation_id", "method_id", "method_context_id",
       "repeat_measurements_id", "value_type"))) %>%
     dplyr::group_by(
diff --git a/R/process.R b/R/process.R
index 49218571..7b0bf096 100644
--- a/R/process.R
+++ b/R/process.R
@@ -1954,7 +1954,7 @@ dataset_update_taxonomy <- function(austraits_raw, taxa) {
                                 stringr::word(.data$taxon_name, 1), .data$name_to_match_to)
     ) %>%
     # Remove `taxon_rank`, as it is about to be merged back in, but matches will now be possible to more rows
-    select(-dplyr::any_of(c("taxon_rank", "taxonomic_resolution"))) %>%
+    dplyr::select(-dplyr::any_of(c("taxon_rank", "taxonomic_resolution"))) %>%
     util_df_convert_character() %>%
     # Merge in all data from taxa
     dplyr::left_join(by = c("taxon_name"),
@@ -2051,7 +2051,7 @@ check_pivot_duplicates <- function(
   # Check for duplicates
   database_object$traits %>%
     dplyr::filter(.data$dataset_id %in% dataset_ids) %>%
-    select(
+    dplyr::select(
       # `taxon_name` and `original_name` are not needed for pivoting but are included for informative purposes
       dplyr::all_of(
         c("dataset_id", "trait_name", "value", "taxon_name", "original_name", "observation_id",

From 988717b91e4d1fea416a83140f9479dfb2d015e5 Mon Sep 17 00:00:00 2001
From: yangsophieee <sophie.yang917@gmail.com>
Date: Thu, 30 Nov 2023 14:51:56 +1100
Subject: [PATCH 10/10] Add `dplyr::` prefixes to `mutate()` and `distinct()` calls

---
 R/process.R | 10 ++++------
 R/setup.R   |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/R/process.R b/R/process.R
index 7b0bf096..aecb374a 100644
--- a/R/process.R
+++ b/R/process.R
@@ -126,7 +126,7 @@ dataset_process <- function(filename_data_raw,
       )
     traits <-
       traits %>%
-      mutate(
+      dplyr::mutate(
         location_id = ifelse(.data$entity_type == "species", NA_character_, .data$location_id)
       )
   }
@@ -147,7 +147,7 @@ dataset_process <- function(filename_data_raw,
             by = "location_id",
             locations %>%
               tidyr::pivot_wider(names_from = "location_property", values_from = "value") %>%
-              mutate(col_tmp = .data[[v]]) %>%
+              dplyr::mutate(col_tmp = .data[[v]]) %>%
               dplyr::select(dplyr::any_of(c("location_id", "col_tmp"))) %>%
               stats::na.omit()
           )
@@ -185,13 +185,11 @@ dataset_process <- function(filename_data_raw,
     process_format_contributors(dataset_id, schema)
 
   # Record sources
-  sources <- metadata$source %>%
-            lapply(util_list_to_bib) %>% purrr::reduce(c)
+  sources <- metadata$source %>% lapply(util_list_to_bib) %>% purrr::reduce(c)
 
   # Record methods
   methods <- process_format_methods(metadata, dataset_id, sources, contributors)
 
-
   # Retrieve taxonomic details for known species
   taxonomic_updates <-
     traits %>%
@@ -985,7 +983,7 @@ util_check_disallowed_chars <- function(object) {
 process_flag_unsupported_characters <- function(data) {
 
   data <- data %>%
-    mutate(
+    dplyr::mutate(
       error = ifelse(is.na(.data$error) & util_check_disallowed_chars(.data$value),
       "Value contains unsupported characters", .data$error)
     )
diff --git a/R/setup.R b/R/setup.R
index ad5d6fa0..df350424 100644
--- a/R/setup.R
+++ b/R/setup.R
@@ -377,7 +377,7 @@ metadata_add_locations <- function(dataset_id, location_data, user_responses = N
   # Save and notify
   location_data <-  location_data %>%
     dplyr::select(dplyr::all_of(c(location_name, keep))) %>%
-    distinct()
+    dplyr::distinct()
 
   # If user didn't select any variables to keep, so add defaults
   if (is.na(keep[1])) {