Remove dependency on {austraits} by importing plot (#121)

* Import plot_trait_distribution_beeswarm and dependencies * Add test for function --------- Co-authored-by: yangsophieee <[email protected]>
traitecoevo · Nov 10, 2023 · 791c8a5 · 791c8a5
1 parent a7e4730
commit 791c8a5
Show file tree

Hide file tree

Showing 10 changed files with 218 additions and 7 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -7,6 +7,7 @@ Authors@R: c(
     person(given = "Daniel", family = "Falster", role = c("cre", "aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-9814-092X")),
     person(given = "Elizabeth", family = "Wenk", role = c("cur", "aut"), comment = c(ORCID = "0000-0001-5640-5910")),
     person(given = "Sophie", family = "Yang", role = c("cur", "aut"), comment = c(ORCID = "0000-0001-7328-345X")),
+    person(given = "Fonti", family = "Kar", role = c("aut", "ctb"), comment = c(ORCID = "0000-0002-2760-3974")),
     person("ARDC", role = c("fnd")),
     person("ARC", role = c("fnd"))
     )
@@ -28,6 +29,7 @@ Imports:
     kableExtra,
     magrittr,
     purrr,
+    forcats,
     RefManageR,
     rlang,
     rmarkdown,
@@ -40,7 +42,6 @@ Imports:
 Suggests:
     furrr,
     remake,
-    austraits,
     leaflet,
     bibtex,
     knitr,
@@ -49,10 +50,13 @@ Suggests:
     markdown,
     pkgdown,
     rcrossref,
+    ggplot2,
+    ggbeeswarm,
+    gridExtra, 
+    scales,
     zip,
     covr
 Remotes:
-    traitecoevo/austraits@develop,
     richfitz/remake
 Encoding: UTF-8
 VignetteBuilder: knitr

diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ export(metadata_exclude_observations)
 export(metadata_find_taxonomic_change)
 export(metadata_remove_taxonomic_change)
 export(metadata_update_taxonomic_change)
+export(plot_trait_distribution_beeswarm)
 export(read_csv_char)
 export(read_metadata)
 export(util_df_to_list)

diff --git a/R/plot_trait_beeswarm.R b/R/plot_trait_beeswarm.R
@@ -0,0 +1,156 @@
+#' @title Beeswarm Trait distribution
+#' @description Plots distribution of trait values by a  grouping variable using ggbeeswarm package
+#'
+#' @param austraits austraits data object
+#' @param trait_name Name of trait to plot
+#' @param y_axis_category One of `dataset_id`, `family`
+#' @param highlight specify a group to highlight
+#' @param hide_ids add label on y_axis?
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' austraits %>% plot_trait_distribution_beeswarm("wood_density", "dataset_id", "Westoby_2014")
+#' }
+#' @author Daniel Falster - [email protected]
+#' @export
+
+#
+plot_trait_distribution_beeswarm <- function(austraits, trait_name, y_axis_category, highlight=NA, hide_ids = FALSE) {
+
+  # Subset data to this trait
+  austraits_trait <-
+    austraits$traits %>% filter(trait_name == trait_name) %>%
+    mutate(value = as.numeric(.data$value))
+
+  my_shapes = c("_min" = 60, "_mean" = 16, "_max" =62, "unknown" = 18)
+
+  as_shape <- function(value_type) {
+    p <- rep("unknown", length(value_type))
+
+    p[grepl("mean", value_type)] <- "_mean" #16
+    p[grepl("min", value_type)] <- "_min" #60
+    p[grepl("max", value_type)] <- "_max" #62
+    factor(p, levels=names(my_shapes))
+  }
+
+  tax_info <- austraits$taxa
+
+  data <-
+    austraits_trait %>%
+    dplyr::mutate(shapes = as_shape(.data$value_type)) %>%
+    dplyr::left_join(by = "taxon_name", tax_info)
+
+  # Define grouping variables and derivatives
+  if(!y_axis_category %in% names(data)){
+    stop("Incorrect grouping variable! Currently implemented for `family` or `dataset_id`")
+  }
+
+  # define grouping variable, ordered by group-level by mean values
+  # use log_value where possible
+  if(min(data$value, na.rm=TRUE) > 0 ) {
+    data$value2 <- log10(data$value)
+  } else {
+    data$value2 <- data$value
+  }
+  data$Group = forcats::fct_reorder(data[[y_axis_category]], data$value2, na.rm=TRUE)
+
+  n_group <- levels(data$Group) %>% length()
+
+  # set colour to be alternating
+  data$colour = ifelse(data$Group %in% levels(data$Group)[seq(1, n_group, by=2)],
+                       "a", "b")
+
+  # set colour of group to highlight
+  if(!is.na(highlight) & highlight %in% data$Group) {
+    data <- dplyr::mutate(data, colour = ifelse(.data$Group %in% highlight, "c", .data$colour))
+  }
+
+  vals <- list(minimum = purrr::pluck(austraits, "definitions", trait_name, "allowed_values_min"),
+           maximum = purrr::pluck(austraits, "definitions", trait_name, "allowed_values_max"))
+
+  range <- (vals$maximum/vals$minimum)
+
+  # Check range on y-axis
+  y.text <- ifelse(n_group > 20, 0.75, 1)
+  heights = c(1, max(1, n_group/7))
+
+  # Top plot - plain histogram of data
+  p1 <-
+    ggplot2::ggplot(data, ggplot2::aes(x=.data$value)) +
+    ggplot2::geom_histogram(ggplot2::aes(y = ..density..), color="darkgrey", fill="darkgrey", bins=50) +
+    ggplot2::geom_density(color="black") +
+    ggplot2::xlab("") + ggplot2::ylab("All data") +
+    ggplot2::theme_bw()  +
+    ggplot2::theme(legend.position = "none",
+          panel.grid.minor = ggplot2::element_blank(),
+          panel.grid.major = ggplot2::element_blank(),
+          axis.ticks.y= ggplot2::element_blank(),
+          axis.text= ggplot2::element_blank(),
+          panel.background = ggplot2::element_blank()
+    )
+  # Second plot -- dots by groups, using ggbeeswarm package
+  p2 <-
+    ggplot2::ggplot(data, ggplot2::aes(x = .data$value, y = .data$Group, colour = .data$colour, shape = .data$shapes)) +
+    ggbeeswarm::geom_quasirandom(groupOnX=FALSE) +
+    ggplot2::ylab(paste("By ", y_axis_category)) +
+    # inclusion of custom shapes: for min, mean, unknown
+    # NB: this single line of code makes function about 4-5 slower for some reason
+    ggplot2::scale_shape_manual(values = my_shapes) +
+    ggplot2::theme_bw() +
+    ggplot2::theme(legend.position = "none",
+          panel.grid.major.x = ggplot2::element_blank(),
+          panel.grid.minor.x = ggplot2::element_blank(),
+          axis.text.x = ggplot2::element_text(size=ggplot2::rel(1.25)),
+          axis.text.y = ggplot2::element_text(size=ggplot2::rel(y.text))
+    ) #+
+   # guides(colour=FALSE)
+
+
+  if(hide_ids) {
+    p2 <- p2 + ggplot2::theme(axis.text.y = ggplot2::element_blank())
+  }
+
+  #Sourced from https://gist.github.com/bbolker/5ba6a37d64b06a176e320b2b696b6733
+  scientific_10 <- function(x,suppress_ones=TRUE) {
+    s <- scales::scientific_format()(x)
+    ## substitute for exact zeros
+    s[s=="0e+00"] <- "0"
+    ## regex: [+]?  = "zero or one occurrences of '+'"
+    s2 <- gsub("e[+]?", " %*% 10^", s )
+    ## suppress 1 x
+    if (suppress_ones) s2 <- gsub("1 %\\*% +","",s2)
+    parse(text=s2)
+  }
+
+  # Define scale on x-axis and transform to log if required
+  if(vals$minimum !=0 & range > 20) {
+    #log transformation
+    p1 <- p1 +
+      ggplot2::scale_x_log10(name="",
+                    breaks = scales::breaks_log(),
+                    labels = scientific_10,
+                    limits=c(vals$minimum, vals$maximum))
+    p2 <- p2 +
+      ggplot2::scale_x_log10(name=paste(trait_name, ' (', data$unit[1], ')'),
+                    breaks = scales::breaks_log(),
+                    labels = scientific_10,
+                    limits=c(vals$minimum, vals$maximum))
+  } else {
+    p1 <- p1 + ggplot2::scale_x_continuous(limits=c(vals$minimum, vals$maximum))
+    p2 <- p2 + ggplot2::scale_x_continuous(limits=c(vals$minimum, vals$maximum)) +
+      ggplot2::xlab(paste(trait_name, ' (', data$unit[1], ')'))
+
+  }
+
+  # combine plots
+  # Might be a better way to do this with other packages?
+
+  f <- function(x) {suppressWarnings(ggplot2::ggplot_gtable(ggplot2::ggplot_build(x)))}
+  p1 <- f(p1)
+  p2 <- f(p2)
+  # Fix width of second plot to be same as bottom using ggplot_table
+  p1$widths[2:3] <- p2$widths[2:3]
+  gridExtra::grid.arrange(p1, p2, nrow=2, widths=c(1), heights=heights)
+}
diff --git a/R/setup.R b/R/setup.R
@@ -966,11 +966,11 @@ metadata_add_taxonomic_changes_list <- function(dataset_id, taxonomic_updates) {
       ))
     }
     # Write new taxonomic updates to metadata
-    metadata$taxonomic_updates <- existing_updates %>% arrange(.data$find) %>% filter(!find == replace)
+    metadata$taxonomic_updates <- existing_updates %>% dplyr::arrange(.data$find) %>% filter(!.data$find == .data$replace)
   } else {
 
     # Read in dataframe of taxonomic changes, split into single-row lists, and add to metadata file
-    metadata$taxonomic_updates <- taxonomic_updates %>% filter(!find == replace)
+    metadata$taxonomic_updates <- taxonomic_updates %>% dplyr::filter(!.data$find == .data$replace)
 
   }
 

diff --git a/R/traits.build-package.R b/R/traits.build-package.R
@@ -11,6 +11,7 @@ NULL
 utils::globalVariables(
   c(
     ".",
-    ".data"
+    ".data",
+    "..density.."
   )
 )
diff --git a/inst/support/report_dataset.Rmd b/inst/support/report_dataset.Rmd
@@ -567,7 +567,7 @@ if (nrow(filter(data_study$excluded_data, error == "Value out of allowable range
     writeLines()
 }
 
- austraits::plot_trait_distribution_beeswarm(austraits, trait, "dataset_id", highlight = dataset_id, hide_ids = TRUE)
+ plot_trait_distribution_beeswarm(austraits, trait, "dataset_id", highlight = dataset_id, hide_ids = TRUE)
 
   writeLines(c(""))
 

diff --git a/man/plot_trait_distribution_beeswarm.Rd b/man/plot_trait_distribution_beeswarm.Rd
diff --git a/man/traits.build-package.Rd b/man/traits.build-package.Rd
diff --git a/tests/testthat/test-setup.R b/tests/testthat/test-setup.R
@@ -654,6 +654,9 @@ test_that("`build_setup_pipeline` is working", {
   expect_silent(suppressMessages(austraits_raw <- remake::make("database_raw")))
   expect_silent(suppressMessages(austraits <- remake::make("database")))
 
+  # Save output for future tests on database
+  saveRDS(austraits, "test_austraits.rds")
+
   # Test that austraits_raw has no version number or git_SHA
   expect_null(austraits_raw$build_info$version)
   expect_null(austraits_raw$build_info$git_SHA)
@@ -705,7 +708,7 @@ test_that("reports and plots are produced", {
   # Not testing right now
   #expect_no_error(
     #p <-
-      #austraits::plot_trait_distribution_beeswarm(
+      #plot_trait_distribution_beeswarm(
         #austraits, "huber_value", "dataset_id", highlight = "Test_2022", hide_ids = TRUE)
   #)
   expect_silent(

diff --git a/tests/testthat/test-usage.R b/tests/testthat/test-usage.R
@@ -0,0 +1,9 @@
+
+
+austraits <- readRDS("test_austraits.rds")
+# This file tests usage of the database
+# Note, requires existnec of "test_austraits.rds", generated from `test-process.R`
+
+test_that("plots", {
+  expect_invisible(suppressMessages(austraits %>% plot_trait_distribution_beeswarm("wood_density", "dataset_id", "Test_2022")))
+})