Skip to content

Commit

Permalink
Fix plot_trait_distribution_beeswarm error (#130)
Browse files Browse the repository at this point in the history
* Try fix error

* Remove browsers

* Fix error with `plot_trait_distribution_beeswarm`

* Rename file to plot.R

* Update documentation
  • Loading branch information
yangsophieee authored Nov 16, 2023
1 parent b5e9696 commit 256ab7a
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 45 deletions.
79 changes: 40 additions & 39 deletions R/plot_trait_beeswarm.R → R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' @description Plots distribution of trait values by a grouping variable using ggbeeswarm package
#'
#' @param austraits austraits data object
#' @param trait_name Name of trait to plot
#' @param trait Name of trait to plot
#' @param y_axis_category One of `dataset_id`, `family`
#' @param highlight specify a group to highlight
#' @param hide_ids add label on y_axis?
Expand All @@ -17,22 +17,22 @@
#' @export

#
plot_trait_distribution_beeswarm <- function(austraits, trait_name, y_axis_category, highlight=NA, hide_ids = FALSE) {
plot_trait_distribution_beeswarm <- function(austraits, trait, y_axis_category, highlight = NA, hide_ids = FALSE) {

# Subset data to this trait
austraits_trait <-
austraits$traits %>% filter(trait_name == trait_name) %>%
mutate(value = as.numeric(.data$value))
austraits$traits %>% dplyr::filter(.data$trait_name == trait) %>%
dplyr::mutate(value = as.numeric(.data$value))

my_shapes = c("_min" = 60, "_mean" = 16, "_max" =62, "unknown" = 18)
my_shapes <- c("_min" = 60, "_mean" = 16, "_max" = 62, "unknown" = 18)

as_shape <- function(value_type) {
p <- rep("unknown", length(value_type))

p[grepl("mean", value_type)] <- "_mean" #16
p[grepl("min", value_type)] <- "_min" #60
p[grepl("max", value_type)] <- "_max" #62
factor(p, levels=names(my_shapes))
factor(p, levels = names(my_shapes))
}

tax_info <- austraits$taxa
Expand All @@ -43,57 +43,58 @@ plot_trait_distribution_beeswarm <- function(austraits, trait_name, y_axis_categ
dplyr::left_join(by = "taxon_name", tax_info)

# Define grouping variables and derivatives
if(!y_axis_category %in% names(data)){
if (!y_axis_category %in% names(data)) {
stop("Incorrect grouping variable! Currently implemented for `family` or `dataset_id`")
}

# define grouping variable, ordered by group-level by mean values
# use log_value where possible
if(min(data$value, na.rm=TRUE) > 0 ) {
if (min(data$value, na.rm = TRUE) > 0) {
data$value2 <- log10(data$value)
} else {
data$value2 <- data$value
}
data$Group = forcats::fct_reorder(data[[y_axis_category]], data$value2, na.rm=TRUE)

data$Group <- forcats::fct_reorder(data[[y_axis_category]], data$value2, na.rm = TRUE)

n_group <- levels(data$Group) %>% length()

# set colour to be alternating
data$colour = ifelse(data$Group %in% levels(data$Group)[seq(1, n_group, by=2)],
# Set colour to be alternating
data$colour <- ifelse(data$Group %in% levels(data$Group)[seq(1, n_group, by = 2)],
"a", "b")

# set colour of group to highlight
if(!is.na(highlight) & highlight %in% data$Group) {
# Set colour of group to highlight
if (!is.na(highlight) && highlight %in% data$Group) {
data <- dplyr::mutate(data, colour = ifelse(.data$Group %in% highlight, "c", .data$colour))
}

vals <- list(minimum = purrr::pluck(austraits, "definitions", trait_name, "allowed_values_min"),
maximum = purrr::pluck(austraits, "definitions", trait_name, "allowed_values_max"))
vals <- list(minimum = purrr::pluck(austraits, "definitions", trait, "allowed_values_min"),
maximum = purrr::pluck(austraits, "definitions", trait, "allowed_values_max"))

range <- (vals$maximum/vals$minimum)

# Check range on y-axis
y.text <- ifelse(n_group > 20, 0.75, 1)
heights = c(1, max(1, n_group/7))
heights <- c(1, max(1, n_group / 7))

# Top plot - plain histogram of data
p1 <-
ggplot2::ggplot(data, ggplot2::aes(x=.data$value)) +
ggplot2::geom_histogram(ggplot2::aes(y = ..density..), color="darkgrey", fill="darkgrey", bins=50) +
ggplot2::geom_density(color="black") +
ggplot2::ggplot(data, ggplot2::aes(x = .data$value)) +
ggplot2::geom_histogram(ggplot2::aes(y = ..density..), color = "darkgrey", fill = "darkgrey", bins = 50) +
ggplot2::geom_density(color = "black") +
ggplot2::xlab("") + ggplot2::ylab("All data") +
ggplot2::theme_bw() +
ggplot2::theme(legend.position = "none",
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
axis.ticks.y= ggplot2::element_blank(),
axis.text= ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
panel.background = ggplot2::element_blank()
)
# Second plot -- dots by groups, using ggbeeswarm package
p2 <-
ggplot2::ggplot(data, ggplot2::aes(x = .data$value, y = .data$Group, colour = .data$colour, shape = .data$shapes)) +
ggbeeswarm::geom_quasirandom(groupOnX=FALSE) +
ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
ggplot2::ylab(paste("By ", y_axis_category)) +
# inclusion of custom shapes: for min, mean, unknown
# NB: this single line of code makes function about 4-5 slower for some reason
Expand All @@ -102,45 +103,45 @@ plot_trait_distribution_beeswarm <- function(austraits, trait_name, y_axis_categ
ggplot2::theme(legend.position = "none",
panel.grid.major.x = ggplot2::element_blank(),
panel.grid.minor.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(size=ggplot2::rel(1.25)),
axis.text.y = ggplot2::element_text(size=ggplot2::rel(y.text))
axis.text.x = ggplot2::element_text(size = ggplot2::rel(1.25)),
axis.text.y = ggplot2::element_text(size = ggplot2::rel(y.text))
) #+
# guides(colour=FALSE)


if(hide_ids) {
if (hide_ids) {
p2 <- p2 + ggplot2::theme(axis.text.y = ggplot2::element_blank())
}

#Sourced from https://gist.github.com/bbolker/5ba6a37d64b06a176e320b2b696b6733
scientific_10 <- function(x,suppress_ones=TRUE) {
scientific_10 <- function(x, suppress_ones = TRUE) {
s <- scales::scientific_format()(x)
## substitute for exact zeros
s[s=="0e+00"] <- "0"
s[s == "0e+00"] <- "0"
## regex: [+]? = "zero or one occurrences of '+'"
s2 <- gsub("e[+]?", " %*% 10^", s )
s2 <- gsub("e[+]?", " %*% 10^", s)
## suppress 1 x
if (suppress_ones) s2 <- gsub("1 %\\*% +","",s2)
parse(text=s2)
if (suppress_ones) s2 <- gsub("1 %\\*% +", "", s2)
parse(text = s2)
}

# Define scale on x-axis and transform to log if required
if(vals$minimum !=0 & range > 20) {
if (vals$minimum != 0 && range > 20) {
#log transformation
p1 <- p1 +
ggplot2::scale_x_log10(name="",
ggplot2::scale_x_log10(name = "",
breaks = scales::breaks_log(),
labels = scientific_10,
limits=c(vals$minimum, vals$maximum))
limits = c(vals$minimum, vals$maximum))
p2 <- p2 +
ggplot2::scale_x_log10(name=paste(trait_name, ' (', data$unit[1], ')'),
ggplot2::scale_x_log10(name = paste(trait, " (", data$unit[1], ")"),
breaks = scales::breaks_log(),
labels = scientific_10,
limits=c(vals$minimum, vals$maximum))
limits = c(vals$minimum, vals$maximum))
} else {
p1 <- p1 + ggplot2::scale_x_continuous(limits=c(vals$minimum, vals$maximum))
p2 <- p2 + ggplot2::scale_x_continuous(limits=c(vals$minimum, vals$maximum)) +
ggplot2::xlab(paste(trait_name, ' (', data$unit[1], ')'))
p1 <- p1 + ggplot2::scale_x_continuous(limits = c(vals$minimum, vals$maximum))
p2 <- p2 + ggplot2::scale_x_continuous(limits = c(vals$minimum, vals$maximum)) +
ggplot2::xlab(paste(trait, " (", data$unit[1], ")"))

}

Expand All @@ -152,5 +153,5 @@ plot_trait_distribution_beeswarm <- function(austraits, trait_name, y_axis_categ
p2 <- f(p2)
# Fix width of second plot to be same as bottom using ggplot_table
p1$widths[2:3] <- p2$widths[2:3]
gridExtra::grid.arrange(p1, p2, nrow=2, widths=c(1), heights=heights)
gridExtra::grid.arrange(p1, p2, nrow = 2, widths = c(1), heights = heights)
}
2 changes: 1 addition & 1 deletion inst/support/report_dataset.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ if (nrow(filter(data_study$excluded_data, error == "Value out of allowable range
writeLines()
}
plot_trait_distribution_beeswarm(austraits, trait, "dataset_id", highlight = dataset_id, hide_ids = TRUE)
traits.build::plot_trait_distribution_beeswarm(austraits, trait, "dataset_id", highlight = dataset_id, hide_ids = TRUE)
writeLines(c(""))
Expand Down
6 changes: 3 additions & 3 deletions man/plot_trait_distribution_beeswarm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ test_that("reports and plots are produced", {
# Not testing right now
#expect_no_error(
#p <-
#plot_trait_distribution_beeswarm(
#traits.build::plot_trait_distribution_beeswarm(
#austraits, "huber_value", "dataset_id", highlight = "Test_2022", hide_ids = TRUE)
#)
expect_silent(
Expand Down
4 changes: 3 additions & 1 deletion tests/testthat/test-usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ austraits <- readRDS("test_austraits.rds")
# Note, requires existnec of "test_austraits.rds", generated from `test-process.R`

test_that("plots", {
expect_invisible(suppressMessages(austraits %>% plot_trait_distribution_beeswarm("wood_density", "dataset_id", "Test_2022")))
expect_invisible(suppressMessages(
austraits %>% traits.build::plot_trait_distribution_beeswarm("wood_density", "dataset_id", "Test_2022")
))
})

0 comments on commit 256ab7a

Please sign in to comment.