Add March entry

VIS-SIG · Sep 9, 2024 · db5d597 · db5d597
1 parent a8e63e6
commit db5d597
Show file tree

Hide file tree

Showing 58 changed files with 19,042 additions and 9 deletions.
diff --git a/_posts/2024-09-09-wonderful-wednesdays-march-2024/code/MicrobiomeChallenge - T M.R b/_posts/2024-09-09-wonderful-wednesdays-march-2024/code/MicrobiomeChallenge - T M.R
@@ -0,0 +1,149 @@
+# Microbiome Challenge
+
+# WonderfulWednesday
+
+library(tidyverse)
+library(stringr)
+library(scales)
+library(RColorBrewer)
+library(glue)
+library(janitor)
+library(readr)
+
+root_dir <- here::here()
+tues_dir <- file.path(root_dir,
+                      "microbiome")
+tt_theme <- "microbiome"
+author_initials <- "TomMarlow"
+azTidyTuesday_date <- "20240228"
+
+# Relative abundance of each microbial community in each woman in pregnancy and the perinatal period
+ra <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_relative_abundance.csv")
+# Taxonomy information of each microbial community
+ti <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_taxonomy_info.csv")
+# Clinical information of each woman (simulated data)
+sm <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_simulated_metadata.csv")
+
+rel_dat <- read.csv(ra, sep = ";")
+sim_dat <- read.csv(sm, sep = ";")
+tax_dat <- read.csv(ti, sep = ";")
+
+rel_id <- rel_dat %>%
+  pivot_longer(.,
+               cols = 2:ncol(.),
+               names_to = "id",
+               values_to = "rel_abu",
+               values_drop_na = TRUE) %>%
+  mutate(sample_id = substr(id, 2, nchar(id))) %>%
+  filter(rel_abu != 0)
+
+# Merge to get audio characteristics
+
+microb <- merge(x = rel_id, 
+                y = sim_dat, 
+              by.x = c("sample_id"), 
+              by.y = c("sample_id"), 
+              all.y = FALSE) 
+
+
+# Taxonomy details
+
+micro_tax <- merge(x = microb, 
+                y = tax_dat, 
+                by.x = c("taxID"), 
+                by.y = c("taxID"), 
+                all.y = FALSE) %>%
+  mutate(abu = as.numeric(gsub(",", ".", rel_abu)))
+
+table(micro_tax$phylum, micro_tax$group)
+table(micro_tax$class, micro_tax$group)
+
+table(sim_dat$subject_id)
+
+msd_pat <- micro_tax %>%
+  group_by(group, subject_id, visit_number, class) %>%
+  summarise(suma = sum(abu)) %>%
+  mutate(sump = suma * 100) %>%
+  mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>%
+  mutate(Class = gsub("c__", "", class))
+
+unique(msd_pat$Class)
+
+cls_avg <- msd_pat %>%
+  group_by(Class) %>%
+  summarise(mean = mean(sump)) %>%
+  arrange(-mean)
+
+cls_pop <- as.data.frame(table(msd_pat$Class)) %>%
+  arrange(-Freq) %>%
+  rename(Class = Var1)
+
+msd_pat <- micro_tax %>%
+  group_by(group, subject_id, visit_number, phylum) %>%
+  summarise(suma = sum(abu)) %>%
+  mutate(sump = suma * 100) %>%
+  mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>%
+  mutate(Phylum = gsub("p__", "", phylum))
+write.csv(msd_pat, file.path(tues_dir, "pat.csv"))
+
+phy_avg <- msd_pat %>%
+  group_by(Phylum) %>%
+  summarise(mean = mean(sump)) %>%
+  arrange(-mean)
+
+phy_pop <- as.data.frame(table(msd_pat$Phylum)) %>%
+  arrange(-Freq) %>%
+  rename(Phylum = Var1)
+
+# Remove low frequency phylums / classes
+
+phy_hi <- c(unlist(phy_pop$Phylum[phy_pop$Freq >= 20]))
+cls_hi <- c(unlist(cls_pop$Class[cls_pop$Freq >= 1000]))
+
+
+# Line plot
+
+lin_dat <- msd_pat %>%
+  group_by(Class, group, visit_number) %>%
+  summarise(meanab = mean(sump)) %>%
+  mutate(Class = factor(Class,
+                         levels = cls_avg$Class,
+                         labels = cls_avg$Class)) %>%
+  filter(Class %in% cls_hi) 
+
+line_fig <- ggplot() +
+  geom_line(
+    data = lin_dat, linewidth = 2,
+    aes(
+      x = visit_number, y = meanab, group = group,
+      colour = group
+    )
+  ) +
+  xlab("") +
+  ylab(NULL) +
+  labs(
+    title = "Mean relative abundance by microbiome class, term and visit number",
+    caption = "Multi-Omic Microbiome Study: Pregnancy Initiative (MOMS-PI) is part of the longitudinal Human Microbiome Project (HMP)",
+    colour = "Term",
+    x = "Visit number",
+    y = "Mean abundance %"
+  ) +
+  theme_bw() +
+  theme(
+    strip.text.x = element_text(size = 10),
+    axis.title = element_text(size = 10),
+    axis.text.y = element_text(size = 8),
+    axis.text.x = element_text(size = 8, angle = 0, hjust = .5),
+    legend.position = "bottom"
+  ) +
+  scale_color_viridis_d(option = "viridis") +
+  scale_fill_viridis_d(option = "viridis") +
+  facet_wrap(. ~ Class)
+
+line_fig
+
+ggsave(
+  plot = line_fig,
+  filename = glue("{root_dir}/azTidyTuesday_{azTidyTuesday_date}_{tt_theme}_{author_initials}_{Sys.Date()}.jpg"),
+  width = 16,
+  height = 9)
diff --git a/...s/2024-09-09-wonderful-wednesdays-march-2024/code/WW_Mar24_SteveMallett - Steve Mallett.R b/...s/2024-09-09-wonderful-wednesdays-march-2024/code/WW_Mar24_SteveMallett - Steve Mallett.R
@@ -0,0 +1,173 @@
+library(readr)
+library(tidyr)
+library(tidyverse)
+library(ggplot2)
+library(cowplot)
+library(RColorBrewer)
+library(paletteer)
+library(colorspace)
+
+
+################################################
+# Combine abundance, taxonomy and clinical data
+
+clin1 <- read_csv2("./Data/Metadata.csv") %>%
+  arrange(subject_id, visit_number)
+
+tax <- read_csv2("./Data/Taxonomy_info.csv")
+
+abun1 <- read_csv2("./Data/Relative_abundance.csv") %>%
+  gather(key = "sample_id", value = "Value", -taxID) %>%
+  group_by(taxID, sample_id) %>%
+  filter(Value>0)
+
+abun3 <- left_join(clin1, abun1, by = "sample_id") 
+abun4 <- left_join(tax, abun3, by = "taxID") %>% 
+  arrange(subject_id, visit_number, taxID)
+
+################################################
+# Identify most prevalent taxIDs and subset data
+
+num_gen <- 6
+
+tax_prev <- abun4 %>% 
+  group_by(genus) %>% 
+  summarise(mn = mean(Value)) %>% 
+  arrange(desc(mn)) %>% 
+  select(genus) 
+
+tax_prev2 <- tax_prev[1:num_gen,] %>% 
+  as_vector()
+
+abun5 <- abun4 %>% 
+  filter(genus %in% tax_prev2)
+
+################################################
+# Groups subjects by most prevalent genus
+
+overall_most.prev<- abun5 %>% 
+  group_by(genus) %>% 
+  summarise(sum = sum(Value)) %>% 
+  arrange(desc(sum)) %>% 
+  select(genus) %>% 
+  mutate(sort.order = row_number())
+
+################################################
+# Create plots
+
+myColors <- brewer.pal(num_gen,"Set1")
+names(myColors) <- levels(overall_most.prev$genus)
+
+make.plot <- function(gp, vs) {
+
+  tmp <- abun5 %>% 
+    filter(group == gp & visit_number == vs) %>% 
+    group_by(subject_id, genus) %>% 
+    summarise(sum = sum(Value)) %>% 
+    arrange(subject_id, desc(sum))
+  # select(subject_id, genus, Value)
+
+  sub_most.prev <- tmp %>% 
+    group_by(subject_id, genus) %>% 
+    arrange(subject_id, desc(sum)) %>% 
+    group_by(subject_id) %>%
+    slice(1) %>% 
+    select(subject_id, genus)
+
+  sub_most.prev2 <- left_join(sub_most.prev, overall_most.prev, by = "genus") %>% 
+    ungroup() %>% 
+    select(subject_id, sort.order)
+
+  tmp2 <- left_join(tmp, sub_most.prev2, by = "subject_id")
+
+  subID <- tmp2 %>% 
+    group_by(sort.order, subject_id) %>% 
+    distinct(subject_id) %>% 
+    arrange(sort.order) %>% 
+    ungroup() %>% 
+    mutate(new.subjid = row_number()) %>% 
+    select(subject_id, new.subjid)
+
+  tmp3 <- left_join(tmp2, subID, by = "subject_id") 
+
+  ggplot(tmp3, aes(fill=genus, y=sum, x=new.subjid)) + 
+    geom_bar(show.legend = TRUE,
+             position="fill", stat="identity", width=1, color="black", linewidth=0.1) +
+    coord_polar(theta="x") +
+    scale_fill_manual(name = "genus", values = myColors) +
+    scale_y_continuous(limits=c(-0.2, 1)) +
+    theme(
+      panel.background=element_rect(fill="white"),
+      axis.text=element_blank(),
+      axis.ticks=element_blank(),
+      axis.title=element_blank(),
+      legend.text=element_text(size=14),
+      legend.title=element_text(size=14)
+    ) +
+    annotate("text", label = str_c("Visit ", vs), x = 0, y = -0.2, color = "black", size=5)
+}
+
+p1 <- make.plot("Preterm delivery", 1)
+p2 <- make.plot("Preterm delivery", 2)
+p3 <- make.plot("Preterm delivery", 3)
+p4 <- make.plot("Preterm delivery", 4)
+p5 <- make.plot("Term delivery", 1)
+p6 <- make.plot("Term delivery", 2)
+p7 <- make.plot("Term delivery", 3)
+p8 <- make.plot("Term delivery", 4)
+
+p_ptd <- plot_grid(
+  p1 + theme(legend.position="none"),
+  p2 + theme(legend.position="none"),
+  p3 + theme(legend.position="none"),
+  p4 + theme(legend.position="none"),
+  align = 'vh',
+  hjust = -1,
+  ncol = 4,
+  nrow = 1
+)
+
+p_td <- plot_grid(
+  p5 + theme(legend.position="none"),
+  p6 + theme(legend.position="none"),
+  p7 + theme(legend.position="none"),
+  p8 + theme(legend.position="none"),
+  align = 'vh',
+  labels = c("  ", " ", " ", " "),
+  hjust = -1,
+  ncol = 4,
+  nrow = 1
+)
+
+################################################
+# Formatting and output to file
+
+title0 <- ggdraw() +
+  draw_label("Longitudinal Human Microbiome Project (MOMS-PI Pre-Term Birth Study)\n", size = 20, color = "blue")
+
+title1 <- ggdraw() +
+  draw_label("Pre-Term Delivery", size = 22)
+
+title2 <- ggdraw() + 
+  draw_label("Term Delivery", size = 22)
+
+legend <- get_legend(
+  p2 + theme(legend.box.margin = margin(0, 0, 0, 12))
+)
+
+cap <- "How to read this chart: Each wheel represents a set of samples from the Pre-Term or Term Delivery cohort at a particular visit. Each spoke in the wheel represents a single participant in the study, with coloured segments representing the relative abundance
+of bacteria types (genus) in the sample. For simplicity, only the most prevalent genus types and the first four visits are included. Approximately 75-80% of samples had Lactobacillus as the dominant genus, with no obvious difference between visits or cohorts."
+
+caption <- ggdraw() + 
+  draw_label(cap, size = 14)
+
+ptd2 <- plot_grid(title1, p_ptd, ncol=1, rel_heights = c(1.7, 25), align = "v")  
+td2 <- plot_grid(title2, p_td, ncol=1, rel_heights = c(1.7, 25), align = "v") 
+all1 <- plot_grid(ptd2, td2, ncol=1, rel_heights = c(1, 1), align = "v") 
+all2 <- plot_grid(all1, legend, rel_widths = c(3, 0.5))
+all3 <- plot_grid(title0, all2, caption, ncol=1, rel_heights = c(8, 100, 5), align = "v") +
+  theme(plot.background = element_rect(fill = "white", colour = "blue"))
+
+ggsave("./WW_Mar24_01_OkabeIto.png", all3, width=24, height=12, dpi=300)
+
+################################################