-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
58 changed files
with
19,042 additions
and
9 deletions.
There are no files selected for viewing
149 changes: 149 additions & 0 deletions
149
_posts/2024-09-09-wonderful-wednesdays-march-2024/code/MicrobiomeChallenge - T M.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Microbiome Challenge | ||
|
||
# WonderfulWednesday | ||
|
||
library(tidyverse) | ||
library(stringr) | ||
library(scales) | ||
library(RColorBrewer) | ||
library(glue) | ||
library(janitor) | ||
library(readr) | ||
|
||
root_dir <- here::here() | ||
tues_dir <- file.path(root_dir, | ||
"microbiome") | ||
tt_theme <- "microbiome" | ||
author_initials <- "TomMarlow" | ||
azTidyTuesday_date <- "20240228" | ||
|
||
# Relative abundance of each microbial community in each woman in pregnancy and the perinatal period | ||
ra <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_relative_abundance.csv") | ||
# Taxonomy information of each microbial community | ||
ti <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_taxonomy_info.csv") | ||
# Clinical information of each woman (simulated data) | ||
sm <- file.path(tues_dir, "202402_Collaboration_DataVizESIG-BiomarkersESIG_simulated_metadata.csv") | ||
|
||
rel_dat <- read.csv(ra, sep = ";") | ||
sim_dat <- read.csv(sm, sep = ";") | ||
tax_dat <- read.csv(ti, sep = ";") | ||
|
||
rel_id <- rel_dat %>% | ||
pivot_longer(., | ||
cols = 2:ncol(.), | ||
names_to = "id", | ||
values_to = "rel_abu", | ||
values_drop_na = TRUE) %>% | ||
mutate(sample_id = substr(id, 2, nchar(id))) %>% | ||
filter(rel_abu != 0) | ||
|
||
# Merge to get audio characteristics | ||
|
||
microb <- merge(x = rel_id, | ||
y = sim_dat, | ||
by.x = c("sample_id"), | ||
by.y = c("sample_id"), | ||
all.y = FALSE) | ||
|
||
|
||
# Taxonomy details | ||
|
||
micro_tax <- merge(x = microb, | ||
y = tax_dat, | ||
by.x = c("taxID"), | ||
by.y = c("taxID"), | ||
all.y = FALSE) %>% | ||
mutate(abu = as.numeric(gsub(",", ".", rel_abu))) | ||
|
||
table(micro_tax$phylum, micro_tax$group) | ||
table(micro_tax$class, micro_tax$group) | ||
|
||
table(sim_dat$subject_id) | ||
|
||
msd_pat <- micro_tax %>% | ||
group_by(group, subject_id, visit_number, class) %>% | ||
summarise(suma = sum(abu)) %>% | ||
mutate(sump = suma * 100) %>% | ||
mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>% | ||
mutate(Class = gsub("c__", "", class)) | ||
|
||
unique(msd_pat$Class) | ||
|
||
cls_avg <- msd_pat %>% | ||
group_by(Class) %>% | ||
summarise(mean = mean(sump)) %>% | ||
arrange(-mean) | ||
|
||
cls_pop <- as.data.frame(table(msd_pat$Class)) %>% | ||
arrange(-Freq) %>% | ||
rename(Class = Var1) | ||
|
||
msd_pat <- micro_tax %>% | ||
group_by(group, subject_id, visit_number, phylum) %>% | ||
summarise(suma = sum(abu)) %>% | ||
mutate(sump = suma * 100) %>% | ||
mutate(visits = if_else(visit_number >= 5, 5, visit_number)) %>% | ||
mutate(Phylum = gsub("p__", "", phylum)) | ||
write.csv(msd_pat, file.path(tues_dir, "pat.csv")) | ||
|
||
phy_avg <- msd_pat %>% | ||
group_by(Phylum) %>% | ||
summarise(mean = mean(sump)) %>% | ||
arrange(-mean) | ||
|
||
phy_pop <- as.data.frame(table(msd_pat$Phylum)) %>% | ||
arrange(-Freq) %>% | ||
rename(Phylum = Var1) | ||
|
||
# Remove low frequency phylums / classes | ||
|
||
phy_hi <- c(unlist(phy_pop$Phylum[phy_pop$Freq >= 20])) | ||
cls_hi <- c(unlist(cls_pop$Class[cls_pop$Freq >= 1000])) | ||
|
||
|
||
# Line plot | ||
|
||
lin_dat <- msd_pat %>% | ||
group_by(Class, group, visit_number) %>% | ||
summarise(meanab = mean(sump)) %>% | ||
mutate(Class = factor(Class, | ||
levels = cls_avg$Class, | ||
labels = cls_avg$Class)) %>% | ||
filter(Class %in% cls_hi) | ||
|
||
line_fig <- ggplot() + | ||
geom_line( | ||
data = lin_dat, linewidth = 2, | ||
aes( | ||
x = visit_number, y = meanab, group = group, | ||
colour = group | ||
) | ||
) + | ||
xlab("") + | ||
ylab(NULL) + | ||
labs( | ||
title = "Mean relative abundance by microbiome class, term and visit number", | ||
caption = "Multi-Omic Microbiome Study: Pregnancy Initiative (MOMS-PI) is part of the longitudinal Human Microbiome Project (HMP)", | ||
colour = "Term", | ||
x = "Visit number", | ||
y = "Mean abundance %" | ||
) + | ||
theme_bw() + | ||
theme( | ||
strip.text.x = element_text(size = 10), | ||
axis.title = element_text(size = 10), | ||
axis.text.y = element_text(size = 8), | ||
axis.text.x = element_text(size = 8, angle = 0, hjust = .5), | ||
legend.position = "bottom" | ||
) + | ||
scale_color_viridis_d(option = "viridis") + | ||
scale_fill_viridis_d(option = "viridis") + | ||
facet_wrap(. ~ Class) | ||
|
||
line_fig | ||
|
||
ggsave( | ||
plot = line_fig, | ||
filename = glue("{root_dir}/azTidyTuesday_{azTidyTuesday_date}_{tt_theme}_{author_initials}_{Sys.Date()}.jpg"), | ||
width = 16, | ||
height = 9) |
173 changes: 173 additions & 0 deletions
173
...s/2024-09-09-wonderful-wednesdays-march-2024/code/WW_Mar24_SteveMallett - Steve Mallett.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
library(readr) | ||
library(tidyr) | ||
library(tidyverse) | ||
library(ggplot2) | ||
library(cowplot) | ||
library(RColorBrewer) | ||
library(paletteer) | ||
library(colorspace) | ||
|
||
|
||
################################################ | ||
# Combine abundance, taxonomy and clinical data | ||
|
||
clin1 <- read_csv2("./Data/Metadata.csv") %>% | ||
arrange(subject_id, visit_number) | ||
|
||
tax <- read_csv2("./Data/Taxonomy_info.csv") | ||
|
||
abun1 <- read_csv2("./Data/Relative_abundance.csv") %>% | ||
gather(key = "sample_id", value = "Value", -taxID) %>% | ||
group_by(taxID, sample_id) %>% | ||
filter(Value>0) | ||
|
||
abun3 <- left_join(clin1, abun1, by = "sample_id") | ||
abun4 <- left_join(tax, abun3, by = "taxID") %>% | ||
arrange(subject_id, visit_number, taxID) | ||
|
||
################################################ | ||
# Identify most prevalent taxIDs and subset data | ||
|
||
num_gen <- 6 | ||
|
||
tax_prev <- abun4 %>% | ||
group_by(genus) %>% | ||
summarise(mn = mean(Value)) %>% | ||
arrange(desc(mn)) %>% | ||
select(genus) | ||
|
||
tax_prev2 <- tax_prev[1:num_gen,] %>% | ||
as_vector() | ||
|
||
abun5 <- abun4 %>% | ||
filter(genus %in% tax_prev2) | ||
|
||
################################################ | ||
# Groups subjects by most prevalent genus | ||
|
||
overall_most.prev<- abun5 %>% | ||
group_by(genus) %>% | ||
summarise(sum = sum(Value)) %>% | ||
arrange(desc(sum)) %>% | ||
select(genus) %>% | ||
mutate(sort.order = row_number()) | ||
|
||
################################################ | ||
# Create plots | ||
|
||
myColors <- brewer.pal(num_gen,"Set1") | ||
names(myColors) <- levels(overall_most.prev$genus) | ||
|
||
make.plot <- function(gp, vs) { | ||
|
||
tmp <- abun5 %>% | ||
filter(group == gp & visit_number == vs) %>% | ||
group_by(subject_id, genus) %>% | ||
summarise(sum = sum(Value)) %>% | ||
arrange(subject_id, desc(sum)) | ||
# select(subject_id, genus, Value) | ||
|
||
sub_most.prev <- tmp %>% | ||
group_by(subject_id, genus) %>% | ||
arrange(subject_id, desc(sum)) %>% | ||
group_by(subject_id) %>% | ||
slice(1) %>% | ||
select(subject_id, genus) | ||
|
||
sub_most.prev2 <- left_join(sub_most.prev, overall_most.prev, by = "genus") %>% | ||
ungroup() %>% | ||
select(subject_id, sort.order) | ||
|
||
tmp2 <- left_join(tmp, sub_most.prev2, by = "subject_id") | ||
|
||
subID <- tmp2 %>% | ||
group_by(sort.order, subject_id) %>% | ||
distinct(subject_id) %>% | ||
arrange(sort.order) %>% | ||
ungroup() %>% | ||
mutate(new.subjid = row_number()) %>% | ||
select(subject_id, new.subjid) | ||
|
||
tmp3 <- left_join(tmp2, subID, by = "subject_id") | ||
|
||
ggplot(tmp3, aes(fill=genus, y=sum, x=new.subjid)) + | ||
geom_bar(show.legend = TRUE, | ||
position="fill", stat="identity", width=1, color="black", linewidth=0.1) + | ||
coord_polar(theta="x") + | ||
scale_fill_manual(name = "genus", values = myColors) + | ||
scale_y_continuous(limits=c(-0.2, 1)) + | ||
theme( | ||
panel.background=element_rect(fill="white"), | ||
axis.text=element_blank(), | ||
axis.ticks=element_blank(), | ||
axis.title=element_blank(), | ||
legend.text=element_text(size=14), | ||
legend.title=element_text(size=14) | ||
) + | ||
annotate("text", label = str_c("Visit ", vs), x = 0, y = -0.2, color = "black", size=5) | ||
} | ||
|
||
p1 <- make.plot("Preterm delivery", 1) | ||
p2 <- make.plot("Preterm delivery", 2) | ||
p3 <- make.plot("Preterm delivery", 3) | ||
p4 <- make.plot("Preterm delivery", 4) | ||
p5 <- make.plot("Term delivery", 1) | ||
p6 <- make.plot("Term delivery", 2) | ||
p7 <- make.plot("Term delivery", 3) | ||
p8 <- make.plot("Term delivery", 4) | ||
|
||
p_ptd <- plot_grid( | ||
p1 + theme(legend.position="none"), | ||
p2 + theme(legend.position="none"), | ||
p3 + theme(legend.position="none"), | ||
p4 + theme(legend.position="none"), | ||
align = 'vh', | ||
hjust = -1, | ||
ncol = 4, | ||
nrow = 1 | ||
) | ||
|
||
p_td <- plot_grid( | ||
p5 + theme(legend.position="none"), | ||
p6 + theme(legend.position="none"), | ||
p7 + theme(legend.position="none"), | ||
p8 + theme(legend.position="none"), | ||
align = 'vh', | ||
labels = c(" ", " ", " ", " "), | ||
hjust = -1, | ||
ncol = 4, | ||
nrow = 1 | ||
) | ||
|
||
################################################ | ||
# Formatting and output to file | ||
|
||
title0 <- ggdraw() + | ||
draw_label("Longitudinal Human Microbiome Project (MOMS-PI Pre-Term Birth Study)\n", size = 20, color = "blue") | ||
|
||
title1 <- ggdraw() + | ||
draw_label("Pre-Term Delivery", size = 22) | ||
|
||
title2 <- ggdraw() + | ||
draw_label("Term Delivery", size = 22) | ||
|
||
legend <- get_legend( | ||
p2 + theme(legend.box.margin = margin(0, 0, 0, 12)) | ||
) | ||
|
||
cap <- "How to read this chart: Each wheel represents a set of samples from the Pre-Term or Term Delivery cohort at a particular visit. Each spoke in the wheel represents a single participant in the study, with coloured segments representing the relative abundance | ||
of bacteria types (genus) in the sample. For simplicity, only the most prevalent genus types and the first four visits are included. Approximately 75-80% of samples had Lactobacillus as the dominant genus, with no obvious difference between visits or cohorts." | ||
|
||
caption <- ggdraw() + | ||
draw_label(cap, size = 14) | ||
|
||
ptd2 <- plot_grid(title1, p_ptd, ncol=1, rel_heights = c(1.7, 25), align = "v") | ||
td2 <- plot_grid(title2, p_td, ncol=1, rel_heights = c(1.7, 25), align = "v") | ||
all1 <- plot_grid(ptd2, td2, ncol=1, rel_heights = c(1, 1), align = "v") | ||
all2 <- plot_grid(all1, legend, rel_widths = c(3, 0.5)) | ||
all3 <- plot_grid(title0, all2, caption, ncol=1, rel_heights = c(8, 100, 5), align = "v") + | ||
theme(plot.background = element_rect(fill = "white", colour = "blue")) | ||
|
||
ggsave("./WW_Mar24_01_OkabeIto.png", all3, width=24, height=12, dpi=300) | ||
|
||
################################################ |
Oops, something went wrong.