title | output | editor_options | ||||||
---|---|---|---|---|---|---|---|---|
Multilocus Genotype And Mycelial Compatability Group Assessment |
|
|
The purpose of this document is to assess the distribution of the MLGs and MCGs within the data. Specifically, we want to know if there are any MLGs that consistently coordinate with a single MCG, or if there are anything close.
library('igraph')
library('ggraph')
library('tidyverse')
library('poppr')
library('viridis')
library('visNetwork')
load(file.path(PROJHOME, "data", "sclerotinia_16_loci.rda"))
load(file.path(PROJHOME, "data", "mlg-crosspop-graph.rda"))
dat11
##
## This is a genclone object
## -------------------------
## Genotype information:
##
## 165 original multilocus genotypes
## 366 haploid individuals
## 11 codominant loci
##
## Population information:
##
## 5 strata - MCG, Region, Source, Year, Host
## 14 populations defined - NE, NY, MN, ..., France, Mexico, ND
g11o <- igraph::cluster_optimal(graph11loc$total)
comm <- igraph::communities(g11o)
names(comm) <- c("International", "Costal", "Midwest")
comm
## $International
## [1] "Australia" "France" "MN"
##
## $Costal
## [1] "CA" "NY" "OR" "WA"
##
## $Midwest
## [1] "CO" "MI" "ND" "NE"
strata(dat11) <- strata(dat11) %>%
mutate(MLGRegion = case_when(
.$Region %in% comm$International ~ "International",
.$Region %in% comm$Costal ~ "Costal",
.$Region %in% comm$Midwest ~ "Midwest",
TRUE ~ as.character(.$Region)
))
setPop(dat11) <- ~MLGRegion
pal <- unlist(map2(LETTERS[2:4], comm, function(a, b) setNames(viridis::viridis(length(b), option = a), b)))
pal <- c(pal, setNames(rep("#F6F6F6FF", 3), c("Mexico", "ID", "WI")))
strat <- bind_cols(strata(dat11),
other(dat11)$meta,
data_frame(MLG = mll(dat11, "original")))
First, it would be nice to visualize the MLGs across populations
mtab <- mlg.table(dat11, color = TRUE)
mplot <- last_plot()
mplot + scale_fill_viridis(discrete = TRUE, direction = -1, option = "C") +
aes(color = I("black"))
Now we can take a look at the concordance of MLGs to MCGs. We can do this by creating a contigency table. Of course, since we have 87 and well over 100 MLGs, this means that the contingency table is going to be big, so to summarize it further, I'm creating two tables, one based on MLGs that will count the number of MCGs within each MLG and vice-versa. Of course we lose information like, if an MCG contains several MLGs, how can we tell what the abundance is? A handy measure is Evenness, which scales from 0 to 1, indicating how skewed the observations are.
mll.custom(dat11) <- strata(dat11)$MCG
mcgmlg <- as.data.frame(table(mll(dat11, "original"), mll(dat11, "custom"))) %>%
setNames(c("MLG", "MCG", "Freq")) %>%
mutate(MLG = as.character(MLG)) %>%
mutate(MCG = as.character(MCG)) %>%
as_tibble() %>%
filter(Freq > 0)
mcgs <- mcgmlg %>%
arrange(MCG) %>%
group_by(MCG) %>%
mutate(MLGs = sum(Freq > 0),
Samples = sum(Freq),
Evenness = diversity_stats(Freq)["E.5"],
data = list(data_frame(MLG = MLG, Freq = Freq) %>% arrange(desc(Freq)))) %>%
arrange(desc(MLGs)) %>%
ungroup() %>%
distinct(MCG, MLGs, Samples, Evenness, data)
mlgs <- mcgmlg %>%
arrange(MLG) %>%
group_by(MLG) %>%
mutate(MCGs = sum(Freq > 0),
Samples = sum(Freq),
Evenness = diversity_stats(Freq)["E.5"],
data = list(data_frame(MCG = MCG, Freq = Freq) %>% arrange(desc(Freq)))) %>%
arrange(desc(Samples), desc(MCGs)) %>%
ungroup() %>%
distinct(MLG, MCGs, Samples, Evenness, data)
mcgs
## # A tibble: 87 x 5
## MCG MLGs Samples Evenness data
## <chr> <int> <int> <dbl> <list>
## 1 5 37 73 0.4811239 <tibble [37 x 2]>
## 2 44 19 36 0.6249418 <tibble [19 x 2]>
## 3 1 10 15 0.8217819 <tibble [10 x 2]>
## 4 2 9 10 0.9517005 <tibble [9 x 2]>
## 5 4 9 14 0.7242515 <tibble [9 x 2]>
## 6 53 9 9 1.0000000 <tibble [9 x 2]>
## 7 3 8 8 1.0000000 <tibble [8 x 2]>
## 8 9 8 15 0.5493741 <tibble [8 x 2]>
## 9 45 7 16 0.6304310 <tibble [7 x 2]>
## 10 16 6 7 0.9371824 <tibble [6 x 2]>
## # ... with 77 more rows
mlgs
## # A tibble: 165 x 5
## MLG MCGs Samples Evenness data
## <chr> <int> <int> <dbl> <list>
## 1 25 5 27 0.5490333 <tibble [5 x 2]>
## 2 163 2 15 0.9955736 <tibble [2 x 2]>
## 3 65 2 11 0.5560301 <tibble [2 x 2]>
## 4 140 3 10 0.9063854 <tibble [3 x 2]>
## 5 66 1 8 NaN <tibble [1 x 2]>
## 6 165 3 7 0.6693363 <tibble [3 x 2]>
## 7 78 4 6 0.8116548 <tibble [4 x 2]>
## 8 160 4 5 0.9218931 <tibble [4 x 2]>
## 9 104 2 5 0.7246677 <tibble [2 x 2]>
## 10 109 3 4 0.9115303 <tibble [3 x 2]>
## # ... with 155 more rows
These tables are a good start, but we want to have a publication-ready table.
fct2int <- function(x) as.integer(as.character(x))
psex_from_graph <- function(g, MLG){
weights <- setNames(E(g)$weight, E(g)$label)
weights <- split(weights, names(weights))
if (length(MLG) > 1){
weights <- weights[MLG]
res <- 1 - vapply(weights, unique, numeric(1))
} else {
res <- 1 - unique(weights[[MLG]])
}
res
}
# Paste region and count vectors
pasteRC <- function(R, C){
ord <- rev(order(C))
RC <- paste(R[ord], C[ord], sep = " (")
RC <- paste(RC, collapse = "), ")
paste0(RC, ")")
}
mlg_table <- mcgmlg %>%
filter(MLG %in% mlgs$MLG[1:5]) %>%
mutate(MLG = as.integer(MLG)) %>%
select(-Freq) %>%
inner_join(strat) %>%
group_by(MLG, MCG, Region) %>%
summarize(N = n()) %>%
ungroup() %>%
group_by(MLG, MCG) %>%
summarize(Region = pasteRC(Region, N), N = sum(N)) %>%
ungroup() %>%
mutate(mlmatch = match(MLG, mlgs$MLG[1:5])) %>%
arrange(mlmatch, desc(N), MCG) %>%
group_by(MLG) %>%
mutate(Psex = psex_from_graph(graph11loc$total, as.character(unique(MLG)))) %>%
ungroup() %>%
select(MLG, Psex, MCG, Region) %>%
mutate(Psex = ifelse(duplicated(MLG), "", format(Psex, scientific = FALSE, digits = 2))) %>%
mutate(MLG = ifelse(duplicated(MLG), "", as.character(MLG)))
## Joining, by = c("MLG", "MCG")
## Warning: Column `MCG` joining character vector and factor, coercing into
## character vector
mcg_table <- mcgmlg %>%
filter(MCG %in% mcgs$MCG[1:5]) %>%
mutate(MLG = as.integer(MLG)) %>%
select(-Freq) %>%
inner_join(strat) %>%
group_by(MLG, MCG, Region) %>%
summarize(N = n()) %>%
ungroup() %>%
group_by(MLG, MCG) %>%
summarize(Region = pasteRC(Region, N), N = sum(N)) %>%
ungroup() %>%
mutate(mlmatch = match(MCG, mcgs$MCG[1:5])) %>%
arrange(mlmatch, desc(N), MLG) %>%
select(MCG, MLG, Region) %>%
mutate(MCG = ifelse(duplicated(MCG), "", as.character(MCG))) %>%
mutate(MLG = as.character(MLG))
## Joining, by = c("MLG", "MCG")
## Warning: Column `MCG` joining character vector and factor, coercing into
## character vector
kable(mlg_table, caption = "Top 5 MLGs with associated MCGs and Regions (number of samples in parenthesis)")
MLG | Psex | MCG | Region |
---|---|---|---|
25 | 0.016824 | 5 | ND (15), CO (2), MI (2) |
13 | ND (3) | ||
60 | ND (2), WA (1) | ||
1 | NE (1) | ||
4 | MI (1) | ||
163 | 0.049932 | 45 | CO (5), ND (2), NE (1) |
5 | MI (7) | ||
65 | 0.000071 | 44 | NE (10) |
5 | MI (1) | ||
140 | 0.000155 | 8 | CO (5) |
5 | MI (3) | ||
20 | MI (2) | ||
66 | 0.000016 | 9 | NE (4), CO (2), MI (2) |
if (!dir.exists(file.path(PROJHOME, "results/tables"))) dir.create(file.path(PROJHOME, "results/tables"))
readr::write_csv(mlg_table, path = file.path(PROJHOME, "results/tables/mlg_table.csv"))
kable(mcg_table, caption = "Top 5 MCGs with associated MLGs and Regions (number of samples in parenthesis)")
MCG | MLG | Region |
---|---|---|
5 | 25 | ND (15), CO (2), MI (2) |
163 | MI (7) | |
15 | ND (1), CO (1), WA (1), MI (1) | |
39 | WA (2), ND (1), CO (1) | |
9 | ND (2), MI (1) | |
42 | OR (2), NY (1) | |
140 | MI (3) | |
7 | ND (1) | |
12 | MI (1) | |
16 | MI (1) | |
20 | MI (1) | |
21 | ND (1) | |
22 | CO (1) | |
23 | OR (1) | |
24 | OR (1) | |
26 | ND (1) | |
29 | MI (1) | |
35 | MI (1) | |
36 | WA (1) | |
37 | WI (1) | |
43 | WI (1) | |
45 | WA (1) | |
46 | OR (1) | |
65 | MI (1) | |
72 | WA (1) | |
75 | MI (1) | |
76 | MI (1) | |
83 | OR (1) | |
88 | MI (1) | |
90 | ND (1) | |
106 | CO (1) | |
109 | MI (1) | |
127 | WA (1) | |
130 | ND (1) | |
138 | MI (1) | |
146 | MI (1) | |
153 | MI (1) | |
44 | 65 | NE (10) |
56 | NE (3), CO (1) | |
68 | MN (3) | |
78 | ND (3) | |
75 | ND (2) | |
2 | MN (1) | |
3 | MN (1) | |
4 | MN (1) | |
52 | CO (1) | |
63 | NE (1) | |
70 | CO (1) | |
71 | CO (1) | |
76 | ND (1) | |
123 | MN (1) | |
141 | MN (1) | |
145 | MI (1) | |
156 | NE (1) | |
160 | NE (1) | |
161 | MI (1) | |
1 | 104 | NE (4) |
103 | NE (2) | |
128 | NE (2) | |
7 | NE (1) | |
17 | NE (1) | |
25 | NE (1) | |
47 | NE (1) | |
107 | France (1) | |
134 | NE (1) | |
153 | NE (1) | |
2 | 103 | WA (2) |
8 | WA (1) | |
12 | WA (1) | |
17 | OR (1) | |
18 | WA (1) | |
38 | WA (1) | |
69 | WA (1) | |
120 | WA (1) | |
125 | WA (1) | |
4 | 165 | MN (2), CO (1), MI (1), NE (1) |
160 | OR (2) | |
5 | OR (1) | |
25 | MI (1) | |
57 | CO (1) | |
63 | WA (1) | |
68 | MN (1) | |
77 | WA (1) | |
154 | ND (1) |
if (!dir.exists(file.path(PROJHOME, "results/tables"))) dir.create(file.path(PROJHOME, "results/tables"))
readr::write_csv(mcg_table, path = file.path(PROJHOME, "results/tables/mcg_table.csv"))
It might be better to visualize these data as barplots. Here we are mapping the type (MCG/Count) to color and the opacity (alpha) to Evenness.
mcg_graph <- mcgs %>%
filter(Samples > 1) %>%
gather(type, count, MLGs, Samples, -Evenness) %>%
arrange(desc(type), desc(count)) %>%
rename(Type = type) %>%
mutate(Type = ifelse(Type == "MLGs", "MLHs", Type)) %>%
mutate(MCG = forcats::fct_inorder(MCG, ordered = TRUE)) %>%
ggplot(aes(x = MCG, y = count, group = Type, fill = Type, alpha = Evenness)) +
geom_col(aes(width = ifelse(Type == "MLHs", 0.5, 0.85)), color = "black", position = "identity") +
annotate(geom = "rect", xmin = 0.5, xmax = 8.5, ymin = 0, ymax = 75, color = "black", size = 1.125, alpha = 0, lty = 2) +
# annotate(geom = "text", x = 18, y =35,
# label = sprintf("Mean Evenness: %.3f", mean(mcgs$Evenness, na.rm = TRUE))) +
scale_fill_manual(values = c("black", "white"), labels = c("MLHs", "Isolates")) +
scale_y_continuous(expand = c(0, 2)) +
# scale_alpha(breaks = c(0.5, 0.75, 1)) +
theme_minimal(base_size = 16, base_family = "Helvetica") +
# coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.major.y = element_line(color = "grey50")) +
theme(panel.grid.minor.y = element_line(color = "grey70")) +
theme(legend.box.just = "top") +
theme(legend.box.margin = unit(c(0, 6.55, 0, 0), "lines")) +
theme(legend.box.background = element_rect(fill = "white", color = "black")) +
theme(legend.box = "horizontal") +
theme(legend.position = c(0.77, 0.75)) +
theme(aspect.ratio = 1/2) +
labs(list(
alpha = "MLH\nEvenness"
))
## Warning: Ignoring unknown aesthetics: width
even <- mcgs %>%
filter(MCG %in% c(53, 9)) %>%
mutate(MCG = ifelse(MCG == 53, "Evenness = 1.00", "Evenness = 0.55")) %>%
unnest() %>%
ggplot(aes(x = forcats::fct_inorder(MLG), y = Freq, alpha = Evenness)) +
geom_col(color = "grey30", width = 0.75) +
theme_void() +
theme(strip.text = element_text(family = "Helvetica", color = "grey30", face = "bold")) +
theme(aspect.ratio = 0.66) +
theme(panel.spacing = unit(0, "line")) +
theme(legend.position = "none") +
# theme(plot.background = element_rect(fill = NA, colour = "black")) +
facet_wrap(~MCG, ncol = 1, scale = "free")
# vp1 <- grid::viewport(width = 0.25, height = 0.4, x = 1, y = 0.975, just = c("right", "top"))
# mcg_graph <- mcg_graph + theme(legend.position = c(.763, .75))
# print(mcg_graph)
# print(even, vp = vp1)
mcg_graph_inset <- cowplot::ggdraw() +
cowplot::draw_plot(mcg_graph, 0, 0, 1, 1) +
cowplot::draw_plot(even, 0.75, 0.5675, 0.25, 0.4)
mcg_graph_inset
if (!interactive()){
cowplot::ggsave(mcg_graph_inset,
filename = file.path(PROJHOME, "results/figures/publication/mcg-barplot.pdf"),
width = 10,
height = 5,
units = "in")
# pdf(file.path(PROJHOME, "results/figures/publication/Figure1Z.pdf"), width = 10, height = 5)
# print(mcg_graph)
# print(even, vp = vp1)
# dev.off()
}
mlgs %>%
gather(type, count, MCGs, Samples, -Evenness) %>%
arrange(desc(type), count) %>%
mutate(MLG = forcats::fct_inorder(MLG, ordered = TRUE)) %>%
ggplot(aes(x = MLG, y = count, group = type, fill = type)) +
geom_col(aes(width = ifelse(type == "MCGs", 0.5, 0.85), alpha = Evenness), color = "black", position = "identity") +
annotate(geom = "text", x = 20, y = 21, label = sprintf("Mean Evenness: %.3f", mean(mlgs$Evenness, na.rm = TRUE))) +
# scale_fill_viridis(end = 0.75, discrete = TRUE, direction = -1) +
scale_fill_manual(values = c("black", "white")) +
scale_y_continuous(expand = c(0, 2)) +
theme_minimal() +
theme(panel.grid.major.x = element_blank()) +
coord_flip()
## Warning: Ignoring unknown aesthetics: width
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
ggtitle("How Evenly are MCGs spread across Multilocus Genotypes (MLGs)?")
## $title
## [1] "How Evenly are MCGs spread across Multilocus Genotypes (MLGs)?"
##
## $subtitle
## NULL
##
## attr(,"class")
## [1] "labels"
I believe that making a graph to visualize this might help me understand what the h*ck is going on.
make_mcgmlg_graph <- function(x){
gdf <- mutate(x, MLG = paste0('MLG.', MLG))
MLGS <- gdf %>%
group_by(MLG) %>%
summarize(size = sum(Freq)) %>%
rename(vertex = MLG)
MCGS <- gdf %>%
group_by(MCG) %>%
summarize(size = sum(Freq)) %>%
rename(vertex = MCG)
VAT <- bind_rows(MLGS, MCGS)
g <- gdf %>%
select(MLG, MCG, Freq) %>%
rename(weight = Freq) %>%
graph_from_data_frame(vertices = VAT)
V(g)$type <- ifelse(grepl("MLG", V(g)$name), "Multilocus Genotype", "Mycelial Compatibility Group")
g
}
add_communities <- function(g, clusters){
comm <- communities(clusters)
commlist <- setNames(rep(names(comm), lengths(comm)), unlist(comm, use.names = FALSE))
V(g)$community <- commlist[V(g)$name]
g
}
g <- make_mcgmlg_graph(mcgmlg)
g <- add_communities(g, cluster_walktrap(g))
osize <- V(g)$size
Because I have more control over the size and feel of the graph, I'm going to use ggraph. Of course, since this IS a complicated data set, It's not going to be very pretty to look at, but I'm going to save it as supplementary materials because it's valuable to at least look this ugliness in the face and say, "Yeah, I guess it's not so simple after all."
V(g)$size <- sqrt(osize)/10
set.seed(2017-05-03)
# set.seed(2017-08-02)
lay2 <- create_layout(g, layout = "igraph",
algorithm = "fr",
weights = rep(1, length(E(g)$weight))
# weights = ifelse(E(g)$weight == 1, 1, 1 + E(g)$weight/100)
)
the_communities <- data_frame(vertex = lay2$name, community = lay2$community) %>%
mutate(comm = case_when(
.$community == 7 ~ "A",
.$community == 5 ~ "B",
.$community == 1 ~ "C",
TRUE ~ as.character(.$community)
)) %>%
group_by(community) %>%
mutate(comm = ifelse(n() > 10, paste("Community", comm), "Other Communities (n < 10)")) %>%
mutate(comm = comm) # It just so happens that the communities are in order by size.
lay2$community <- the_communities$comm
mcg_mlg_graph <- ggraph(lay2) +
geom_node_circle(aes(r = size, lty = type, fill = community)) +
geom_edge_link(aes(start_cap = circle(node1.size, unit = "native"),
end_cap = circle(node2.size, unit = "native"),
width = weight),
arrow = arrow(length = unit(0.005, "native"))) +
geom_node_text(aes(label = gsub("MLG.", "", name),
color = community,
size = size/10),
show.legend = FALSE) +
coord_fixed() +
scale_linetype_manual(values = c(1, 3)) +
# scale_alpha_manual(values = c(1, 0.4)) +
scale_edge_width(range = c(0.25, 1.5)) +
scale_color_manual(values = c("black", "black", "white", "white")) +
scale_fill_viridis(direction = -1, discrete = TRUE) +
theme_graph(base_size = 16, base_family = "Helvetica") +
theme(legend.position = "bottom", legend.direction = "vertical") +
theme(legend.box.spacing = unit(-1, "lines")) +
theme(plot.margin = unit(c(0, 0, 0, 0), "lines"))
# ggtitle("Relation of Multilocus Genotypes and MCGs")
mcg_mlg_graph
if (!interactive()){
ggsave(plot = mcg_mlg_graph, filename = file.path(PROJHOME, "results/figures/publication/full-graph.pdf"),
width = 88*3, height = 88*3.5, units = "mm")
tmp <- new.env()
assign("mcg_mlg_igraph", g, envir = tmp)
assign("mcg_mlg_gg", mcg_mlg_graph, envir = tmp)
V(tmp$mcg_mlg_igraph)$size <- osize
save(list = ls(envir = tmp),
envir = tmp,
file = file.path(PROJHOME, "data", "mcg-mlg-graph.rda"))
}
cat("Community Structure:\n")
## Community Structure:
cluster_walktrap(g)
## IGRAPH clustering walktrap, groups: 51, mod: 0.71
## + groups:
## $`1`
## [1] "MLG.124" "MLG.136" "MLG.144" "MLG.156" "MLG.55" "MLG.58"
## [7] "MLG.66" "66" "67" "68" "9"
##
## $`2`
## [1] "MLG.31" "MLG.4" "MLG.41" "10" "39" "83"
##
## $`3`
## [1] "MLG.113" "MLG.85" "32" "40" "80"
##
## + ... omitted several groups/vertices
Because showing this big, hairy graph can overload a reader, I'm creating an inset graph for later use.
mcg_mlg_graph2 <- ggraph(lay2) +
geom_edge_arc(#aes(width = weight),
start_cap = circle(1.1, "mm"),
end_cap = circle(1.1, "mm"),
curvature = 0.1
) + #arrow = arrow(length = unit(2, "mm"))) +
geom_node_point(aes(fill = type), pch = 21, size = 0.75) +
coord_fixed() +
scale_edge_width(range = c(0.25, 1.5)) +
scale_fill_manual(values = c("black", "white")) +
theme_graph(base_size = 16, base_family = "Helvetica") +
theme(legend.position = "none", legend.direction = "vertical") +
# theme(legend.box.spacing = unit(-1, "lines")) +
theme(plot.margin = unit(c(0, 0, 0, 0), "lines")) +
theme(plot.background = element_rect(fill = "grey98", color = "black"))
mcg_mlg_graph2
Of interest is accounting for the communities that have a single MLG within the community. This gives us a lower bound at how many MLGs are reproductively isolated. We detect these by filtering for communities that only have one MLG. But, because we do have communities with one MLG that are peripheral to larger communities, we can also check the degree of the MCG and MLG nodes to ensure that they correspond one-to-one.
singles <- the_communities %>%
mutate(MLG = grepl("MLG", vertex)) %>%
group_by(community) %>%
summarize(n = sum(MLG)) %>%
filter(n == 1) %>%
inner_join(the_communities, by = "community") %>%
mutate(type = case_when(grepl("MLG", vertex) ~ "MLG", TRUE ~ "MCG")) %>%
group_by(community, type) %>%
summarize(vertex = paste(as.character(vertex), collapse = ", ")) %>%
spread(type, vertex) %>%
mutate(MCG_degree = map_dbl(strsplit(MCG, ", "), . %>% degree(g, .) %>% sum())) %>%
mutate(MLG_degree = degree(g, MLG)) %>%
filter(MLG_degree - MCG_degree == 0)
# assessing that this truely contains the singletons
nrow(inner_join(singles, the_communities, by = c("MLG" = "vertex")))
## Warning: Column `MLG`/`vertex` joining character vector and factor,
## coercing into character vector
## [1] 15
singles
## # A tibble: 15 x 5
## # Groups: community [15]
## community MCG MLG MCG_degree MLG_degree
## <fctr> <chr> <chr> <dbl> <dbl>
## 1 35 26, 82 MLG.61 2 2
## 2 36 36, 37 MLG.114 2 2
## 3 39 14, 29 MLG.1 2 2
## 4 40 87 MLG.135 1 1
## 5 41 85 MLG.116 1 1
## 6 42 76 MLG.50 1 1
## 7 43 63 MLG.49 1 1
## 8 44 57 MLG.143 1 1
## 9 45 74 MLG.32 1 1
## 10 46 62 MLG.34 1 1
## 11 47 55 MLG.142 1 1
## 12 48 47 MLG.98 1 1
## 13 49 12 MLG.14 1 1
## 14 50 54 MLG.118 1 1
## 15 51 73 MLG.137 1 1
knitr::kable(singles)
community | MCG | MLG | MCG_degree | MLG_degree |
---|---|---|---|---|
35 | 26, 82 | MLG.61 | 2 | 2 |
36 | 36, 37 | MLG.114 | 2 | 2 |
39 | 14, 29 | MLG.1 | 2 | 2 |
40 | 87 | MLG.135 | 1 | 1 |
41 | 85 | MLG.116 | 1 | 1 |
42 | 76 | MLG.50 | 1 | 1 |
43 | 63 | MLG.49 | 1 | 1 |
44 | 57 | MLG.143 | 1 | 1 |
45 | 74 | MLG.32 | 1 | 1 |
46 | 62 | MLG.34 | 1 | 1 |
47 | 55 | MLG.142 | 1 | 1 |
48 | 47 | MLG.98 | 1 | 1 |
49 | 12 | MLG.14 | 1 | 1 |
50 | 54 | MLG.118 | 1 | 1 |
51 | 73 | MLG.137 | 1 | 1 |
This result indicates that there are 15 MLGs that are unable to pair with any other MLGs due to mycelial incompatibility. This represents 9.09% of the total MLGs.
mll(dat11) <- "original"
mlg.table(dat11, ~Region, color = TRUE, mlgsub = as.integer(gsub("MLG.", "", singles$MLG)))
## MLG.61 MLG.114 MLG.1 MLG.135 MLG.116 MLG.50 MLG.49 MLG.143
## OR 0 0 1 0 0 0 0 0
## WA 1 0 0 0 0 1 1 1
## Australia 0 0 0 0 1 0 0 0
## CA 1 0 1 0 0 0 0 0
## France 0 2 0 0 0 0 0 0
## Mexico 0 0 0 0 0 0 0 0
## ND 0 0 0 1 0 0 0 0
## MLG.32 MLG.34 MLG.142 MLG.98 MLG.14 MLG.118 MLG.137
## OR 0 0 0 0 1 0 0
## WA 1 1 1 0 0 1 2
## Australia 0 0 0 0 0 0 0
## CA 0 0 0 0 0 0 0
## France 0 0 0 0 0 0 0
## Mexico 0 0 0 2 0 0 0
## ND 0 0 0 0 0 0 0
p <- last_plot()
p +
scale_fill_brewer(palette = "Set1") +
ggtitle("Incompatible Multilocus Genotypes") +
labs(list(caption = "These MLGs cannot pair with any other MLG due to\nmycelial incompatibility"))
The opposite question is, how connected is this huge hairy mess? The community
analysis gave us an idea of where different centers lie, but it doesn't really
do much for us in actually giving us an idea of what the possabilities for
connection are. One way we can look at this is by using the ego()
function to
assess the neighborhood of vertices for a given node order, which is to say the
number of edges away from the node you must travel to visit all your neighbors.
Order 0 would give us the node itself, and order 1 would give us the degree plus
one. If we set this degree to a large number, we will find all the possible
connections.
Here I'm getting a simple measure by counting all the vertices, I'm subtracting two here because I want to remove the genotype and the MCG associated.
# getting the sizes for all genotypes.
sizes <- ego_size(g, 200, nodes = which(grepl("Genotype", V(g)$type)), mode = "all") - 2
table(sizes)
## sizes
## 0 1 3 13 186
## 12 7 5 11 130
Here we can see there are 12 genotypes that are only associated with themselves and their respective MCGs. This corresponds with the results we found above. Up next are the genotypes that are associated with another partner, which could represent either two MCGs to one genotype (there are three of these), or two genotypes to one MCG. By logical deduction, we can see that there are 5 of these left. The rest of the entries are not so easy to parse, but it's clear that there are 130 genotypes that all have the ability to reach each other through mutual mycelial compatibility groups.
To focus on strictly the MLGs, we can see a more clear picture. Here, we are
taking the results of ego()
and counting how many MLGs are in the vertex list.
From there, we can tabulate the results and divide the number of entries by the
category, giving us the number of communities in each category.
ncommunities <- ego(g, 200, nodes = which(grepl("Genotype", V(g)$type)), mode = "all") %>%
map_int(. %>% names %>% grepl("MLG", .) %>% sum) %>%
table() %>%
{./as.integer(names(.))}
ncommunities
## .
## 1 2 3 11 130
## 15 3 1 1 1
map_dbl(as.integer(names(ncommunities)), choose, 2)
## [1] 0 1 3 55 8385
From here, we can see that we have 15 MLGs that only interact with themselves, 3 pairs of MLGs, 1 triplet, 1 hendeclet, and 1 community with 130 members, of which there are 8385 possible combinations.
Of course, the 8385 possible combinations doesn't give us an idea of what the number of immediately possible combinations exist. We can examine that by setting the mode to 2.
in_reach <- ego(g, 2, nodes = which(grepl("Genotype", V(g)$type)), mode = "all") %>%
map_df(function(.x){
as_tibble(
list(
MLG = list(names(.x[grepl("MLG", names(.x))][-1])), # -1 to remove self
MCG = list(names(.x[!grepl("MLG", names(.x))])),
WHO = names(.x[1])
)
)
}) %>%
mutate(N = lengths(MLG))
in_reach_summary <- in_reach %>%
group_by(N) %>%
summarize(E = diversity_stats(as.integer(table(unlist(MCG))))["E.5"],
MCG = n_distinct(unlist(MCG)),
MLG = n())
knitr::kable(in_reach_summary)
N | E | MCG | MLG |
---|---|---|---|
0 | 1.0000000 | 18 | 15 |
1 | 0.9353124 | 10 | 9 |
2 | 0.9093024 | 13 | 16 |
3 | 0.8848082 | 10 | 10 |
4 | 1.0000000 | 4 | 2 |
5 | 0.8361178 | 7 | 15 |
6 | NaN | 1 | 2 |
7 | 0.7819388 | 7 | 12 |
8 | 0.7286386 | 10 | 14 |
9 | 0.9428738 | 7 | 4 |
10 | 1.0000000 | 7 | 3 |
11 | 0.9303060 | 5 | 3 |
12 | 1.0000000 | 6 | 3 |
14 | 1.0000000 | 2 | 1 |
16 | 1.0000000 | 2 | 2 |
17 | 1.0000000 | 3 | 1 |
18 | 0.6319993 | 4 | 8 |
19 | 0.8213964 | 5 | 3 |
24 | 0.8213191 | 5 | 4 |
36 | 0.3950058 | 10 | 21 |
37 | 0.9115303 | 3 | 2 |
38 | 1.0000000 | 2 | 1 |
39 | 0.9308473 | 4 | 3 |
41 | 0.9115303 | 3 | 2 |
43 | 0.8116548 | 4 | 3 |
44 | 1.0000000 | 2 | 1 |
47 | 1.0000000 | 3 | 1 |
52 | 1.0000000 | 2 | 2 |
53 | 1.0000000 | 5 | 1 |
57 | 1.0000000 | 3 | 1 |
in_reach_plot <- ggplot(in_reach_summary, aes(x = N, y = MLG, fill = E)) +
geom_segment(aes(yend = 0, xend = N)) +
geom_point(aes(size = MCG), pch = 21) +
# ggrepel::geom_label_repel(aes(label = MCG)) +
viridis::scale_fill_viridis(option = "D") +
theme_bw(base_size = 16, base_family = "Helvetica") +
theme(aspect.ratio = 1/1.75) +
theme(axis.text = element_text(color = "black")) +
scale_y_continuous(expand = c(0, 0.1), limits = c(0, 25)) +
scale_x_continuous(expand = c(0, 0.1), breaks = c((0:6)*10), limits = c(-2, 60)) +
theme(panel.grid.major = element_line(colour = "grey20")) +
theme(panel.grid.minor = element_line(colour = "grey50", linetype = 3)) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.minor.x = element_blank()) +
theme(panel.border = element_blank()) +
labs(list(fill = expression(E[5]),
size = "MCG"
# caption = "size and fill represent\nthe number and distribution\nof MCG"
)) +
xlab("Potential Heterothallic Pairings") +
ylab("Multilocus Haplotypes")
in_reach_plot
if (!interactive()){
save(in_reach, in_reach_summary, in_reach_plot, file = file.path(PROJHOME, "data/heterothallic-pairings.rda"))
}
The maximum here is 57, which represents MLG.75.
The visNetwork package wraps a java library that has very good network visualization routines. I can conver this graph to that.
make_rgb <- function(x, alpha = 1){
out <- col2rgb(x, alpha = TRUE)
out[4, ] <- floor(out[4, ]*alpha)
out <- apply(out, 2, paste, collapse = ", ")
paste0("rgba(", out, ")")
}
community_colors <- viridis::viridis(4, direction = -1)[as.integer(factor(the_communities$comm))]
vg <- g %>%
set_vertex_attr("size", value = osize * 10) %>%
set_vertex_attr("value", value = osize) %>%
set_vertex_attr("label", value = NULL) %>%
set_vertex_attr("color", value = community_colors) %>%
set_vertex_attr("shape", value = ifelse(V(.)$type == "Multilocus Genotype", "triangle", "dot")) %>%
set_edge_attr("width", value = E(.)$weight) %>%
toVisNetworkData()
vg$nodes <- vg$nodes %>%
dplyr::group_by(id) %>%
dplyr::mutate(color = list(list(background = make_rgb(color, 0.8),
border = make_rgb(rep("black", n()), 0.8),
highlight = list(background = make_rgb(color),
border = make_rgb(rep("black", n()))
)
))) %>%
dplyr::mutate(title = paste0("<p>",
"<i>", type, " ", gsub("MLG.", "", id), "</i><br>",
"<b>Community: ", community, "</b><br>",
"<b>N: ", size/10, "</b><br>",
"</p>")
)
vg$edges <- vg$edges %>%
dplyr::mutate(title = paste0("<p>",
"<b>Mycelial Compatibility Group: ", from, "</b><br>",
"<b>Multilocus Genotype: ", gsub("MLG.", "", to), "</b><br>",
"<b>N: ", weight, "</b>",
"</p>")
)
vgn <- visNetwork(nodes = vg$nodes, edges = vg$edges,# height = "500px",
main = "Relation of Multilocus Genotypes and MCGs")
set.seed(2017-05-03)
vgn %>%
# visIgraphLayout("layout_nicely") %>% # activate this for a poseable network
visNodes(label = NULL, shadow = TRUE) %>%
visOptions(highlightNearest = list(enabled = TRUE, hover = TRUE),
nodesIdSelection = TRUE)
So that's a big hairy graph. What happens when we look at a subgraph of the top 8 MCGs? Note, that these are all MCGs that have > 10 isolates associated.
top5 <- filter(mcgmlg, as.character(MCG) %in% (mcgs %>% arrange(desc(Samples)) %>% slice(1:8) %>% pull(MCG)))
top5g <- make_mcgmlg_graph(top5)
tosize <- V(top5g)$size
V(top5g)$size <- sqrt(tosize)/10
# set.seed(2017-05-08)
set.seed(2017-08-1)
top5lay <- create_layout(top5g, layout = "igraph", algorithm = "fr", weights = rep(1, length(E(top5g))))
top5lay$community <- inner_join(data_frame(vertex = top5lay$name), the_communities)$comm
## Joining, by = "vertex"
## Warning: Column `vertex` joining factors with different levels, coercing to
## character vector
levels(top5lay$type) <- c("Multilocus Haplotype", "Mycelial Compatibility\nGroup")
t5g <- ggraph(top5lay) +
geom_node_circle(aes(r = size, fill = type)) +
geom_edge_link(aes(start_cap = circle(node1.size, unit = "native"),
end_cap = circle(node2.size, unit = "native"),
width = weight),
arrow = arrow(length = unit(0.01, "native"))) +
geom_node_text(aes(label = ifelse(size > 0.1, gsub("MLG.", "", name), ""),
color = type,
size = size/10),
show.legend = FALSE) +
coord_fixed() +
scale_edge_width(range = c(0.25, 1.5)) +
scale_fill_manual(values = c("black", "white")) +
scale_color_manual(values = c("white", "black")) +
theme_graph(base_size = 16, base_family = "Helvetica") +
# theme(legend.position = c(0.85, 0.35), legend.direction = "vertical") +
theme(legend.position = c(0.5, 0), legend.direction = "horizontal", legend.box = "horizontal") +
theme(legend.box.spacing = unit(-1, "lines")) +
theme(legend.spacing.x = unit(0, "lines")) +
# theme(legend.box.background = element_rect(color = "black")) +
theme(legend.text = element_text(size = 10)) +
theme(plot.margin = unit(c(0, 0, 0, 0), "lines"))
t5g$labels$edge_width <- "Shared Isolates"
t5g$labels$fill <- ""
t5g
if (!interactive())
ggsave(t5g, file = file.path("results", "figures", "publication", "mcg-mlg-subgraph.pdf"), width = 88, height = 88, scale = 3, units = "mm")
Vey nice!
greyground <- theme(plot.background = element_rect(color = "black", fill = "grey98"))
mcg_theme <- theme(legend.text = element_text(size = 10)) +
theme(legend.title = element_text(size = 10)) +
theme(axis.text.x = element_blank()) +
# theme(legend.background = element_blank()) +
theme(legend.box.background = element_rect(fill = "grey98")) +
theme(legend.key.size = unit(0.75, "line")) +
theme(legend.box.just = "left") +
theme(legend.position = c(0.9, 0.5)) +
theme(legend.box = "vertical") +
theme(legend.box.margin = unit(c(0, 0, 0, 0), "lines")) +
theme(legend.spacing = unit(0, "lines")) +
theme(aspect.ratio = 0.45) +
greyground
mcg_subgraph_with_context <- cowplot::ggdraw(xlim = c(0, 2), ylim = c(0, 2.25)) +
cowplot::draw_plot(t5g, 0, -0.125, 2, 2) +
cowplot::draw_plot(mcg_graph + scale_alpha(breaks = c(0.5, 0.75, 1)) + mcg_theme, 0, 1.45, 1.35, 1) +
cowplot::draw_plot(mcg_mlg_graph2 + greyground, x = 1.355, y = 1.633, width = 0.6345, height = 0.6345) +
cowplot::draw_plot_label(c("A", "B", "C"), x = c(0, 1.35, 0), y = c(2.25, 2.25, 1.65), size = 25)
mcg_subgraph_with_context
if (!interactive()){
cowplot::ggsave(plot = mcg_subgraph_with_context,
filename = file.path(PROJHOME, "results", "figures", "publication", "mcg-subgraph-with-context.pdf"),
width = 8,
height = 10)
}
There are a whole boatload of MLGs for those 8 MCGs. What are the severity ratings for those? Again, we take take a look at these AND simultaneously query the top 5 MLGs for this.
count_group <- . %>%
mutate(nobs = n()) %>% # count the number of samples/MCG
ungroup() %>%
arrange(desc(nobs)) # arrange by number of samples and reorder factors
Severity <- filter(strat, MCG %in% mcgs$MCG[1:8]) %>%
group_by(MCG) %>%
count_group %>%
mutate(MCG = forcats::fct_inorder(factor(MCG), ordered = TRUE)) %>%
mutate(MLG = ifelse(MLG %in% mlgs$MLG[1:5], paste("MLH", MLG), "Other"))
severity_plot <- ggplot(Severity, aes(x = MCG, y = Severity)) +
geom_point(aes(fill = MLG),
position = position_jitter(width = 0.2),
alpha = 0.75,
pch = 21) +
scale_fill_manual(values = c(viridis(5, direction = -1), NA)) +
theme_bw(base_size = 16, base_family = "Helvetica") +
theme(legend.position = "bottom") +
theme(aspect.ratio = 0.5) +
facet_wrap(~Region, nrow = 3) +
ylim(c(1, 9)) +
labs(list(
y = "Aggressiveness",
#title = "Aggressiveness by MCG and Region",
fill = "Multilocus Haplotype"
#subtitle = "Five most abundant multilocus haplotypes shown"
)) +
theme(legend.position = c(0.75, 0.1)) +
theme(legend.title.align = 0.5) +
guides(fill = guide_legend(nrow = 2))
severity_plot
if (!interactive())
ggsave(severity_plot, filename = "results/figures/publication/RMM-aggressiveness.pdf", width = 183, height = 183*(4.5/7), unit = "mm")
On 2017-07-27, Sydney came up with the idea to assess MCG accumulation with increasing genetic distance similar to the genotype accumulation curve. First, let's see what the curve would look like if we had one MLG/MCG.
First, we need to set up everything to calculate this quantity
bd <- bruvo.dist(dat11, replen = other(dat11)$REPLEN)
thresholds <- filter_stats(dat11, distance = bd, plot = TRUE)
thresh <- c(0, unique(thresholds$farthest$THRESHOLDS) + .Machine$double.eps)
collapsed <- map(thresh, . %>% mlg.filter(dat11, threshold = ., distance = bd))
# This function will count the number of original MLGs within each collapsed MLG
# Note that "original" here can be any MLG source.
compare_mlg <- function(collapsed, thresh, original){
as_tibble(list(MLG = collapsed, y = original, threshold = thresh)) %>%
group_by(MLG, threshold) %>%
summarize(N = n_distinct(y)) %>%
ungroup() %>%
mutate(NMLG = n())
}
# A custom function for stat summary that returns a single point when there is
# only a single observation
my_meanse <- function (x, mult = 1) {
x <- stats::na.omit(x)
se <- mult * sqrt(stats::var(x)/length(x))
mean <- mean(x)
if (length(x) == 1) return(data.frame(y = mean, ymin = mean, ymax = mean))
data.frame(y = mean, ymin = mean - se, ymax = mean + se)
}
Now, as a demontration, we can see what the curve would look like if all MCGs were randmly distributed (e.g. not clumpy) among the MLGs
map2_df(collapsed, thresh, compare_mlg, original = sample(87, 366, replace = TRUE)) %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
Now we can see what it looks like when there is a 1:1 relationship of MCGs to MLGs:
map2_df(collapsed, thresh, compare_mlg, original = mll(dat11, "original")) %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
Yet another comparison would be if each sample were a unique MCG:
map2_df(collapsed, thresh, compare_mlg, original = seq(366)) %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
And it's worth examining what happens if there are only 10 MCGs:
map2_df(collapsed, thresh, compare_mlg, original = sample(10, 366, replace = TRUE)) %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
And, of course, we should see what our observed data looks like:
observed <- map2_df(collapsed, thresh, compare_mlg, original = mll(dat11, "custom")) %>%
mutate(sample_number = "observed") %>%
mutate(source = "observed")
# Revisiting random dist
map2_df(collapsed, thresh, compare_mlg, original = sample(87, 366, replace = TRUE)) %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
scale_y_continuous(breaks = c(25, 50, 75)) +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
observed %>%
ggplot(aes(x = threshold, y = N)) +
geom_point(alpha = 0.1) +
stat_smooth(se = FALSE, color = "black") +
scale_y_continuous(breaks = c(25, 50, 75)) +
stat_summary(pch = 21, fun.data = my_meanse, fill = "white")
## `geom_smooth()` using method = 'gam'
set.seed(5000)
random <- map_df(1:20, function(x) map2_df(collapsed, thresh, compare_mlg, original = sample(mll(dat11, "custom"))), .id = "sample_number") %>%
mutate(sample_number = sprintf("%3d", as.integer(sample_number))) %>%
mutate(source = "random")
ggplot(random, aes(x = threshold, y = N, group = sample_number, color = source, fill = source)) +
# geom_point(alpha = 0.01, color = "grey50") +
stat_summary(alpha = 0.125, fun.data = my_meanse) +
# geom_point(alpha = 0.5, pch = 21, data = observed, fill = "white") +
stat_smooth(se = FALSE, data = observed) +
stat_summary(pch = 21, fun.data = my_meanse, data = observed) +
scale_color_manual(values = c("black", "grey50")) +
scale_fill_manual(values = c("white", "grey50")) +
theme_bw(base_size = 16, base_family = "Helvetica") +
theme(aspect.ratio = 1/2) +
theme(axis.text = element_text(color = "black")) +
theme(panel.grid.major = element_line(colour = "grey20")) +
theme(panel.grid.minor = element_line(colour = "grey50", linetype = 3)) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.minor.x = element_blank()) +
theme(panel.border = element_blank()) +
theme(legend.position = "top") +
ylab("Number of MCG/MLG") +
xlab("Bruvo's Distance")
## `geom_smooth()` using method = 'gam'
The communities defined above are defined by the connections between different MCGs. There is still the question of how cohesive these communities are. I believe one way to investigate this is to add the communities assignments back to the data and assess how well they split up on a dendrogram created with genetic distance.
strat_mcg <- left_join(strat, the_communities, by = c("MCG" = "vertex"))
## Warning: Column `MCG`/`vertex` joining factors with different levels,
## coercing to character vector
datdist <- bruvo.dist(dat11, replen = other(dat)$REPLEN)
datree <- phangorn::upgma(datdist)
Now we can visualize that trees.
ape::plot.phylo(ape::ladderize(datree), type = "fan", no.margin = FALSE, tip.col = viridis(4, direction = -1)[as.integer(factor(strat_mcg$comm))])
legend(-0.015, 0.11, legend = levels(factor(strat_mcg$comm)), fill = viridis(4, direction = -1), bty = "n")
title("MCG-based assignment")
ape::plot.phylo(ape::ladderize(datree), type = "fan", no.margin = FALSE, tip.col = plasma(366)[rank(strat$Severity, ties.method = "first")])
legend(-0.015, 0.125, legend = quantile(strat$Severity), fill = plasma(5), bty = "n")
title("Severity")
From what I can see, it appears that there's not much of a tight correlation of genetic structure and MCG. There is the clear cluster of MLG 66 in Community C, but there are other isolates in distant parts of the tree. I'm wondering what happens if we average the distances between groups.
datmat <- as.matrix(datdist)
distmcg <- strat_mcg %>%
group_by(comm) %>%
summarize(dist = list(datmat[Isolate, Isolate]), N = n())
isolist <- strat_mcg %>%
group_by(comm) %>%
filter(!duplicated(MLG)) %>%
summarize(Isolates = list(Isolate)) %>%
inner_join(expand(., comm, comm)) %>%
rowwise() %>%
mutate(COMMS = paste(sort(c(comm, comm1)), collapse = " ")) %>%
ungroup() %>%
filter(!duplicated(COMMS))
## Joining, by = "comm"
isolist <- isolist %>%
filter(comm == comm1) %>%
select(-comm, -COMMS) %>%
rename(Isolates2 = Isolates) %>%
inner_join(isolist)
## Joining, by = "comm1"
distmcg <- isolist %>%
group_by(comm, comm1, COMMS) %>%
summarize(dist = list(datmat[Isolates[[1]], Isolates2[[1]]]),
N = length(Isolates[[1]])) %>%
rowwise() %>%
mutate(dist = case_when(isSymmetric(dist) ~ list(dist[lower.tri(dist)]),
TRUE ~ list(as.numeric(dist))))
distmcg %>%
rowwise() %>%
mutate(mean = mean(dist),#mean(ifelse(dist == 0, NA, dist), na.rm = TRUE),
median = median(dist)) %>%#median(ifelse(dist == 0, NA, dist), na.rm = TRUE)) %>%
ungroup() %>%
select(COMMS, mean, median, N) %>%
rename(Comparison = COMMS) %>%
knitr::kable()
Comparison | mean | median | N |
---|---|---|---|
Community A Community A | 0.3938339 | 0.4204102 | 59 |
Community A Community B | 0.4570305 | 0.4770508 | 59 |
Community A Community C | 0.4640803 | 0.4857511 | 59 |
Community A Other Communities (n < 10) | 0.4359960 | 0.4487083 | 59 |
Community B Community B | 0.3998535 | 0.4279119 | 29 |
Community B Community C | 0.4211572 | 0.4655540 | 29 |
Community B Other Communities (n < 10) | 0.4662095 | 0.4872159 | 29 |
Community C Community C | 0.4430856 | 0.5492276 | 9 |
Community C Other Communities (n < 10) | 0.4870535 | 0.5195091 | 9 |
Other Communities (n < 10) Other Communities (n < 10) | 0.4536293 | 0.4687472 | 103 |
distplot <- distmcg %>%
mutate(mean = mean(dist)) %>%
mutate(COMMS = ifelse(comm == comm1, paste0(comm, "\n"), paste0(comm, "/\n", comm1))) %>%
mutate(comm = gsub(" \\(", "\n(", comm)) %>%
mutate(comm1 = gsub(" \\(", "\n(", comm1)) %>%
mutate(comparison = ifelse(comm == comm1, "Within", "Among")) %>%
ungroup() %>%
unnest()
distplot %>%
group_by(comparison) %>%
summarize(mean = mean(dist), median = median(dist))
## # A tibble: 2 x 3
## comparison mean median
## <chr> <dbl> <dbl>
## 1 Among 0.4507717 0.4708141
## 2 Within 0.4368156 0.4545455
if (require("ggridges")){
ggplot(distplot, aes(x = dist, y = COMMS, group = COMMS, height = ..density..)) +
geom_density_ridges(aes(lty = comparison, fill = mean)) +
theme_ridges() +
theme(axis.text.y = element_text(hjust = 0)) +
viridis::scale_fill_viridis(option = "A", limits = quantile(distplot$dist, c(0.25, 0.75)))
}
## Loading required package: ggridges
## Picking joint bandwidth of 0.039
ggplot(distplot, aes(x = dist, group = COMMS)) +
geom_density(aes(fill = mean, lty = comparison)) +
ggtitle("Bruvo's distance by MCG community") +
xlab("Bruvo's distance") +
facet_grid(comm1~comm) +
theme_minimal(base_size = 16, base_family = "Helvetica") +
theme(aspect.ratio = 1) +
theme(panel.grid.major.y = element_blank()) +
theme(panel.background = element_rect(fill = "grey95")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
viridis::scale_fill_viridis(option = "A", limits = quantile(distplot$dist, c(0.25, 0.75)))
commamova <- dat11 %>%
addStrata(data.frame(Community = strat_mcg$comm)) %>%
clonecorrect(~Community) %>%
poppr.amova(~Community, dist = as.dist(datmat[indNames(.), indNames(.)]),
method = "pegas", nperm = 1000)
##
## No missing values detected.
## Warning in is.euclid(xdist): Zero distance(s)
commamova
##
## Analysis of Molecular Variance
##
## Call: pegas::amova(formula = hier, data = hierdf, nperm = nperm, is.squared = FALSE)
##
## SSD MSD df
## Community 2.408421 0.8028068 3
## Error 41.926571 0.2139111 196
## Total 44.334991 0.2227889 199
##
## Variance components:
## sigma2 P.value
## Community 0.01414 0
## Error 0.21391
##
## Variance coefficients:
## a
## 41.64667
This is quite distant. For reference, a distance of 0.32 is on average 7 steps.
Session Information
## Session info --------------------------------------------------------------------------------------
## setting value
## version R version 3.4.2 (2017-09-28)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz UTC
## date 2018-04-12
## Packages ------------------------------------------------------------------------------------------
## package * version date source
## ade4 * 1.7-8 2017-08-09 cran (@1.7-8)
## adegenet * 2.1.0 2017-10-12 cran (@2.1.0)
## ape 5.0 2017-10-30 cran (@5.0)
## assertr 2.0.2.2 2017-06-06 cran (@2.0.2.2)
## assertthat 0.2.0 2017-04-11 CRAN (R 3.4.2)
## base * 3.4.2 2018-03-01 local
## bindr 0.1 2016-11-13 CRAN (R 3.4.2)
## bindrcpp * 0.2 2017-06-17 CRAN (R 3.4.2)
## boot 1.3-20 2017-07-30 cran (@1.3-20)
## broom 0.4.3 2017-11-20 CRAN (R 3.4.2)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.4.2)
## cli 1.0.0 2017-11-05 CRAN (R 3.4.2)
## cluster 2.0.6 2017-03-16 CRAN (R 3.4.2)
## coda 0.19-1 2016-12-08 cran (@0.19-1)
## codetools 0.2-15 2016-10-05 CRAN (R 3.4.2)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.4.2)
## compiler 3.4.2 2018-03-01 local
## cowplot 0.9.1 2017-11-16 cran (@0.9.1)
## crayon 1.3.4 2017-09-16 CRAN (R 3.4.2)
## datasets * 3.4.2 2018-03-01 local
## deldir 0.1-14 2017-04-22 cran (@0.1-14)
## devtools 1.13.4 2017-11-09 CRAN (R 3.4.2)
## digest 0.6.12 2017-01-27 CRAN (R 3.4.2)
## dplyr * 0.7.4 2017-09-28 CRAN (R 3.4.2)
## evaluate 0.10.1 2017-06-24 CRAN (R 3.4.2)
## expm 0.999-2 2017-03-29 cran (@0.999-2)
## ezknitr 0.6 2016-09-16 cran (@0.6)
## fastmatch 1.1-0 2017-01-28 cran (@1.1-0)
## forcats * 0.2.0 2017-01-23 CRAN (R 3.4.2)
## foreign 0.8-69 2017-06-21 CRAN (R 3.4.2)
## gdata 2.18.0 2017-06-06 cran (@2.18.0)
## ggcompoplot * 0.1.0 2018-04-09 Github (zkamvar/ggcompoplot@bcf007d)
## ggforce 0.1.1 2016-11-28 cran (@0.1.1)
## ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.2)
## ggraph * 1.0.0 2017-02-24 cran (@1.0.0)
## ggrepel 0.7.0 2017-09-29 cran (@0.7.0)
## ggridges * 0.4.1 2017-09-15 cran (@0.4.1)
## glue 1.2.0 2017-10-29 CRAN (R 3.4.2)
## gmodels 2.16.2 2015-07-22 cran (@2.16.2)
## graphics * 3.4.2 2018-03-01 local
## grDevices * 3.4.2 2018-03-01 local
## grid 3.4.2 2018-03-01 local
## gridExtra 2.3 2017-09-09 CRAN (R 3.4.2)
## gtable 0.2.0 2016-02-26 CRAN (R 3.4.2)
## gtools 3.5.0 2015-05-29 cran (@3.5.0)
## haven 1.1.0 2017-07-09 CRAN (R 3.4.2)
## highr 0.6 2016-05-09 CRAN (R 3.4.2)
## hms 0.4.0 2017-11-23 CRAN (R 3.4.2)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.4.2)
## htmlwidgets 0.9 2017-07-10 CRAN (R 3.4.2)
## httpuv 1.3.5 2017-07-04 CRAN (R 3.4.2)
## httr 1.3.1 2017-08-20 CRAN (R 3.4.2)
## igraph * 1.1.2 2017-07-21 CRAN (R 3.4.2)
## jsonlite 1.5 2017-06-01 CRAN (R 3.4.2)
## knitr * 1.17 2017-08-10 CRAN (R 3.4.2)
## labeling 0.3 2014-08-23 CRAN (R 3.4.2)
## lattice 0.20-35 2017-03-25 CRAN (R 3.4.2)
## lazyeval 0.2.1 2017-10-29 CRAN (R 3.4.2)
## LearnBayes 2.15 2014-05-29 cran (@2.15)
## lubridate 1.7.1 2017-11-03 CRAN (R 3.4.2)
## magrittr 1.5 2014-11-22 CRAN (R 3.4.2)
## MASS 7.3-47 2017-04-21 CRAN (R 3.4.2)
## Matrix 1.2-12 2017-11-16 CRAN (R 3.4.2)
## memoise 1.1.0 2017-04-21 CRAN (R 3.4.2)
## methods * 3.4.2 2018-03-01 local
## mgcv 1.8-22 2017-09-19 CRAN (R 3.4.2)
## mime 0.5 2016-07-07 CRAN (R 3.4.2)
## mnormt 1.5-5 2016-10-15 CRAN (R 3.4.2)
## modelr 0.1.1 2017-07-24 CRAN (R 3.4.2)
## munsell 0.4.3 2016-02-13 CRAN (R 3.4.2)
## nlme 3.1-131 2017-02-06 CRAN (R 3.4.2)
## parallel 3.4.2 2018-03-01 local
## pegas 0.10 2017-05-03 cran (@0.10)
## permute 0.9-4 2016-09-09 cran (@0.9-4)
## phangorn 2.3.1 2017-11-01 cran (@2.3.1)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.2)
## plyr 1.8.4 2016-06-08 CRAN (R 3.4.2)
## poppr * 2.5.0 2017-09-11 cran (@2.5.0)
## psych 1.7.8 2017-09-09 CRAN (R 3.4.2)
## purrr * 0.2.4 2017-10-18 CRAN (R 3.4.2)
## quadprog 1.5-5 2013-04-17 cran (@1.5-5)
## R.methodsS3 1.7.1 2016-02-16 cran (@1.7.1)
## R.oo 1.21.0 2016-11-01 cran (@1.21.0)
## R.utils 2.6.0 2017-11-05 cran (@2.6.0)
## R6 2.2.2 2017-06-17 CRAN (R 3.4.2)
## RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.4.2)
## Rcpp 0.12.14 2017-11-23 CRAN (R 3.4.2)
## readr * 1.1.1 2017-05-16 CRAN (R 3.4.2)
## readxl 1.0.0 2017-04-18 CRAN (R 3.4.2)
## reshape2 1.4.2 2016-10-22 CRAN (R 3.4.2)
## rlang 0.1.4 2017-11-05 CRAN (R 3.4.2)
## rstudioapi 0.7 2017-09-07 CRAN (R 3.4.2)
## rvest 0.3.2 2016-06-17 CRAN (R 3.4.2)
## scales 0.5.0 2017-08-24 CRAN (R 3.4.2)
## seqinr 3.4-5 2017-08-01 cran (@3.4-5)
## shiny 1.0.5 2017-08-23 CRAN (R 3.4.2)
## sp 1.2-5 2017-06-29 CRAN (R 3.4.2)
## spData 0.2.6.7 2017-11-28 cran (@0.2.6.7)
## spdep 0.7-4 2017-11-22 cran (@0.7-4)
## splines 3.4.2 2018-03-01 local
## stats * 3.4.2 2018-03-01 local
## stringi 1.1.6 2017-11-17 CRAN (R 3.4.2)
## stringr * 1.2.0 2017-02-18 CRAN (R 3.4.2)
## tibble * 1.3.4 2017-08-22 CRAN (R 3.4.2)
## tidyr * 0.7.2 2017-10-16 CRAN (R 3.4.2)
## tidyselect 0.2.3 2017-11-06 CRAN (R 3.4.2)
## tidyverse * 1.2.1 2017-11-14 CRAN (R 3.4.2)
## tools 3.4.2 2018-03-01 local
## tweenr 0.1.5 2016-10-10 cran (@0.1.5)
## udunits2 0.13 2016-11-17 cran (@0.13)
## units 0.4-6 2017-08-27 cran (@0.4-6)
## utils * 3.4.2 2018-03-01 local
## vegan 2.4-4 2017-08-24 cran (@2.4-4)
## viridis * 0.4.0 2017-03-27 CRAN (R 3.4.2)
## viridisLite * 0.2.0 2017-03-24 CRAN (R 3.4.2)
## visNetwork * 2.0.1 2017-07-30 cran (@2.0.1)
## withr 2.1.0 2017-11-01 CRAN (R 3.4.2)
## xml2 1.1.1 2017-01-24 CRAN (R 3.4.2)
## xtable 1.8-2 2016-02-05 CRAN (R 3.4.2)