-
Notifications
You must be signed in to change notification settings - Fork 0
/
pca_analysis.R
102 lines (85 loc) · 3.48 KB
/
pca_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
library(easypackages)
libraries("adegenet","RColorBrewer","parallel","RColorBrewer", "ggplot2")
###########################
# import and convert data #
###########################
locality.info <- read.csv("sample_summary.csv",stringsAsFactors = T)
head(locality.info)
total.vcf <- vcfR::read.vcfR("sirtalis.vcf")
# check that all samples in vcf and sample info are represented
all(colnames(total.vcf@gt)[-1]== locality.info$sample)
total.gl <- vcfR::vcfR2genlight(total.vcf)
ploidy(total.gl) <- 2
pop(total.gl) <- locality.info$population
z.vcf <- vcfR::read.vcfR("sirtalis_z.vcf")
# check that all samples in vcf and sample info are represented
all(colnames(z.vcf@gt)[-1]== locality.info$sample)
z.gl <- vcfR::vcfR2genlight(z.vcf)
ploidy(z.gl) <- 2
pop(z.gl) <- locality.info$population
autosomes.vcf <- vcfR::read.vcfR("sirtalis_auto.vcf")
# check that all samples in vcf and sample info are represented
all(colnames(autosomes.vcf@gt)[-1]== locality.info$sample)
autosomes.gl <- vcfR::vcfR2genlight(autosomes.vcf)
ploidy(autosomes.gl) <- 2
pop(autosomes.gl) <- locality.info$population
# div colors "#2C7BB6", "#FDAE61", "#D7191C", "#ABD9E9"
# K = 4 pop order E C S W
# div.colors <- c("#2C7BB6", "#FDAE61", "#D7191C", "#ABD9E9")
# colors incorrectly assigned in figure
div.colors <- c("#FDAE61", "#2C7BB6", "#D7191C", "#ABD9E9")
#############
# TOTAL PCA #
#############
total.pca <- glPca(total.gl, n.cores = 8, nf = 3, parallel = TRUE)
total.pca.scores <- as.data.frame(total.pca$scores)
total.pca.scores$Population <- pop(total.gl)
set.seed(9)
p <- ggplot(total.pca.scores, aes(x=PC1, y=PC2, colour=Population))
p <- p + geom_point(size=2)
p <- p + stat_ellipse(level = 0.95, size = 1)
p <- p + scale_color_manual(values = div.colors)
p <- p + geom_hline(yintercept = 0)
p <- p + geom_vline(xintercept = 0)
p <- p + theme_bw()
p <- p + theme(axis.title = element_text(size = 15))
p <- p + theme(legend.title = element_text(size = 15))
p <- p + theme(legend.text = element_text(size = 12))
p <- p + xlab("PC1 (12.11%)") + ylab("PC2 (9.48%)")
p
####################
# Z CHROMOSOME PCA #
####################
z.pca <- glPca(z.gl, n.cores = 8, nf = 3, parallel = TRUE)
z.pca.scores <- as.data.frame(z.pca$scores)
z.pca.scores$Population <- pop(z.gl)
p <- ggplot(z.pca.scores, aes(x=PC1, y=PC2, colour=Population))
p <- p + geom_point(size=2)
p <- p + stat_ellipse(level = 0.95, size = 1)
p <- p + scale_color_manual(values = div.colors)
p <- p + geom_hline(yintercept = 0)
p <- p + geom_vline(xintercept = 0)
p <- p + theme_bw()
p <- p + theme(axis.title = element_text(size = 15))
p <- p + theme(legend.title = element_text(size = 15))
p <- p + theme(legend.text = element_text(size = 12))
p <- p + xlab("PC1 (1.86%)") + ylab("PC2 (0.97%)")
p
################
# AUTOSOME PCA #
################
autosomes.pca <- glPca(autosomes.gl, n.cores = 8, nf = 3, parallel = TRUE)
autosomes.pca.scores <- as.data.frame(autosomes.pca$scores)
autosomes.pca.scores$Population <- pop(autosomes.gl)
p <- ggplot(autosomes.pca.scores, aes(x=PC1, y=PC2, colour=Population))
p <- p + geom_point(size=2)
p <- p + stat_ellipse(level = 0.95, size = 1)
p <- p + scale_color_manual(values = div.colors)
p <- p + geom_hline(yintercept = 0)
p <- p + geom_vline(xintercept = 0)
p <- p + theme_bw()
p <- p + theme(axis.title = element_text(size = 15))
p <- p + theme(legend.title = element_text(size = 15))
p <- p + theme(legend.text = element_text(size = 12))
p <- p + xlab("PC1 (9.34%)") + ylab("PC2 (8.11%)")
p