-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathanalyse_GO_KEGG_富集分析
222 lines (169 loc) · 8.53 KB
/
analyse_GO_KEGG_富集分析
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# GO及KEGG富集分析
#### 安装包 #####
# install.packages("BiocManager")
# BiocManager::install(version = "3.13")
# BiocManager::install("gprofiler2")
# BiocManager::install("clusterProfiler")
# BiocManager::install("AnnotationHub")
# BiocManager::install("org.Bt.eg.db") ##方法1:安装牛注释数据包
#############
library(AnnotationDbi)
library(AnnotationHub)
## 羊
# hub = AnnotationHub()
# query<-query(hub, "org.Ovis_aries.eg.sqlite")
# sheep.db <- hub[['AH85595']]
setwd("D:/生信/sheep.db")
sheep.db <- AnnotationDbi::loadDb('org.Ovis_aries.eg.sqlite')
columns(sheep.db)
## 牛
setwd("D:/生信/cattle.db")
bos.db <- AnnotationDbi::loadDb('org.Bt.eg.db.sqlite')## 方法2:下载到本地加载,每次使用上传
columns(bos.db)
#### 读取基因列表文件中的基因名称,上下调分开做 ######
library("org.Bt.eg.db")
library(clusterProfiler)
setwd("D:/桌面/DP-STH/AS_M/富集结果")
#GO富集分析,上下调分开做
genes <- read.table('GENEID.txt', header = T, stringsAsFactors = FALSE,sep = '\t')
#转id
covID <- bitr(genes$GeneID, fromType = "SYMBOL",
toType = "ENTREZID", OrgDb = sheep.db, drop = T)
#对于加载的注释库的使用,以上述为例,就直接在 OrgDb 中指定人(org.Hs.eg.db)或绵羊(sheep)
enrich.go <- enrichGO(gene = genes$GeneID, #基因列表文件中的基因名称
OrgDb = sheep.db, #指定物种的基因数据库
keyType = 'SYMBOL', #指定给定的基因名称类型,例如这里以 SYMBOL 为例
ont = 'ALL', #可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
pAdjustMethod = 'BH', #指定 p 值校正方法
pvalueCutoff = 1, #指定 p 值阈值,不显著的值将不显示在结果中
qvalueCutoff = 1 #指定 q 值阈值,不显著的值将不显示在结果中
)
write.table(enrich.go@result,'GO-AS-2.txt',sep = '\t',quote = F)
# 例如上述指定 ALL 同时计算 BP、MF、CC,这里将它们作个拆分后输出
BP <- enrich.go[enrich.go$ONTOLOGY=='BP', ]
CC <- enrich.go[enrich.go$ONTOLOGY=='CC', ]
MF <- enrich.go[enrich.go$ONTOLOGY=='MF', ]
write.table(as.data.frame(BP), 'go.BP.txt', sep = '\t', row.names = FALSE, quote = FALSE)
write.table(as.data.frame(CC), 'go.CC.txt', sep = '\t', row.names = FALSE, quote = FALSE)
write.table(as.data.frame(MF), 'go.MF.txt', sep = '\t', row.names = FALSE, quote = FALSE)
# kegg, 上下调分开做
library(clusterProfiler)
genes <- read.table('txt', header = T, stringsAsFactors = FALSE,sep = '\t')
covID <- bitr(genes$V1, fromType = "SYMBOL",
toType="ENTREZID",OrgDb= bos.db , drop = TRUE) # 转换为entrezID,KEGG只识别这种ID
write.table(covID,'ENTREZID-up.txt',sep = '\t',quote = F) # 写出去保存
#每次打开R计算时,它会自动连接kegg官网获得最近的物种注释信息,因此数据库一定都是最新的
search_kegg_organism("sheep", by="common_name") # 寻找对应动物的organism
R.utils::setOption("clusterProfiler.download.method",'auto')
enrich_kegg <- enrichKEGG(gene = covID$ENTREZID, # 基因列表文件中的基因名
organism = 'oas', # 指定物种,牛为bta,羊为oas
keyType = 'kegg', # kegg富集
pAdjustMethod = 'BH', # 指定矫正方法
pvalueCutoff = 1, # 指定p值阈值,不显著的值将不显示在结果中
qvalueCutoff = 1) # 指定q值阈值,不显著的值将不显示在结果中
#输出结果
write.table(enrich_kegg@result,'kegg-AS.txt',quote = F,sep = '\t')
# 富集气泡图
library(ggplot2)
dotplot(enrich.go, # GO富集分析结果
split="ONTOLOGY", showCategory = 15, #展示前20个点,默认为10个
x = "GeneRatio", # 横坐标,默认GeneRation,也可以为Count
color = "p.adjust", #右纵坐标,默认p.adjust,也可以为pvalue和qvalue
size = NULL, #点的大小
title = "kegg_dotplot" ) + #设置图片的标题
facet_grid(ONTOLOGY~., scale="free")
dotplot(enrich_kegg,
showCategory = 15,#展示前15个点,默认为10个
x = "GeneRatio", # 横坐标,默认GeneRation,也可以为Count
y = "Description",
color = 'pvalue',
title = "kegg_dotplot")
# 条形图
barplot(enrich_kegg,
showCategory = 15,#展示前15个点,默认为10个
x = "GeneRatio", # 横坐标,默认GeneRation,也可以为Count
color = 'pvalue',
label_format = 100, # 设置标签长度
title = "kegg_dotplot")
barplot(enrich.go, split="ONTOLOGY",
showCategory = 15,#展示前15个点,默认为10个
x = "GeneRatio", # 横坐标,默认GeneRation,也可以为Count
color = 'pvalue',
label_format = 100, # 设置标签长度
title = "GO-term")+
facet_grid(ONTOLOGY~., scale="free")
###### up及down基因的合并处理 #####
gene_down <- read.table('up_gene.txt', header = T, stringsAsFactors = FALSE,sep = '\t')
gene_up <- read.table('down_gene.txt', header = T, stringsAsFactors = FALSE,sep = '\t')
# 转基因ID
library("org.Bt.eg.db")
library(clusterProfiler)
deg.up.gene <- bitr(gene_up$x, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Bt.eg.db")$ENTREZID
deg.down.gene <- bitr(gene_down$x, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Bt.eg.db")$ENTREZID
# 将up 和 down基因合并到一个表格
deg.ei.ls <- list(up = deg.up.gene, down = deg.down.gene)
# GO
GO.cmp <- compareCluster(deg.ei.ls,
fun = "enrichGO",
qvalueCutoff = 1,
pvalueCutoff = 1,
pAdjustMethod = 'BH',
ont = 'ALL', #可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
OrgDb = "org.Bt.eg.db")
write.table(GO.cmp@compareClusterResult,'GO-up-down-1.txt',sep = '\t',quote = F)
# 条形图
library(ggplot2)
library(ggsci)
ddf<-read.table('GO-AS.txt',header = T,sep = '\t')
dat=ddf
dat$Cluster = ifelse(dat$Cluster =='up',1,-1)
dat$pvalue = -log10(dat$pvalue)
dat$pvalue <- dat$pvalue*dat$Cluster
dat=dat[order(dat$pvalue,decreasing = F),]
ggplot(dat, aes(x=reorder(Description,order(pvalue, decreasing = F)),
y=pvalue,fill=Cluster)) +
geom_bar(stat="identity",width = 0.78,position = position_dodge(0.7)) +
scale_fill_gradient(low="#3685af",high="#af4236",guide = FALSE)+
scale_x_discrete(name ="GO Terms") +
#scale_y_discrete(labels =Description )
scale_y_continuous(name ="-log10Pvalue",expand = c(0,0)) +
coord_flip() + # 翻转坐标
theme(panel.grid.major.y = element_blank(),panel.grid.minor.y = element_blank())+ #删除纵向网络
theme(axis.text.y = element_blank())+ #刻度标签空白
theme(axis.ticks = element_blank())+ #刻度为空白
theme(panel.border = element_blank())+ #边框为空白
theme(axis.text=element_text(face = "bold",size = 15),
axis.title = element_text(face = 'bold',size = 15), #坐标轴字体
plot.title = element_text(size = 20,hjust = 0.3),
legend.position = "top",panel.grid = element_line(colour = 'white'))+
geom_text(aes(label=Description),size=3.5,hjust=ifelse(dat$pvalue>0,1.5,-0.5))+ # 两侧加上标签文字
ggtitle("The most enriched GO-BP terms") # 添加标题
# KEGG, 上下调一起做
####### install.packages("R.utils")
R.utils::setOption("clusterProfiler.download.method",'auto')
kegg.cmp <- compareCluster(deg.ei.ls,
fun = "enrichKEGG",
qvalueCutoff = 1,
pvalueCutoff = 1,
pAdjustMethod = 'BH',
organism = 'bta')
write.table(kegg.cmp@compareClusterResult,'kegg-up-down-pq.1.txt',sep = '\t',quote = F)
### cnetplot: Gene-Concept Network
#构建含log2FC信息的genelist
BiocManager::install("cnetplot")
deg <- read.table("diffexpr_padj_results.txt",header = T)
go <- read.table("GO-up-down.txt",header = T,sep = '\t' )
genelist <- as.numeric(deg[,2])
names(genelist) <- row.names(deg)
library(patchwork)
library("cnetplot")
cnetp1 <- cnetplot(go, foldChange = genelist,
showCategory = 6,
colorEdge = T,
node_label = 'all',
color_category ='steelblue')
cnetp2 <- cnetplot(go, foldChange = genelist,
showCategory = 6,
node_label = 'gene',
circular = T,
colorEdge = T)