-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenrichment.R
128 lines (105 loc) · 4.56 KB
/
enrichment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Zinka 2020_06_16
# Functional enrichemnt analysis
#
# Inputs:
# - T. pseudonana GO annotations file (thaps_GO.txt)
# - Table of WGCNA network summary (WGCNA_network.csv)
# - Table of differentially expressed genes at each time point (DE.csv)
#
# Outputs:
# - Summary of enriched terms for the module or DE time comparison being analyzed
# (for topGO enrichment: GO_enrich_summary, for KEGG enrichment: KEGG_enrich_summary)
# # Use Bioconductor to install topGO, clusterProfiler, and enrichplot with BiocManager
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install(version = "3.15")
# #install packages
# BiocManager::install("topGO")
# BiocManager::install("clusterProfiler")
# BiocManager::install("enrichplot")
library(topGO)
library(ggplot2)
library(plyr)
library(dplyr)
library(tidyverse)
library(clusterProfiler)
library(enrichplot)
setwd('~/Desktop/Thaps_Catl')
################################################################
# Importing files:
################################################################
##### Importing T. pseudonana GO annotations from a txt file:
thaps_GO <- readMappings(file = '~/Desktop/Thaps_Catl/thaps_GO.txt')
geneNames <- names(thaps_GO) #Setting the gene universe genes
##### Importing table of WGCNA network:
WGCNA <- read.csv('~/Desktop/Thaps_Catl/WGCNA_network.csv')
WGCNA = as.data.frame(WGCNA)
##### Importing table of differentially expresed genes:
DE <- read.csv('~/Desktop/Thaps_Catl/DE.csv')
################################################################
# Selecting the module or time comparison for enrichment analysis
################################################################
#Specify the WGCNA module or DE time comparison to do enrichemnt analysis on.
#Un-comment the DE section to do enrichemnt on differentally expressed genes.
##### For enrichment of WGCNA modules:
#Set module to look at (all possible modules: 1,5,6,7,9,11,17,21,23,33,34,35)
module = "9"
#Define list of genes that belongs to that module:
MyInterestingGenes = WGCNA$Gene_ID [ WGCNA$ModuleLabel == module]
# ##### For enrichment of DE genes at each time point, the 'modules' can be defined here.
# #Set the timepoint to look at (all possible timepoints: t24, t72, t120)
# module = "t24"
# #Define list of genes that belongs to DE timepoint:
# if (module == "t24") {
# MyInterestingGenes <- DE$t24_geneID
# }
# if (module == "t72") {
# MyInterestingGenes <- DE$t72_geneID
# }
# if (module == "t120") {
# MyInterestingGenes <- DE$t120_geneID
# }
################################################################
# Functional enrichemnt with topGO
################################################################
#Creating a geneList object that is a named factor indicatig which genes are interesting and which are not.
geneList <- factor(as.integer(geneNames %in% MyInterestingGenes))
names(geneList) <- geneNames
str(geneList)
#Building the topGOdata object:
ontology = "BP" #Select what type of GO enrichemnt I want to do (BP, MF, CC)
GOdata <- new("topGOdata",
ontology = ontology,
allGenes = geneList,
annot = annFUN.gene2GO,
gene2GO = thaps_GO,
nodeSize = 5)
GOdata
#Classic Fisher's exact test:
test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test")
resultFisher <- getSigGroups(GOdata, test.stat)
resultFisher
#Summary table:
GO_enrich_summary <- GenTable(GOdata,
classicFisher = resultFisher,
orderBy = "classicFisher",
ranksOf = "classicFisher",
topNodes = length(resultFisher@score),
numChar = 1000)
################################################################
# Functional enrichemnt with KEGG
################################################################
KEGG_enrich <- enrichKEGG(gene = MyInterestingGenes,
organism = 'tps',
pvalueCutoff = 0.05,
qvalueCutoff = 0.1)
cnetplot(KEGG_enrich, categorySize = "pvalue") #Visualize enriched term and associated genes.
# Create summary:
KEGG_enrich_summary = subset(KEGG_enrich@result , KEGG_enrich@result$qvalue <= 0.1)
################################################################
# Output files:
################################################################
out.file <- paste0("Module_",module, "_GO_enrich_summary.csv")
write.csv(GO_enrich_summary, file=out.file)
out.file <- paste0("Module_",module, "_KEGG_enrich_summary.csv")
write.csv(KEGG_enrich_summary, file=out.file)