forked from CandiceChuDVM/RNA-Seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_edger.R
142 lines (119 loc) · 4.67 KB
/
run_edger.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
library(edgeR)
# From: http://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
# So we can copy this script to where the analysis output is saved
thisFile <- function() {
cmdArgs <- commandArgs(trailingOnly = FALSE)
needle <- "--file="
match <- grep(needle, cmdArgs)
if (length(match) > 0) {
# Rscript
return(normalizePath(sub(needle, "", cmdArgs[match])))
} else {
# 'source'd via R console
return(normalizePath(sys.frames()[[1]]$ofile))
}
}
#args = c("lists/organoid","keys/organoid.csv")
#args = c("ahr-list","ahr-key")
args = c("lists/candice_list","keys/candice-key.csv")
# Get counts file from analysis/fname/fname.T.csv
bname = basename(args[1])
fname = paste(bname,"-count.T.csv",sep='')
dat = read.csv(file.path("analysis",bname,fname), header = TRUE, row.names=1)
# Filter low count reads
keep = rowSums(cpm(dat) > 3) >= 3
counts = dat[keep,]
## Read in key file
key = read.csv(args[2], header=TRUE, row.names=1)
#############################################
## Create model comparison matrix with sample key
#############################################
# Lampe Biopsy main
sel = grepl("MT-.*", rownames(counts)) + grepl("ERCC-.*", rownames(counts)) + grepl("mt-.*", rownames(counts))
# We have to filter out 6123 because its not full rank
# > summary(factor(sapply(names(counts), function(x) substr(x,2,5))))
# 6102 6103 6105 6106 6108 6110 6121 6122 6123 6127
# 8 8 8 8 8 8 8 8 6 8
counts = counts[!sel,]
# counts = counts[!sel,!grepl("6123", names(counts))]
# key = key[!grepl("6123", rownames(key)),]
ename = "edger-pair-treatment2"
factors = key[order(rownames(key)), c("type", "treatment")]
# factors$idnum = factor(factors$idnum)
# factors$treatment = relevel(factors$treatment, "placebo")
design = model.matrix(~type+treatment, data=factors)
groups = factors$treatment
#groups = factor(paste(key$type,key$treatment,sep='.'))
#############################################
system(paste("mkdir -p ",file.path("analysis",bname,ename)))
file.copy(thisFile(), file.path("analysis", bname, ename, "edger_script.R"))
counts = counts[,order(names(counts))]
########################
# run Pairwise analysis ...
########################
y = DGEList(counts=counts, group=groups)
y = calcNormFactors(y)
y = estimateCommonDisp(y)
y = estimateTagwiseDisp(y)
########################
# or run GLM analysis
########################
#y = DGEList(counts=counts)
#y = calcNormFactors(y)
#y = estimateGLMCommonDisp(y, design)
#y = estimateGLMTrendedDisp(y, design)
#y = estimateGLMTagwiseDisp(y, design)
#fit = glmFit(y, design)
## get normalized counts for each group for outputting to summary spreadsheet
scaled.counts = data.frame(mapply(`*`, counts, y$samples$lib.size *
y$samples$norm.factors/mean(y$samples$lib.size)))
rownames(scaled.counts) = rownames(counts)
dfs = split.data.frame(t(scaled.counts), groups)
dfss = sapply(dfs, colMeans)
#group_names = levels(groups)
#group_names_means = sapply(group_names, function(x) paste("mean_",x,sep=""), USE.NAMES=FALSE)
#colnames(dfss) = group_names_means
#### Write results
run_analysis = function(outfile, contrast=NULL, coef=NULL) {
# Pairwise test
lrt = exactTest(y)
# GLM Test
# lrt = glmLRT(fit, contrast=contrast, coef=coef)
ot1 = topTags(lrt,n=nrow(counts),sort.by="PValue")$table
#if (is.null(contrast)) {
#sel = which(as.logical(contrast))
#ot1 = merge(ot1, dfss[,sel], by=0)
#} else {
#ot1 = merge(ot1, dfss, by=0)
#}
ot1 = merge(ot1, dfss, by=0)
ot1 = ot1[order(ot1$FDR),] # Sort by ascending FDR
write.csv(ot1,outfile,row.names=FALSE)
if (!is.null(lrt$table$logFC)){
detags = rownames(ot1)[ot1$FDR < 0.05]
png(paste(outfile,".png",sep=""))
plotSmear(lrt, de.tags=detags)
abline(h=c(-2,2),col="blue")
dev.off()
}
sink(file=file.path(dirname(outfile), "summary.txt"), append=TRUE)
print(outfile)
print(summary(decideTestsDGE(lrt, p=0.05, adjust="BH")))
sink(NULL)
#print(cpm(y)[detags,])
#print(summary(decideTestsDGE(lrt, p=0.05, adjust="none")))
}
meta_run = function(coef) {run_analysis(file.path("analysis",bname,ename,paste(colnames(design)[coef],".csv",sep="")),coef=coef)}
meta_run(dim(design)[2])
#meta_run(dim(design)[2]-1)
#meta_run(dim(design)[2]-2)
system(paste("cat",file.path("analysis",bname,ename,"summary.txt")))
# Pairwise test
# run_analysis(file.path("analysis",bname,paste(ename,".csv",sep="")))
## output MA, MDS, etc.., plots
png(file.path("analysis",bname,ename,"edger-mds.png"))
p = plotMDS(y)
dev.off()
png(file.path("analysis",bname,ename,"edger-bcv.png"))
p = plotBCV(y)
dev.off()