-
Notifications
You must be signed in to change notification settings - Fork 1
/
gen-unmapped-report.Rscript
151 lines (129 loc) · 4.73 KB
/
gen-unmapped-report.Rscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env Rscript
options(error = quote({
dump.frames(to.file=TRUE, dumpto="Rcoredump")
load("Rcoredump.rda")
print(Rcoredump)
q()
}))
require(genoPlotR)
help = paste(
"gen-unmapped-report.Rscript Generate a kmer GWAS report for unmapped kmers",
"Daniel Wilson (2022)",
"",
"Usage: Rscript gen-unmapped-report.Rscript prefix anatype k refname ref_gb maf alignident mincount srcdir outdir logdir",
sep="\n")
# Read options from command line
args = commandArgs(trailingOnly = TRUE)
if(length(args)!=11) {
cat(help,sep="\n")
stop("\nIncorrect usage\n")
}
PREFIX = args[1]
ANATYPE = args[2]
K = args[3]
REFNAME = args[4]
REF_GB = args[5]
MAF = args[6]
ALIGNIDENT = args[7]
MINCOUNT = args[8]
SRC = args[9]
PWD = args[10]
LOGDIR = args[11]
FIGDIR = paste0(ANATYPE,"kmer",K,"_kmergenealign_figures/")
MACORMAF = ifelse(MAF==0,NULL,ifelse(MAF<1,"maf","mac"))
# Functions
# From https://stackoverflow.com/questions/3245862/format-numbers-to-significant-figures-nicely-in-r
s3 = function(vec, digits=3) gsub("\\.$", "", formatC(signif(vec,digits=digits), digits=digits, format="fg", flag="#"))
laststr = function(s) {x=unlist(strsplit(s," ")); x[length(x)]}
`%+%` = paste0
`%-%` = function(a, b) paste(a, b, sep=" ")
`%/%` = function(a, b) paste(a, b, sep="\n")
newline = "\n"
grepit = function(filename,regex,gz=FALSE) {
cmd = "grep"
if(gz) cmd = "zgrep"
tryCatch({
s = readLines((PIPE=pipe(paste0(cmd," '",regex,"' ",filename))))
suppressWarnings(close(PIPE))
s = unlist(strsplit(s," "))
return(s[length(s)])
},error=function(e) NA)
}
nth = function(n) paste0(n, ifelse((n %% 100)>10 & (n %% 100)<20, "th", c("th","st","nd","rd",rep("th",96))[1 + (n %% 10)]))
# Preliminaries
setwd(PWD)
# MAF vs MAC text
is.maf = ifelse(MAF==0,TRUE,ifelse(MAF<1,TRUE,FALSE))
is.maf.text = ifelse(is.maf,"minor allele frequency (MAF) threshold of" %-% MAF,"minor allele count (MAC) threshold of" %-% MAF)
is.maf.text2 = ifelse(is.maf,"minor allele frequency (MAF)","minor allele count (MAC)")
is.maf.text3 = ifelse(is.maf,"MAF","MAC")
html.head =
"<!DOCTYPE html>" %/%
"<html>" %/%
"<head>" %/%
" <title>Kmer GWAS report: unmapped kmers</title>" %/%
" <link rel='stylesheet' href='report.css'>" %/%
newline
html.body =
"</head>" %/%
"<body>" %/%
" <h1>Kmer GWAS report: unmapped kmers</h1>" %/%
" <div><p class='timestamp'><code>Prefix:" %-% PREFIX %+% "; KmerType:" %-% ANATYPE %+% "; K:" %/%
" " %+% K %+% "; ReferenceGenome:" %-% REFNAME %+% ";" %-% is.maf.text3 %+% ":" %-% MAF %+% "; MinCount:" %/%
" " %+% MINCOUNT %+% "; AlignIdent:" %-% ALIGNIDENT %+% "; ReportTimeStamp:" %/%
" " %+% date() %+% ".</code></p></div>" %/%
newline
html.foot =
"<script src='report.js'></script>" %/%
"</body>" %/%
"</html>" %/%
""
# Gene-specific input/output files
# To be written to PWD (overwriting if necessary)
outfile.prefix = paste0(PREFIX,"_",ANATYPE,K,".")
outfile.html = paste0(outfile.prefix,"report_unmapped.html")
filename.unmapped = paste0(FIGDIR,PREFIX,"_",ANATYPE,K,"_",REFNAME,"_",MACORMAF,"_",MAF,"_alignIdent_",ALIGNIDENT,"_alignPosMinCount_",MINCOUNT,"_unaligned_kmersandpvals.txt")
filename.plotManhattan.log = paste0(LOGDIR,"/plotManhattan.log")
# Unmapped kmers
table.unmapped = read.delim(filename.unmapped)
max.signif.unmapped = max(table.unmapped$negLog10)
thr.signif = as.numeric(grepit(filename.plotManhattan.log,'Bonferroni threshold:'))
html.body = html.body %/%
" <p>Not all kmers were aligned to the user-supplied reference genome. The strongest" %/%
" significance among the unmapped kmers was" %-% s3(max.signif.unmapped) %+% ","
if(max.signif.unmapped>=thr.signif) {
html.body = html.body %/%
" which was genome-wide significant.</p>" %/%
"" %/%
newline
# Filter the significant unmapped kmers
gd = table.unmapped$negLog10>=thr.signif
tb = data.frame('kmer'=table.unmapped$kmer[gd], 'Signif'=s3(table.unmapped$negLog10[gd]), 'beta'=s3(table.unmapped$beta[gd]), 'MAC'=table.unmapped$mac[gd])
html.body = html.body %/%
" <h2>Significant unmapped kmers</h2>" %/%
" <div class='divkmertab'>" %/%
" <table class='kmertab center'>" %/%
" <tr>" %/%
paste0(" <th>", paste0(colnames(tb), collapse="</th><th>"), "</th>") %/%
" </tr>"
for(j in 1:nrow(tb)) {
html.body = html.body %/%
" <tr>" %/%
paste0(" <td>", paste0(tb[j,], collapse="</td><td>"), "</tr>") %/%
" </tr>"
}
html.body = html.body %/%
" </table>" %/%
" </div>" %/%
newline
} else {
html.body = html.body %/%
" which was not genome-wide significant.</p>" %/%
newline
}
html.body = html.body %/%
" <p>For a summary of the associations at all unmapped kmers," %/%
" follow the link to <a href=" %+% filename.unmapped %+% " target='_blank' rel='noopener noreferrer'>this file</a>.</p>" %/%
newline
# Final
cat(html.head,html.body,html.foot,file=outfile.html,append=FALSE)