3
3
println (" For annotating SARS-CoV-2 fastas with pangolin, nextclade, and vadr\n " )
4
4
println (" Author: Erin Young" )
5
5
println (
" email: [email protected] " )
6
- println (" Version: v.0.20210611 " )
6
+ println (" Version: v.0.20210815 " )
7
7
println (" " )
8
8
9
9
params. fastas = workflow. launchDir + ' /fastas'
@@ -28,6 +28,19 @@ Channel
28
28
}
29
29
.set { fastas }
30
30
31
// Feature toggles: each flag gates the matching analysis step through the
// `when:` directive of its process (or an enclosing `if`).
params.vadr = true
params.pangolin = true
params.relatedness = false
params.nextclade = true

// Channel holding the reference genome that the nextclade process stages as
// its `reference` input.
// NOTE(review): params.reference_genome is not defined in the visible part of
// this file — confirm it is declared earlier or supplied by the config.
if (params.nextclade) {
  Channel
    .fromPath(params.reference_genome, type: 'file')
    .ifEmpty {
      println("No reference genome was selected. Set with 'params.reference_genome'")
    }
    .set { reference_genome_nextclade }
} else {
  // The nextclade process declares `from reference_genome_nextclade`
  // unconditionally, so the name must resolve even when the step is disabled.
  // An empty channel keeps the script valid without ever triggering the process.
  reference_genome_nextclade = Channel.empty()
}
43
+
31
44
process fasta_prep {
32
45
publishDir " ${ params.outdir} " , mode: ' copy' , overwrite: true
33
46
tag " ${ fasta} "
@@ -39,7 +52,7 @@ process fasta_prep {
39
52
file(fasta) from fastas
40
53
41
54
output:
42
- file(" ${ task.process} /${ fasta} " ) into fastas_mafft, fastas_pangolin, fastas_nextclade, fastas_vadr
55
+ file(" ${ task.process} /${ fasta} " ) into prepped_fastas, fastas_mafft, fastas_pangolin, fastas_nextclade, fastas_vadr
43
56
44
57
shell:
45
58
'''
@@ -54,7 +67,10 @@ process fasta_prep {
54
67
'''
55
68
}
56
69
57
- params. pangolin = true
70
// Merge every fasta emitted by fasta_prep into a single multifasta kept in
// params.outdir, then hand that one file to each downstream consumer channel.
// The file name and store path must not carry stray whitespace, otherwise the
// collected file lands under an unexpected name/location.
prepped_fastas
  .collectFile(name: "Ultimate.fasta", storeDir: "${params.outdir}")
  .into { multifasta_pangolin; multifasta_vadr; multifasta_nextclade; multifasta_mafft }
73
+
58
74
params. pangolin_options = ' '
59
75
process pangolin {
60
76
publishDir " ${ params.outdir} " , mode: ' copy'
@@ -67,11 +83,10 @@ process pangolin {
67
83
params. pangolin
68
84
69
85
input:
70
- file(fasta) from fastas_pangolin . collect()
86
+ file(fasta) from multifasta_pangolin
71
87
72
88
output:
73
89
file(" ${ task.process} /lineage_report.csv" )
74
- file(" ${ task.process} /ultimate.fasta" )
75
90
file(" logs/${ task.process} /${ workflow.sessionId} .{log,err}" )
76
91
77
92
shell:
@@ -83,33 +98,32 @@ process pangolin {
83
98
date | tee -a $log_file $err_file > /dev/null
84
99
pangolin --version >> $log_file
85
100
86
- cat !{fasta} > !{task.process}/ultimate.fasta
87
-
88
101
pangolin !{params.pangolin_options} \
89
102
--outdir !{task.process} \
90
- !{task.process}/ultimate. fasta \
103
+ !{fasta} \
91
104
2>> $err_file >> $log_file
92
105
'''
93
106
}
94
107
95
- params. nextclade = true
96
108
// Extra flags inserted verbatim right after the `nextclade` command name.
params.nextclade_options = ' '
// Comma-separated gene list passed as `--genes=<list>`. The value is
// interpolated unquoted into the shell command, so it must contain no
// whitespace — a leading space would split it off from `--genes=`.
params.nextclade_genes = 'E,M,N,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S'
97
110
process nextclade {
98
111
publishDir " ${ params.outdir} " , mode: ' copy'
99
112
tag " Clade assignment with nextclade"
100
113
echo false
101
114
cpus params. medcpus
102
- container ' neherlab/nextclade:latest'
115
+ // container 'docker://quay.io/biocontainers/nextclade:1.2.0--h9ee0642_0'
116
+ container ' nextstrain/nextclade:latest'
103
117
104
118
when:
105
119
params. nextclade
106
120
107
121
input:
108
- file(fasta) from fastas_nextclade. collect()
122
+ file(fasta) from multifasta_nextclade
123
+ file(reference) from reference_genome_nextclade
109
124
110
125
output:
111
- file(" ${ task.process} /nextclade_report.tsv" )
112
- file(" ${ task.process} /ultimate.fasta" )
126
+ file(" ${ task.process} /*" )
113
127
file(" logs/${ task.process} /${ workflow.sessionId} .{log,err}" )
114
128
115
129
shell:
@@ -121,42 +135,45 @@ process nextclade {
121
135
date | tee -a $log_file $err_file > /dev/null
122
136
nextclade --version >> $log_file
123
137
124
- cat !{fasta} > !{task.process}/ultimate.fasta
138
+ wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/genemap.gff
139
+ wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/tree.json
140
+ wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/qc.json
141
+ wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/primers.csv
125
142
126
143
nextclade !{params.nextclade_options} \
127
- --jobs !{task.cpus} \
128
- --input-fasta !{task.process}/ultimate.fasta \
129
- --output-tsv !{task.process}/nextclade_report.tsv \
144
+ --input-fasta=!{fasta} \
145
+ --input-root-seq=!{reference} \
146
+ --genes=!{params.nextclade_genes} \
147
+ --input-gene-map=genemap.gff \
148
+ --input-tree=tree.json \
149
+ --input-qc-config=qc.json \
150
+ --input-pcr-primers=primers.csv \
151
+ --output-json=!{task.process}/nextclade.json \
152
+ --output-csv=!{task.process}/nextclade.csv \
153
+ --output-tsv=!{task.process}/nextclade.tsv \
154
+ --output-tree=!{task.process}/nextclade.auspice.json \
155
+ --output-dir=!{task.process} \
156
+ --output-basename=!{task.process} \
130
157
2>> $err_file >> $log_file
131
158
'''
132
159
}
133
160
134
- params. vadr = true
135
- if ( Math . round(Runtime . runtime. totalMemory() / 10241024) / 2 > params. medcpus && params. vadr ) {
136
- vadrmemory = params. medcpus + params. medcpus
137
- vadrcpus = params. medcpus
138
- } else {
139
- vadrmemory = 2
140
- vadrcpus = 1
141
- }
142
-
143
- params. vadr_options = ' --split --glsearch -s -r --nomisc --lowsim5term 2 --lowsim3term 2 --alt_fail lowscore,fstukcnf,insertnn,deletinn'
161
// Default flags for v-annotate.pl. The model key is intentionally NOT listed
// here: the vadr process already passes `--mkey` from params.vadr_reference,
// so repeating it in this string would specify the option twice and leave a
// stale value behind whenever params.vadr_reference is overridden.
params.vadr_options = '--split --glsearch -s -r --nomisc --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn'
// Model key handed to v-annotate.pl via `--mkey`.
params.vadr_reference = 'sarscov2'
// Model directory handed to v-annotate.pl via `--mdir`.
// NOTE(review): presumably the model location inside the staphb/vadr image — confirm.
params.vadr_mdir = '/opt/vadr/vadr-models'
146
164
process vadr {
147
165
publishDir " ${ params.outdir} " , mode: ' copy'
148
166
tag " Fasta QC with vadr"
149
167
echo false
150
- cpus vadrcpus
151
- memory vadrmemory. GB
168
+ cpus params. medcpus
152
169
container ' staphb/vadr:latest'
153
- stageInMode = ' symlink'
170
+ // stageInMode = 'symlink'
154
171
155
172
when:
156
173
params. vadr
157
174
158
175
input:
159
- file(fasta) from fastas_vadr . collect()
176
+ file(fasta) from multifasta_vadr
160
177
161
178
output:
162
179
file(" ${ task.process} /*" )
@@ -172,20 +189,17 @@ process vadr {
172
189
echo "no version" >> $log_file
173
190
v-annotate.pl -h >> $log_file
174
191
175
- cat !{fasta} > ultimate.fasta
176
-
177
192
v-annotate.pl !{params.vadr_options} \
193
+ --cpu !{task.cpus} \
178
194
--noseqnamemax \
179
195
--mkey !{params.vadr_reference} \
180
196
--mdir !{params.vadr_mdir} \
181
- --cpu !{task.cpus} \
182
- ultimate.fasta \
197
+ !{fasta} \
183
198
!{task.process} \
184
199
2>> $err_file >> $log_file
185
200
'''
186
201
}
187
202
188
- params. relatedness = false
189
203
if (params. relatedness){
190
204
Channel
191
205
.fromPath(params. reference_genome, type :' file' )
@@ -205,7 +219,7 @@ if (params.relatedness){
205
219
maxRetries 3
206
220
207
221
input:
208
- file(fasta) from fastas_mafft . collect()
222
+ file(fasta) from multifasta_mafft
209
223
file(reference) from reference_genome
210
224
211
225
output:
@@ -229,12 +243,11 @@ if (params.relatedness){
229
243
echo ">!{params.outgroup}" > reference.fasta
230
244
grep -v ">" !{reference} >> reference.fasta
231
245
232
- cat !{fasta} > !{task.process}/ultimate.fasta
233
246
mafft !{params.mafft_options} \
234
247
--auto \
235
248
--thread !{task.cpus} \
236
249
--maxambiguous !{params.max_ambiguous} \
237
- --addfragments !{task.process}/ultimate. fasta \
250
+ --addfragments !{fasta} \
238
251
reference.fasta \
239
252
> !{task.process}/mafft_aligned.fasta \
240
253
2>> $err_file
0 commit comments