Skip to content

Commit edde849

Browse files
authored
updated vadr and nextclade processes (#70)
* update formatting
* spelling error
* Update cecret.nf
* Update cecret_config_template.config
* Added samtools_depth
* Added samtools depth
* Updated summary file for pangolin and nextclade changes
  Pangolin and Nextclade both updated, so this has as well.
  Pangolin's scorpio is now a column in the summary file
  process combine_summary was removed because it's not needed anymore
* Update cecret_annotation.nf
* simplified vadr variables
* simplified vadr variables
* Now it should be perfect
* Update cecret.nf
* Update Update
* Changed nextclade container
  vadr and nextclade are external tools that went through some significant changes that needed to be addressed
* Updated vadr and nextclade
* Updated vadr and nextclade
* updated nextclade container
1 parent 2a2c201 commit edde849

File tree

4 files changed

+88
-66
lines changed

4 files changed

+88
-66
lines changed

staphb_toolkit/workflows/cecret/cecret.nf

+32-24
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
println("Currently using the Cecret workflow for use with amplicon-based Illumina hybrid library prep on MiSeq\n")
44
println("Author: Erin Young")
55
println("email: [email protected]")
6-
println("Version: v.20210611")
6+
println("Version: v.20210815")
77
println("")
88

99
params.reads = workflow.launchDir + '/reads'
@@ -642,7 +642,8 @@ process ivar_consensus {
642642
set val(sample), file(bam), file(reference_genome) from trimmed_bams_ivar_consensus
643643

644644
output:
645-
tuple sample, file("consensus/${sample}.consensus.fa") into consensus_nextclade, consensus_rename, consensus_pangolin, consensus_vadr
645+
tuple sample, file("consensus/${sample}.consensus.fa") into consensus_rename, consensus_pangolin, consensus_vadr
646+
tuple sample, file("consensus/${sample}.consensus.fa"), file(reference_genome) into consensus_nextclade
646647
file("consensus/${sample}.consensus.fa") into consensus_mafft
647648
file("logs/${task.process}/${sample}.${workflow.sessionId}.{log,err}")
648649
tuple sample, env(num_N), env(num_ACTG), env(num_degenerate), env(num_total) into consensus_results
@@ -1191,44 +1192,59 @@ pangolin_files
11911192
storeDir: "${params.outdir}/pangolin")
11921193

11931194
params.nextclade_options = ''
1195+
params.nextclade_genes = 'E,M,N,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S'
11941196
process nextclade {
11951197
publishDir "${params.outdir}", mode: 'copy'
11961198
tag "${sample}"
11971199
echo false
11981200
cpus params.medcpus
1199-
container 'neherlab/nextclade:latest'
1201+
container 'nextstrain/nextclade:latest'
12001202

12011203
when:
12021204
params.nextclade
12031205

12041206
input:
1205-
set val(sample), file(fasta) from consensus_nextclade
1207+
tuple val(sample), file(fasta), file(reference) from consensus_nextclade
12061208

12071209
output:
1208-
file("${task.process}/${sample}_nextclade_report.csv") into nextclade_files
1210+
file("${task.process}/${sample}/${sample}_nextclade.csv") into nextclade_files
12091211
tuple sample, env(nextclade_clade) into nextclade_clade_results
12101212
file("logs/${task.process}/${sample}.${workflow.sessionId}.{log,err}")
12111213

12121214
shell:
12131215
'''
1214-
mkdir -p !{task.process} logs/!{task.process}
1216+
mkdir -p !{task.process}/!{sample} logs/!{task.process}
12151217
log_file=logs/!{task.process}/!{sample}.!{workflow.sessionId}.log
12161218
err_file=logs/!{task.process}/!{sample}.!{workflow.sessionId}.err
12171219
12181220
date | tee -a $log_file $err_file > /dev/null
12191221
nextclade --version >> $log_file
12201222
1223+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/genemap.gff
1224+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/tree.json
1225+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/qc.json
1226+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/primers.csv
1227+
12211228
nextclade !{params.nextclade_options} \
1222-
--jobs !{task.cpus} \
1223-
--input-fasta !{fasta} \
1224-
--output-csv !{task.process}/!{sample}_nextclade_report.csv \
1229+
--input-fasta=!{fasta} \
1230+
--input-root-seq=!{reference} \
1231+
--genes=!{params.nextclade_genes} \
1232+
--input-gene-map=genemap.gff \
1233+
--input-tree=tree.json \
1234+
--input-qc-config=qc.json \
1235+
--input-pcr-primers=primers.csv \
1236+
--output-json=!{task.process}/!{sample}/!{sample}_nextclade.json \
1237+
--output-csv=!{task.process}/!{sample}/!{sample}_nextclade.csv \
1238+
--output-tsv=!{task.process}/!{sample}/!{sample}_nextclade.tsv \
1239+
--output-tree=!{task.process}/!{sample}/!{sample}_nextclade.auspice.json \
1240+
--output-dir=!{task.process}/!{sample} \
1241+
--output-basename=!{sample} \
12251242
2>> $err_file >> $log_file
12261243
1227-
1228-
nextclade_column=$(head -n 1 !{task.process}/!{sample}_nextclade_report.csv | tr ';' '\\n' | grep -wn "clade" | cut -f 1 -d ":" )
1244+
nextclade_column=$(head -n 1 !{task.process}/!{sample}/!{sample}_nextclade.csv | tr ';' '\\n' | grep -wn "clade" | cut -f 1 -d ":" )
12291245
if [ -n "$nextclade_column" ]
12301246
then
1231-
nextclade_clade=$(cat !{task.process}/!{sample}_nextclade_report.csv | grep !{sample} | cut -f $nextclade_column -d ";" | sed 's/,/;/g' | head -n 1 )
1247+
nextclade_clade=$(cat !{task.process}/!{sample}/!{sample}_nextclade.csv | grep !{sample} | cut -f $nextclade_column -d ";" | sed 's/,/;/g' | sed 's/"//g' | head -n 1 )
12321248
else
12331249
nextclade_clade="Not Found"
12341250
fi
@@ -1243,23 +1259,14 @@ nextclade_files
12431259
sort: true,
12441260
storeDir: "${params.outdir}/nextclade")
12451261

1246-
if ( Math.round(Runtime.runtime.totalMemory() / 10241024) / 2 > params.medcpus && params.vadr ) {
1247-
vadrmemory = params.medcpus + params.medcpus
1248-
vadrcpus = params.medcpus
1249-
} else {
1250-
vadrmemory = 2
1251-
vadrcpus = 1
1252-
}
1253-
1254-
params.vadr_options = '--split --glsearch -s -r --nomisc --lowsim5term 2 --lowsim3term 2 --alt_fail lowscore,fstukcnf,insertnn,deletinn'
1262+
params.vadr_options = '--split --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn'
12551263
params.vadr_reference = 'sarscov2'
12561264
params.vadr_mdir = '/opt/vadr/vadr-models'
12571265
process vadr {
12581266
publishDir "${params.outdir}", mode: 'copy'
12591267
tag "${sample}"
12601268
echo false
1261-
cpus vadrcpus
1262-
memory vadrmemory.GB
1269+
cpus params.medcpus
12631270
container 'staphb/vadr:latest'
12641271

12651272
when:
@@ -1289,10 +1296,10 @@ process vadr {
12891296
v-annotate.pl -h >> $log_file
12901297
12911298
v-annotate.pl !{params.vadr_options} \
1299+
--cpu !{task.cpus} \
12921300
--noseqnamemax \
12931301
--mkey !{params.vadr_reference} \
12941302
--mdir !{params.vadr_mdir} \
1295-
--cpu !{task.cpus} \
12961303
!{fasta} \
12971304
!{task.process}/!{sample} \
12981305
2>> $err_file >> $log_file
@@ -1656,3 +1663,4 @@ workflow.onComplete {
16561663
println("A summary of results can be found in a tab-delimited file: ${workflow.launchDir}/run_results.txt")
16571664
println("Execution status: ${ workflow.success ? 'OK' : 'failed' }")
16581665
}
1666+

staphb_toolkit/workflows/cecret/cecret_annotation.nf

+52-39
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
println("For annotating SARS-CoV-2 fastas with pangolin, nextclade, and vadr\n")
44
println("Author: Erin Young")
55
println("email: [email protected]")
6-
println("Version: v.0.20210611")
6+
println("Version: v.0.20210815")
77
println("")
88

99
params.fastas = workflow.launchDir + '/fastas'
@@ -28,6 +28,19 @@ Channel
2828
}
2929
.set { fastas }
3030

31+
params.vadr = true
32+
params.pangolin = true
33+
params.relatedness = false
34+
params.nextclade = true
35+
if (params.nextclade) {
36+
Channel
37+
.fromPath(params.reference_genome, type:'file')
38+
.ifEmpty{
39+
println("No reference genome was selected. Set with 'params.reference_genome'")
40+
}
41+
.set { reference_genome_nextclade }
42+
}
43+
3144
process fasta_prep {
3245
publishDir "${params.outdir}", mode: 'copy', overwrite: true
3346
tag "${fasta}"
@@ -39,7 +52,7 @@ process fasta_prep {
3952
file(fasta) from fastas
4053

4154
output:
42-
file("${task.process}/${fasta}") into fastas_mafft, fastas_pangolin, fastas_nextclade, fastas_vadr
55+
file("${task.process}/${fasta}") into prepped_fastas, fastas_mafft, fastas_pangolin, fastas_nextclade, fastas_vadr
4356

4457
shell:
4558
'''
@@ -54,7 +67,10 @@ process fasta_prep {
5467
'''
5568
}
5669

57-
params.pangolin = true
70+
prepped_fastas
71+
.collectFile(name: "Ultimate.fasta", storeDir: "${params.outdir}")
72+
.into { multifasta_pangolin ; multifasta_vadr ; multifasta_nextclade ; multifasta_mafft }
73+
5874
params.pangolin_options = ''
5975
process pangolin {
6076
publishDir "${params.outdir}", mode: 'copy'
@@ -67,11 +83,10 @@ process pangolin {
6783
params.pangolin
6884

6985
input:
70-
file(fasta) from fastas_pangolin.collect()
86+
file(fasta) from multifasta_pangolin
7187

7288
output:
7389
file("${task.process}/lineage_report.csv")
74-
file("${task.process}/ultimate.fasta")
7590
file("logs/${task.process}/${workflow.sessionId}.{log,err}")
7691

7792
shell:
@@ -83,33 +98,32 @@ process pangolin {
8398
date | tee -a $log_file $err_file > /dev/null
8499
pangolin --version >> $log_file
85100
86-
cat !{fasta} > !{task.process}/ultimate.fasta
87-
88101
pangolin !{params.pangolin_options} \
89102
--outdir !{task.process} \
90-
!{task.process}/ultimate.fasta \
103+
!{fasta} \
91104
2>> $err_file >> $log_file
92105
'''
93106
}
94107

95-
params.nextclade = true
96108
params.nextclade_options = ''
109+
params.nextclade_genes = 'E,M,N,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S'
97110
process nextclade {
98111
publishDir "${params.outdir}", mode: 'copy'
99112
tag "Clade assignment with nextclade"
100113
echo false
101114
cpus params.medcpus
102-
container 'neherlab/nextclade:latest'
115+
//container 'docker://quay.io/biocontainers/nextclade:1.2.0--h9ee0642_0'
116+
container 'nextstrain/nextclade:latest'
103117

104118
when:
105119
params.nextclade
106120

107121
input:
108-
file(fasta) from fastas_nextclade.collect()
122+
file(fasta) from multifasta_nextclade
123+
file(reference) from reference_genome_nextclade
109124

110125
output:
111-
file("${task.process}/nextclade_report.tsv")
112-
file("${task.process}/ultimate.fasta")
126+
file("${task.process}/*")
113127
file("logs/${task.process}/${workflow.sessionId}.{log,err}")
114128

115129
shell:
@@ -121,42 +135,45 @@ process nextclade {
121135
date | tee -a $log_file $err_file > /dev/null
122136
nextclade --version >> $log_file
123137
124-
cat !{fasta} > !{task.process}/ultimate.fasta
138+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/genemap.gff
139+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/tree.json
140+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/qc.json
141+
wget --no-check-certificate https://raw.githubusercontent.com/nextstrain/nextclade/master/data/sars-cov-2/primers.csv
125142
126143
nextclade !{params.nextclade_options} \
127-
--jobs !{task.cpus} \
128-
--input-fasta !{task.process}/ultimate.fasta \
129-
--output-tsv !{task.process}/nextclade_report.tsv \
144+
--input-fasta=!{fasta} \
145+
--input-root-seq=!{reference} \
146+
--genes=!{params.nextclade_genes} \
147+
--input-gene-map=genemap.gff \
148+
--input-tree=tree.json \
149+
--input-qc-config=qc.json \
150+
--input-pcr-primers=primers.csv \
151+
--output-json=!{task.process}/nextclade.json \
152+
--output-csv=!{task.process}/nextclade.csv \
153+
--output-tsv=!{task.process}/nextclade.tsv \
154+
--output-tree=!{task.process}/nextclade.auspice.json \
155+
--output-dir=!{task.process} \
156+
--output-basename=!{task.process} \
130157
2>> $err_file >> $log_file
131158
'''
132159
}
133160

134-
params.vadr = true
135-
if ( Math.round(Runtime.runtime.totalMemory() / 10241024) / 2 > params.medcpus && params.vadr ) {
136-
vadrmemory = params.medcpus + params.medcpus
137-
vadrcpus = params.medcpus
138-
} else {
139-
vadrmemory = 2
140-
vadrcpus = 1
141-
}
142-
143-
params.vadr_options = '--split --glsearch -s -r --nomisc --lowsim5term 2 --lowsim3term 2 --alt_fail lowscore,fstukcnf,insertnn,deletinn'
161+
params.vadr_options = '--split --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn'
144162
params.vadr_reference = 'sarscov2'
145163
params.vadr_mdir = '/opt/vadr/vadr-models'
146164
process vadr {
147165
publishDir "${params.outdir}", mode: 'copy'
148166
tag "Fasta QC with vadr"
149167
echo false
150-
cpus vadrcpus
151-
memory vadrmemory.GB
168+
cpus params.medcpus
152169
container 'staphb/vadr:latest'
153-
stageInMode = 'symlink'
170+
//stageInMode = 'symlink'
154171

155172
when:
156173
params.vadr
157174

158175
input:
159-
file(fasta) from fastas_vadr.collect()
176+
file(fasta) from multifasta_vadr
160177

161178
output:
162179
file("${task.process}/*")
@@ -172,20 +189,17 @@ process vadr {
172189
echo "no version" >> $log_file
173190
v-annotate.pl -h >> $log_file
174191
175-
cat !{fasta} > ultimate.fasta
176-
177192
v-annotate.pl !{params.vadr_options} \
193+
--cpu !{task.cpus} \
178194
--noseqnamemax \
179195
--mkey !{params.vadr_reference} \
180196
--mdir !{params.vadr_mdir} \
181-
--cpu !{task.cpus} \
182-
ultimate.fasta \
197+
!{fasta} \
183198
!{task.process} \
184199
2>> $err_file >> $log_file
185200
'''
186201
}
187202

188-
params.relatedness = false
189203
if (params.relatedness){
190204
Channel
191205
.fromPath(params.reference_genome, type:'file')
@@ -205,7 +219,7 @@ if (params.relatedness){
205219
maxRetries 3
206220

207221
input:
208-
file(fasta) from fastas_mafft.collect()
222+
file(fasta) from multifasta_mafft
209223
file(reference) from reference_genome
210224

211225
output:
@@ -229,12 +243,11 @@ if (params.relatedness){
229243
echo ">!{params.outgroup}" > reference.fasta
230244
grep -v ">" !{reference} >> reference.fasta
231245
232-
cat !{fasta} > !{task.process}/ultimate.fasta
233246
mafft !{params.mafft_options} \
234247
--auto \
235248
--thread !{task.cpus} \
236249
--maxambiguous !{params.max_ambiguous} \
237-
--addfragments !{task.process}/ultimate.fasta \
250+
--addfragments !{fasta} \
238251
reference.fasta \
239252
> !{task.process}/mafft_aligned.fasta \
240253
2>> $err_file

staphb_toolkit/workflows/cecret/configs/cecret_config_template.config

+3-2
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ bamsnap_container = 'danielmsk/bamsnap:latest'
6767
kraken2_container = 'staphb/kraken2:latest'
6868
bedtools_container = 'staphb/bedtools:latest'
6969
pangolin_container = 'staphb/pangolin:latest'
70-
nextclade_container = 'neherlab/nextclade:latest'
70+
nextclade_container = 'nextstrain/nextclade:latest'
7171
vadr_container = 'staphb/vadr:latest'
7272
parallel_perl_container = 'staphb/parallel-perl:latest'
7373
mafft_container = 'staphb/mafft:latest'
@@ -151,10 +151,11 @@ iqtree_container = 'staphb/iqtree:latest'
151151

152152
//# For process nextclade
153153
//params.nextclade_options = ''
154+
//params.nextclade_genes = 'E,M,N,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S'
154155
//params.nextclade = true
155156

156157
//# For process vadr
157-
//params.vadr_options = '--split --glsearch -s -r --nomisc --lowsim5term 2 --lowsim3term 2 --alt_fail lowscore,fstukcnf,insertnn,deletinn'
158+
//params.vadr_options = '--split --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn'
158159
//params.vadr_reference = 'sarscov2'
159160
//params.vadr_mdir = '/opt/vadr/vadr-models'
160161
//params.vadr = true

staphb_toolkit/workflows/cecret/configs/docker_containers.config

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,6 @@ bcftools_container = 'staphb/bcftools:latest'
1414
kraken2_container = 'staphb/kraken2:latest'
1515
bedtools_container = 'staphb/bedtools:latest'
1616
pangolin_container = 'staphb/pangolin:latest'
17-
nextclade_container = 'neherlab/nextclade:latest'
17+
nextclade_container = 'nextstrain/nextclade:latest'
1818
lightweight_container = 'staphb/parallel-perl:latest'
1919
iqtree_container = 'staphb/iqtree:latest'

0 commit comments

Comments
 (0)