-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.nf
104 lines (69 loc) · 2.04 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env nextflow
// Explicitly enable DSL2 syntax (process definitions invoked from a workflow block).
nextflow.enable.dsl = 2
/*
 * Restrict the cgMLST table to the samples named in the samplesheet.
 *
 * Inputs:
 *   cgmlsttab   - allele table; the first field of each row is compared against the IDs.
 *                 May be tab- or comma-delimited (the downstream distance step
 *                 converts commas to tabs, so both forms reach this process).
 *   samplesheet - comma-separated file; only its first column is used.
 *
 * Output:
 *   filtered_cgmlst.tab - the table's first (header) row plus every row whose
 *                         first field appears in the samplesheet's first column.
 *                         Rows are copied unchanged, so the original delimiter is kept.
 */
process filter_by_samplesheet {
    publishDir params.outdir, mode: 'copy'

    input:
    path(cgmlsttab)
    path(samplesheet)

    output:
    path("filtered_cgmlst.tab")

    script:
    // -F '[,\t]' : split on comma OR tab so \$1 is the sample ID for either table format
    //              (with a tab-only separator a CSV table never matches and the output is empty).
    // FNR==1     : always keep the table's header row; without it the header survives only
    //              if the samplesheet's header cell happens to equal the table's first header cell.
    """
    cut -d',' -f1 ${samplesheet} > ids.csv
    awk -F '[,\t]' 'NR==FNR {id[\$1]; next} FNR==1 || \$1 in id' ids.csv ${cgmlsttab} > filtered_cgmlst.tab
    """
}
/*
 * Build a distance matrix from a cgMLST table.
 *
 * Input:
 *   cgmlst_csv - allele table; every comma is replaced by a tab before the
 *                distance tool is run.
 *
 * Output:
 *   distance.csv - stdout of the selected distance executable, published to params.outdir.
 *
 * params.mode == 'count-missing' selects `cgmlst-dists-count-missing-as-diff`;
 * any other value selects `cgmlst-dists`.
 */
process calculate_distance {
    publishDir params.outdir, mode: 'copy'

    input:
    path cgmlst_csv

    output:
    path 'distance.csv'

    script:
    // The two modes differ only in which executable is called.
    def dist_tool = params.mode == 'count-missing' ? 'cgmlst-dists-count-missing-as-diff' : 'cgmlst-dists'
    """
    cat ${cgmlst_csv} | sed 's/,/\t/g' > cgmlst.tab
    ${dist_tool} -c cgmlst.tab > distance.csv
    """
}
/*
 * Run dendrogram.py on the distance matrix.
 *
 * Input:
 *   distance_matrix - file passed as the only argument to dendrogram.py.
 *
 * Output:
 *   dendrogram.pdf - expected in the task directory after the script finishes
 *                    (presumably written by dendrogram.py itself; confirm against that script).
 */
process dendrogram {
    publishDir params.outdir, mode: 'copy'

    input:
    path distance_matrix

    output:
    path 'dendrogram.pdf'

    script:
    """
    dendrogram.py ${distance_matrix}
    """
}
/*
 * Run cluster.py on the distance matrix once per threshold.
 *
 * Executed only when params.runClustering is exactly `true`.
 *
 * Input:
 *   distance_matrix - file passed to cluster.py.
 *
 * Output:
 *   <linkage_type>_cluster_<threshold>.csv - one file per threshold: a
 *   "run_accession,our_clusters" header line followed by cluster.py's stdout.
 *
 * params.threshold is expanded into a bash word list, so it is assumed to be a
 * single value or a whitespace-separated string - TODO confirm against the config.
 */
process cluster_py {
    publishDir params.outdir, mode: 'copy'

    when:
    params.runClustering == true

    input:
    path distance_matrix

    output:
    path "${params.linkage_type}_cluster_*.csv"

    script:
    // \${i} and \$0 are escaped so bash/awk, not Groovy, expand them.
    """
    for i in ${params.threshold}; do
        cluster.py ${distance_matrix} -t \${i} --linkage ${params.linkage_type} |
            awk 'BEGIN {OFS=","; print "run_accession","our_clusters"} {print \$0}' > ${params.linkage_type}_cluster_\${i}.csv
    done
    """
}
/*
 * Entry workflow: optionally filter the cgMLST table by samplesheet, compute
 * the distance matrix, then feed it to both the dendrogram and clustering steps.
 */
workflow {
    ch_cgmlst = Channel.fromPath(params.cgmlst)

    // 'NO_FILE' is the sentinel for "no samplesheet supplied": use the table as-is.
    if (params.samplesheet_input == 'NO_FILE') {
        ch_distance_input = ch_cgmlst
    } else {
        // Keep only the table rows whose sample ID is listed in the samplesheet.
        ch_samplesheet = Channel.fromPath(params.samplesheet_input)
        filter_by_samplesheet(ch_cgmlst, ch_samplesheet)
        ch_distance_input = filter_by_samplesheet.out
    }

    calculate_distance(ch_distance_input)

    // Both downstream steps consume the same distance matrix.
    dendrogram(calculate_distance.out)
    cluster_py(calculate_distance.out)
}