-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
96 lines (85 loc) · 3.04 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#Welcome to MitSorter
#Written by Sharon Natasha Cox and Angelo Sante Varvara
configfile: "config.yaml"
sample = config['samples']
rule all:
input:
expand("results/calls_sorted_no_unmapped_only_5mC_methylated_{sample}.txt", sample = config["samples"]),
expand("results/calls_sorted_no_unmapped_only_5mC_no_methylated_{sample}.txt", sample = config["samples"])
rule basecalling:
input:
pod5="data/{sample}/pod5/",
ref="data/chrM.fa",
model="data/[email protected]",
modelC="data/[email protected]_5mCG_5hmCG@v1"
output:
temp("mapped_reads/calls_{sample}.bam")
log:
"logs/basecalling/{sample}.log"
message: "MitSorter is running..."
shell:
"dorado basecaller {input.model} {input.pod5} --min-qscore 9 -Y --recursive --modified-bases-models {input.modelC} --reference {input.ref} > {output}"
rule sorting:
input:
"mapped_reads/calls_{sample}.bam"
output:
bam=temp("sorted_reads/calls_sorted_{sample}.bam"),
index="sorted_reads/calls_sorted_{sample}.bam.bai"
threads: 16
message: "Basecalling complete. Now sorting and indexing BAM file..."
shell:
"samtools sort -@ {threads} --write-index {input} -o {output.bam}##idx##{output.index}"
rule removeunmapped:
input:
"sorted_reads/calls_sorted_{sample}.bam"
output:
"sorted_reads/calls_sorted_no_unmapped_{sample}.bam"
threads: 16
message: "Removing unmapped reads..."
shell:
"samtools view -@ {threads} -bF4 {input} -o {output}"
rule modadjust:
input:
"sorted_reads/calls_sorted_no_unmapped_{sample}.bam"
output:
temp("sorted_reads/calls_sorted_no_unmapped_only_5mC_{sample}.bam")
log:
"logs/modkit_adjust/{sample}.log"
message: "Keeping only 5mC tags..."
shell:
"modkit adjust-mods {input} {output} --ignore h"
rule script:
input:
"sorted_reads/calls_sorted_no_unmapped_only_5mC_{sample}.bam"
output:
"sorted_reads/INF_15_{sample}.txt"
message: "Analyzing new BAM file..."
shell:
"sorted_reads/script.sh {input} {output}"
rule discriminatemetbam:
input:
bam="sorted_reads/calls_sorted_no_unmapped_only_5mC_{sample}.bam",
text="sorted_reads/INF_15_{sample}.txt"
output:
nomet="sorted_reads/calls_sorted_no_unmapped_only_5mC_no_methylated_{sample}.bam",
met="sorted_reads/calls_sorted_no_unmapped_only_5mC_methylated_{sample}.bam"
threads: 8
message: "Producing BAM files with methylated and not methylated reads."
shell:
"samtools view -@ {threads} -b -o {output.nomet} -U {output.met} -N {input.text} {input.bam}"
rule modsummary1:
input:
"sorted_reads/calls_sorted_no_unmapped_only_5mC_methylated_{sample}.bam"
output:
"results/calls_sorted_no_unmapped_only_5mC_methylated_{sample}.txt"
message: "Last steps starting now...calculating stats for metBAM..."
shell:
"modkit summary {input} --no-sampling --tsv > {output}"
rule modsummary2:
input:
"sorted_reads/calls_sorted_no_unmapped_only_5mC_no_methylated_{sample}.bam"
output:
"results/calls_sorted_no_unmapped_only_5mC_no_methylated_{sample}.txt"
message: "Calculating stats for nometBAM..."
shell:
"modkit summary {input} --no-sampling --tsv > {output}"