-
Notifications
You must be signed in to change notification settings - Fork 0
/
map_mip_arms.smk
75 lines (68 loc) · 2.37 KB
/
map_mip_arms.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
converts arms files from mips into psl alignments that can then be visualized on
a genome browser (e.g. igv or ucsc genome browser)
Reports genomic coordinates of individual mips for later use
'''
configfile: 'map_mip_arms.yaml'
output=config['output_folder']
rule all:
input:
filtered_psl=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.psl',
filtered_bed=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.bed'
rule arms_to_fasta:
'''
converts ligation and extension arms into fasta sequences for mapping onto
the genome. Reverse complements the ligation arm and concatenates the two
arms as 'exons'
'''
input:
arms_file=config['mip_arms_file']
output:
mips_fasta=output+'/'+config['panel_name']+'_mips.fa'
script:
'scripts/arms_to_fasta.py'
rule map_arms_to_genome:
'''
maps the fasta version of mip arms to the genome using blat
'''
input:
mips_fasta=output+'/'+config['panel_name']+'_mips.fa',
genome_path=config['genome_path']
output:
mips_psl=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips.psl'
shell:
'blat {input.genome_path} {input.mips_fasta} -minScore=5 -tileSize=6 {output.mips_psl}'
rule filter_best_hit:
'''
searches the blat hits and gets the best blat hit (or best tied hits).
Also creates a coordinates file with the genomic coordinates of every mip
'''
input:
mips_psl=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips.psl'
output:
filtered_psl=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.psl',
coords_file=output+'/'+config['panel_name']+'_'+config['genome_name']+'_genomic_coords.tsv'
script:
'scripts/filter_best_hit.py'
rule psl_to_bed:
'''
converts a psl file into a bed file for other downstream analysis programs
'''
input:
filtered_psl=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.psl'
conda:
'envs/pslToBed.yaml'
output:
filtered_bed=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.bed'
shell:
'pslToBed {input.filtered_psl} {output.filtered_bed}'
rule make_hub:
'''
creates a hub.txt file for viewing data on the UCSC genome browser
'''
input:
mips_fasta=output+'/'+config['panel_name']+'_'+config['genome_name']+'_mips_filtered.psl'
output:
hub=output+'/'+config['panel_name']+'_'+config['genome_name']+'psl_hub.txt'
script:
'scripts/make_hub.py'