-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzequencer.smk
146 lines (114 loc) · 4.81 KB
/
zequencer.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from snakemake.utils import min_version
import os
min_version("6.0")
###########################################################################
# Set up default folders for input_dir & output_dir if they do not exist #
###########################################################################
src_dir = "./src"
if 'src_dir' not in config:
config['src_dir'] = src_dir
default_dict_path = os.path.join(config['src_dir'], 'variant_calling_rules.json')
def read_json(filepath):
"""
Basic json read function, throws warnings and returns [] if file does not exist or is not of valid format
"""
import os
import json
if not os.path.exists(filepath):
print("Warning: condor_q_held.json Does not exist")
return {}
try:
with open(filepath) as f_in:
return json.load(f_in)
except ValueError:
print("Warning: json is blank (none held) or bad format")
return {}
def deep_merge_dicts(original, incoming):
"""
Deep merge two dictionaries. Modifies original.
For key conflicts if both values are:
a. dict: Recursively call deep_merge_dicts on both values.
c. any other type: Value is overridden.
d. conflicting types: Value is overridden.
"""
for key in incoming:
if key in original:
if isinstance(original[key], dict) and isinstance(incoming[key], dict):
deep_merge_dicts(original[key], incoming[key])
else:
original[key] = incoming[key]
else:
original[key] = incoming[key]
return original
####
# read the default dictionary with the hardcoded (yuck path)
default_config_dict = read_json(default_dict_path)
config = deep_merge_dicts(default_config_dict, config)
input_dir = "./"
out_dir = "out"
ref_dir = "ref"
os.makedirs(config['ref_dir'],exist_ok=True)
os.makedirs(config['out_dir'],exist_ok=True)
primer_amplicon_path = os.path.join(config['ref_dir'],"primer_amplicon.csv")
if 'primer_amplicon_path' not in config:
config['primer_amplicon_path'] = primer_amplicon_path
ref_adaptor_path = os.path.join(config['ref_dir'],"ref_adaptor.fasta")
if 'ref_adaptor_path' not in config:
config['ref_adaptor_path'] = ref_adaptor_path
use_amplicon_norm_bbduk_trim = True
if 'use_amplicon_norm_bbduk_trim' in config:
if config['use_amplicon_norm_bbduk_trim'][0].upper() == 'F':
use_amplicon_norm_bbduk_trim = False
insert_fasta_path = os.path.join(config['ref_dir'], "insert_amplicons.fasta")
if 'insert_fasta_path' not in config:
config['insert_fasta_path'] = insert_fasta_path
src_dir = "./src"
if 'src_dir' not in config:
config['src_dir'] = src_dir
##################################
# Get Sample list using patterns #
##################################
# needs to end in -R1.fastq.gz and reverse file as -R2.fastq.gz
sample_list = os.listdir(config['input_dir'])
# print(config['input_dir'])
# print(sample_list)
suffix = '_L001_R1_001.fastq.gz'
suffix ='-R1.fastq.gz'
suffix_len = len(suffix)
SAMPLES_PRE = [x[:-suffix_len] for x in sample_list if (len(x) > suffix_len) and (x[-suffix_len:] == suffix and x[:2] != '._')]
# print(SAMPLES_PRE)
SAMPLES = []
for sample_i in SAMPLES_PRE:
suffix_repl = suffix.replace('R1','R2')
# if '{0}-R2.fastq.gz'.format(sample_i) in sample_list:
if '{0}{1}'.format(sample_i,suffix_repl) in sample_list:
SAMPLES.append(sample_i)
# SAMPLES=['ZIKV-DAKAR-41524-06192021_Rep01']
# print(SAMPLES)
config['SAMPLES'] = SAMPLES
# print(config)
#################################
# Begin the Modulized Workflow #
################################
snake_make_rules = os.path.join(config['src_dir'], 'variant_calling_rules.smk')
module variant_calling_rules:
# here, plain paths, URLs and the special markers for code hosting providers (see below) are possible.
snakefile: snake_make_rules
config: config
use rule all from variant_calling_rules
use rule download_ncbi_reference from variant_calling_rules
use rule create_insert_fasta_path_from_primer_amplicon from variant_calling_rules
use rule create_primer_amp_from_ref_adaptor_fasta from variant_calling_rules
use rule create_primer_fasta_ref_bed from variant_calling_rules
if use_amplicon_norm_bbduk_trim:
use rule bbduk_filter_primer_reads from variant_calling_rules
use rule bbduk_trim_primer_and_quality from variant_calling_rules
use rule bbmerge_reads_from_bbduk_filter_trim from variant_calling_rules
use rule slow_amplicon_normalization from variant_calling_rules
use rule bbmap_reformat_trim_read_length from variant_calling_rules
use rule bbmap_filtered_trimmed from variant_calling_rules
else:
use rule bbmerge_reads from variant_calling_rules
use rule bbmap_merged_reads from variant_calling_rules
use rule filter_downsample_mask_bam from variant_calling_rules
use rule bbmap_call_variants from variant_calling_rules