This repository has been archived by the owner on Nov 29, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvcf-annotate-snpeff-snpsift.py
330 lines (284 loc) · 8.75 KB
/
vcf-annotate-snpeff-snpsift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#!/usr/bin/python3.8
# -*- coding: utf-8 -*-
"""
This is the CLI launcher of the vcf-annotate-snpeff-snpsift pipeline. This
pipeline is powered by Snakemake and uses snpsift and snpeff in order to
annotate VCF using various databases.
You may filter through this pipeline.
Please be aware that a Snakemake report is available for more details on
both results and methods.
If you have any question, please refer to the wiki at:
https://github.com/tdayris-perso/vcf-annotate-snpeff-snpsift/wiki
Or open an issue at:
https://github.com/tdayris-perso/vcf-annotate-snpeff-snpsift/issues
Citations:
https://github.com/tdayris-perso/vcf-annotate-snpeff-snpsift/wiki/Pipeline_Content
"""
import argparse
import os
import os.path
import logging
import sys
from pathlib import Path
from snakemake.utils import makedirs
from snakemake.shell import shell
import snakemake
from typing import Any
try:
from scripts import prepare_pipeline, common_script_vass
except ImportError:
scripts_path = Path(os.path.realpath(__file__)).parent / "scripts"
sys.path.append(str(scripts_path))
import prepare_pipeline, common_script_vass
except ModuleNotFoundError:
scripts_path = Path(os.path.realpath(__file__)).parent / "scripts"
sys.path.append(str(scripts_path))
import prepare_pipeline, common_script_vass
# Building custom class for help formatter
class CustomFormatter(
argparse.RawDescriptionHelpFormatter,
argparse.ArgumentDefaultsHelpFormatter
):
"""
This class is used only to allow line breaks in the documentation,
without breaking the classic argument formatting.
"""
def parser() -> argparse.ArgumentParser:
"""
Build command line parser object
"""
main_parser = argparse.ArgumentParser(
description=sys.modules[__name__].__doc__,
formatter_class=CustomFormatter
)
subparsers = main_parser.add_subparsers()
config = subparsers.add_parser(
"config",
parents=[prepare_pipeline.parser()],
add_help=False
)
config.set_defaults(func=prepare_pipeline.main)
snake = subparsers.add_parser(
"snakemake",
add_help=True
)
snake.add_argument(
"--snakemake-args",
help="Snakemake arguments. If you use this wrapper instead "
"of real snakemake call, then please put all your"
" arguments in simple quotes. (default: %(default)s)",
type=str,
default=""
)
snake.add_argument(
"--no-profile",
help="Do not activate snakemake profile. This means you have to "
"define several environment variables. See documentation for "
"more information.",
action="store_true",
default=False
)
snake.add_argument(
"--no-cache",
help="Do not use the shared cache that allows user not to re-run "
"indexation steps. Not using cache will slow down your analysis "
"and use disk space for duplicated data.",
action="store_true",
default=False
)
snake.set_defaults(func=snakemake_run)
report_parser = subparsers.add_parser(
"report",
add_help=True
)
report_parser.add_argument(
"--snakemake-args",
help="Snakemake arguments. If you use this wrapper instead "
"of real snakemake call, then please put all your"
" arguments in simple quotes. (default: %(default)s)",
type=str,
default=""
)
report_parser.add_argument(
"--no-profile",
help="Do not activate snakemake profile. This means you have to "
"define several environment variables. See documentation for "
"more information.",
action="store_true",
default=False
)
report_parser.add_argument(
"--no-cache",
help="Do not use the shared cache that allows user not to re-run "
"indexation steps. Not using cache will slow down your analysis "
"and use disk space for duplicated data.",
action="store_true",
default=False
)
igr = subparsers.add_parser(
"flamingo",
add_help=True
)
igr.add_argument(
"--vcf_dir",
help="Path to experimental design file (default: %(default)s)",
type=str,
default=os.getcwd()
)
igr.set_defaults(func=igr_run)
report_parser.set_defaults(func=report)
return main_parser
def parse_args(args: Any) -> argparse.ArgumentParser:
"""
This function parses command line arguments
Parameters
args Any All command line arguments
Return
ArgumentParser A object designed to parse the command line
"""
return parser().parse_args(args)
def snakemake_command(opt: str = "",
use_profile: bool = True,
make_report: bool = False,
use_cache: bool = True) -> str:
"""
Build snakemake command line
"""
return [
"snakemake",
f"-s {os.getenv('SNAKEFILE')}",
f"--profile {os.getenv('PROFILE')}" if use_profile is True else "",
"--report Annotated_VCF.html" if make_report is True else "",
"--cache tr2gene" if use_cache is True else "",
opt
]
def run_cmd(*cmd_line) -> None:
"""
Run a provided command line
"""
cmd_line = " ".join(cmd_line)
print(cmd_line)
shell(cmd_line)
def snakemake_run(cmd_line_args) -> None:
"""
Call snakemake itself
"""
command = snakemake_command(
opt=cmd_line_args.snakemake_args,
use_profile=not cmd_line_args.no_profile,
make_report=False,
use_cache=not cmd_line_args.no_cache
)
run_cmd(*command)
def report(cmd_line_args) -> None:
"""
Call snakemake itself
"""
command = snakemake_command(
opt=cmd_line_args.snakemake_args,
use_profile=not cmd_line_args.no_profile,
make_report=True,
use_cache=not cmd_line_args.no_cache
)
run_cmd(*command)
def igr_run(cmd_line_args) -> None:
"""
Call this pipeline whole pipeline with default arguments
"""
snpeff = (
f"--SnpEff-db {os.getenv('SNPEFF_DB_PATH')}"
if os.getenv('SNPEFF_DB_PATH') is not None
else ""
)
dbnsfp = (
f"--dbNSFP {os.getenv('DBNSFP_PATH')}"
if os.getenv('DBNSFP_PATH') is not None
else ""
)
gwascat = (
f"--GWASCat {os.getenv('GWASCAT_PATH')}"
if os.getenv('GWASCAT_PATH') is not None
else ""
)
kgenomes = (
f"--Kgenomes {os.getenv('KGENOME_PATH')}"
if os.getenv('KGENOME_PATH') is not None
else ""
)
cosmic = (
f"--Kgenomes {os.getenv('COSMIC_PATH')}"
if os.getenv('COSMIC_PATH') is not None
else ""
)
clinvar = (
f"--Kgenomes {os.getenv('CLINVAR_PATH')}"
if os.getenv('CLINVAR_PATH') is not None
else ""
)
dbsnp = (
f"--Kgenomes {os.getenv('DBSNP_PATH')}"
if os.getenv('DBSNP_PATH') is not None
else ""
)
kaviar = (
f"--Kgenomes {os.getenv('KAVIAR_PATH')}"
if os.getenv('KAVIAR_PATH') is not None
else ""
)
config_cmd = [
"python3", os.getenv('ANNOT_LAUNCHER'), "config",
snpeff, dbnsfp, gwascat, kgenomes, cosmic, clinvar, dbsnp, kaviar,
"--recursive", "--threads 20", "--debug",
"--cold-storage /mnt/isilon /mnt/archivage",
]
run_cmd(*config_cmd)
snakemake_cmd = [
"python3",
os.getenv("ANNOT_LAUNCHER"),
"snakemake"
]
run_cmd(*snakemake_cmd)
report_cmd = [
"python3",
os.getenv("ANNOT_LAUNCHER"),
"report"
]
run_cmd(*report_cmd)
def run_cmd(*cmd_line) -> None:
"""
Run a provided command line
"""
cmd_line = " ".join(cmd_line)
if check_env():
print(cmd_line)
shell(cmd_line)
else:
print("Environment was not suitable for this pipeline to run.")
def check_env() -> bool:
"""
Verify environment variables required for this pipeline
"""
expected = [
os.getenv('SNAKEFILE'),
os.getenv('PROFILE'),
os.getenv("ANNOT_LAUNCHER"),
]
print(expected)
test_if_none = all(var is not None for var in expected)
if test_if_none is True:
test_if_exists = all(os.path.exists(path) for path in expected)
return test_if_exists
return False
if __name__ == '__main__':
# Parsing command line arguments
args = parse_args(sys.argv[1:])
makedirs("logs/prepare")
# Build logging object and behaviour
logging.basicConfig(
filename="logs/prepare/design.log", filemode="w", level=logging.DEBUG
)
try:
args.func(args)
except Exception as e:
logging.exception("%s", e)
raise