-
Notifications
You must be signed in to change notification settings - Fork 4
/
update_gingr_vcf.py
executable file
·113 lines (99 loc) · 3.14 KB
/
update_gingr_vcf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/env python
'''
Copyright (c) 2015, David Edwards, Bernie Pope, Kat Holt
All rights reserved. (see README.txt for more details)
'''
#
# convert VCF generated by parseSNPTable from v1 to v1+
#
# Author(s) - David Edwards
#
# Example command:
'''
module load python-gcc/2.7.5
python convert_gingr_vcf.py -v <my_vcf>.vcf -r <isolate_a>
'''
#
# Created: 20160129
# Changes:
# <date> - <change>
#
import os, sys, glob
import subprocess
import string
import re
#import random
import operator
from optparse import OptionParser
#import resource
def main():
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-v", "--vcf", action="store", dest="vcf", help="Input VCF to be converted. Required", default="")
parser.add_option("-r", "--reference", action="store", dest="reference", help="Name of reference isolate. Required", default="")
parser.add_option("-d", "--directory", action="store", dest="directory", help="Output directory. Default: working directory", default="")
return parser.parse_args()
if __name__ == "__main__":
(options, args) = main()
def splitPath(path):
(prefix, base) = os.path.split(path)
(name, ext) = os.path.splitext(base)
return (prefix, name, ext)
def read_input(input_file_name):
input_file_handle = open(input_file_name, 'rU')
input_file = input_file_handle.readlines()
input_file_handle.close()
return input_file
def get_output_directory(output_directory):
if output_directory != '':
if output_directory[:-1] != '/':
output_directory += '/'
if output_directory != '':
if not os.path.isdir(output_directory):
os.mkdir(output_directory)
return output_directory
def convert_vcf(vcf, reference):
converted_vcf = ''
for line in vcf:
if line.startswith('#'):
if not line.startswith('#CHROM'):
converted_vcf += line
else:
bits = line.split('\t')
new_line = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t'
for i in range(len(bits)):
if bits[i] == reference:
index = i
new_line += bits[index] + '\t'
for i in range(10,len(bits)):
if i != index:
new_line += bits[i] + '\t'
converted_vcf += new_line.rstrip() + '\n'
else:
bits = line.split('\t')
new_line = '1\t'
for i in range(1,5):
new_line += bits[i] + '\t'
new_line += '40\t' + bits[6] + '\tNA\tGT\t' + bits[index] + '\t'
for i in range(10,len(bits)):
if i != index:
new_line += bits[i] + '\t'
converted_vcf += new_line.rstrip() + '\n'
return converted_vcf
def output_to_file(output_file_name, output):
output_file_handle = open(output_file_name, "w")
output_file_handle.write(output)
output_file_handle.close()
return
### MAIN PROCESS
# set up variables
if options.vcf == '':
print '\nNo input VCF file provided (-v)'
sys.exit()
if options.reference == '':
print '\nNo reference provided (-r)'
sys.exit()
else:
output_file_name = get_output_directory(options.directory) + splitPath(options.vcf)[1] + '_fix.vcf'
output_to_file(output_file_name, convert_vcf(read_input(options.vcf), options.reference))
# print ('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)