-
Notifications
You must be signed in to change notification settings - Fork 4
/
snpTable2GenomeAlignment.py
executable file
·60 lines (47 loc) · 1.73 KB
/
snpTable2GenomeAlignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
'''
Copyright (c) 2015, David Edwards, Bernie Pope, Kat Holt
All rights reserved. (see README.txt for more details)
Updated to python3 by Kelly Wyres, 2018
'''
# input = reference sequence and table of SNP alleles
# output = mfasta whole genome alignment of pseudosequences
import string, re, collections
import os, sys, subprocess
import argparse
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import MutableSeq
def get_arguments():
parser = argparse.ArgumentParser(description='create whole genome pseudo aln using snp matrix and reference')
parser.add_argument("-r", "--ref", action="store", dest="ref", help="reference (fasta)", default="")
parser.add_argument("-s", "--snps", action="store", dest="snps", help="snp table (CSV)", default="")
parser.add_argument("-o", "--offset", action="store", dest="offset", help="offset (added to SNP positions in table to match reference coordinates)", default="0")
return parser.parse_args()
def main():
# Get arguments
args = get_arguments()
# read in ref seq
ref = SeqIO.read(args.ref, "fasta")
# read in SNP info
f = open(args.snps,"r")
strains = []
snps = collections.defaultdict(dict) # key 1 = strain, key2 = pos (corrected by offset), value = allele
for line in f:
fields = line.rstrip().split(",")
if len(strains) > 0:
pos = int(fields[0]) + int(args.offset)
for i in range(1,len(fields)):
snps[strains[i]][pos] = fields[i]
else:
strains = fields
# print out mutated copy of sequence for each strain
for strain in snps:
# make mutable copy of reference seq
seq = ref.seq.tomutable()
for snp in snps[strain]:
seq[snp-1] = snps[strain][snp]
print(">" + strain)
print(str(seq))
if __name__ == '__main__':
main()