Skip to content

Commit

Permalink
updated snpTable2GenomeAlignment.py to python 3 version
Browse files Browse the repository at this point in the history
  • Loading branch information
kelwyres committed Feb 18, 2021
1 parent 3c5d23b commit 90707ac
Showing 1 changed file with 22 additions and 19 deletions.
41 changes: 22 additions & 19 deletions snpTable2GenomeAlignment.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,47 @@
#!/bin/env python
#!/usr/bin/env python3

'''
Copyright (c) 2015, David Edwards, Bernie Pope, Kat Holt
All rights reserved. (see README.txt for more details)
Updated to python3 by Kelly Wyres, 2018
'''



# input = reference sequence and table of SNP alleles
# output = mfasta whole genome alignment of pseudosequences
import string, re, collections
import os, sys, subprocess
from optparse import OptionParser
import argparse
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import MutableSeq

def main():

usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
def get_arguments():
parser = argparse.ArgumentParser(description='create whole genome pseudo aln using snp matrix and reference')
parser.add_argument("-r", "--ref", action="store", dest="ref", help="reference (fasta)", default="")
parser.add_argument("-s", "--snps", action="store", dest="snps", help="snp table (CSV)", default="")
parser.add_argument("-o", "--offset", action="store", dest="offset", help="offset (added to SNP positions in table to match reference coordinates)", default="0")

parser.add_option("-r", "--ref", action="store", dest="ref", help="reference (fasta)", default="")
parser.add_option("-s", "--snps", action="store", dest="snps", help="snp table (CSV)", default="")
parser.add_option("-o", "--offset", action="store", dest="offset", help="offset (added to SNP positions in table to match reference coordinates)", default="0")
# parser.add_option("-f", "--format", action="store", dest="format", help="format (fasta)", default="fasta")

return parser.parse_args()

if __name__ == "__main__":
def main():

(options, args) = main()
# Get arguments
args = get_arguments()

# read in ref seq
# ref = SeqIO.read(options.ref, options.format)
ref = SeqIO.read(options.ref, "fasta")
ref = SeqIO.read(args.ref, "fasta")

# read in SNP info
f = file(options.snps,"r")
f = open(args.snps,"r")
strains = []
snps = collections.defaultdict(dict) # key 1 = strain, key2 = pos (corrected by offset), value = allele
for line in f:
fields = line.rstrip().split(",")
if len(strains) > 0:
pos = int(fields[0]) + int(options.offset)
pos = int(fields[0]) + int(args.offset)
for i in range(1,len(fields)):
snps[strains[i]][pos] = fields[i]
else:
Expand All @@ -52,6 +53,8 @@ def main():
seq = ref.seq.tomutable()
for snp in snps[strain]:
seq[snp-1] = snps[strain][snp]
print ">" + strain
print str(seq)
print(">" + strain)
print(str(seq))

if __name__ == '__main__':
main()

0 comments on commit 90707ac

Please sign in to comment.