diff --git a/snpTable2GenomeAlignment.py b/snpTable2GenomeAlignment.py index 88d1ece..b9aaaec 100755 --- a/snpTable2GenomeAlignment.py +++ b/snpTable2GenomeAlignment.py @@ -1,46 +1,47 @@ -#!/bin/env python +#!/usr/bin/env python3 ''' Copyright (c) 2015, David Edwards, Bernie Pope, Kat Holt All rights reserved. (see README.txt for more details) + +Updated to python3 by Kelly Wyres, 2018 ''' + + + # input = reference sequence and table of SNP alleles # output = mfasta whole genome alignment of pseudosequences import string, re, collections import os, sys, subprocess -from optparse import OptionParser +import argparse from Bio import SeqIO from Bio.SeqRecord import SeqRecord from Bio.Seq import MutableSeq -def main(): - - usage = "usage: %prog [options]" - parser = OptionParser(usage=usage) +def get_arguments(): + parser = argparse.ArgumentParser(description='create whole genome pseudo aln using snp matrix and reference') + parser.add_argument("-r", "--ref", action="store", dest="ref", help="reference (fasta)", default="") + parser.add_argument("-s", "--snps", action="store", dest="snps", help="snp table (CSV)", default="") + parser.add_argument("-o", "--offset", action="store", dest="offset", help="offset (added to SNP positions in table to match reference coordinates)", default="0") - parser.add_option("-r", "--ref", action="store", dest="ref", help="reference (fasta)", default="") - parser.add_option("-s", "--snps", action="store", dest="snps", help="snp table (CSV)", default="") - parser.add_option("-o", "--offset", action="store", dest="offset", help="offset (added to SNP positions in table to match reference coordinates)", default="0") -# parser.add_option("-f", "--format", action="store", dest="format", help="format (fasta)", default="fasta") - return parser.parse_args() -if __name__ == "__main__": +def main(): - (options, args) = main() + # Get arguments + args = get_arguments() # read in ref seq -# ref = SeqIO.read(options.ref, options.format) - ref = SeqIO.read(options.ref, "fasta") + ref = SeqIO.read(args.ref, "fasta") # read in SNP info - f = file(options.snps,"r") + f = open(args.snps,"r") strains = [] snps = collections.defaultdict(dict) # key 1 = strain, key2 = pos (corrected by offset), value = allele for line in f: fields = line.rstrip().split(",") if len(strains) > 0: - pos = int(fields[0]) + int(options.offset) + pos = int(fields[0]) + int(args.offset) for i in range(1,len(fields)): snps[strains[i]][pos] = fields[i] else: @@ -52,6 +53,8 @@ def main(): seq = ref.seq.tomutable() for snp in snps[strain]: seq[snp-1] = snps[strain][snp] - print ">" + strain - print str(seq) + print(">" + strain) + print(str(seq)) +if __name__ == '__main__': + main() \ No newline at end of file