-
Notifications
You must be signed in to change notification settings - Fork 0
/
gb2peg.py
50 lines (42 loc) · 1.54 KB
/
gb2peg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import sys
from Bio import SeqIO
import Bio
sequence = ""
gene_loci = list()
c_start = 0
for record in SeqIO.parse(sys.argv[1], 'genbank'):
if c_start == 0:
sequence = str(record.seq)
else:
c_start = len(sequence)
sequence = "N" + str(record.sequence)
for feature in record.features:
if feature.type == 'CDS':
if isinstance(feature.location , Bio.SeqFeature.FeatureLocation):
#print(feature)
#print(type(feature.location))
strand = feature.location.strand
start = int(feature.location.start)
end = int(feature.location.end)
#print("!# ",start,end, strand, end-start, (end-start)%3)
if start > end:
print(start,end, strand)
t = start
start = end
end = t
gene_loci.append( (start,end,strand) )
# loci = (start,end,strand)
# print(loci, (loci[1]-loci[0])%3 )
# gene_seq = sequence[loci[0]:loci[1]]
# print( len(gene_seq)%3 )
# print( str(Seq(gene_seq).translate(table=4)) )
# print(feature.qualifiers['translation'])
# #print(feature.translate(seq, cds=False))
sequence = sequence.upper()
gene_loci = sorted(gene_loci)
off = open(sys.argv[2],'w')
off.write(sequence+"\n")
for locus in gene_loci:
off.write(str(locus[0])+" "+str(locus[1])+" "+str(locus[2])+"\n")
off.flush()
off.close()