forked from vappiah/bacterial-genomics-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_annot_stats.py
executable file
·47 lines (30 loc) · 1.23 KB
/
get_annot_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
from Bio import SeqIO
import os
features=['CDS','gene','tRNA', 'tmRNA', 'rRNA',]
directory=sys.argv[1]
prefix=sys.argv[2]
def get_stats(directory,prefix):
#bname=os.path.split(directory)[-1]
bname=prefix
stats_dict={}
genebank='%s/%s.gbk'%(directory,bname)
data=[i for i in SeqIO.parse(genebank,"gb")][0]
featurecount=lambda x:len([i for i in data.features if i.type==x])
counts=[featurecount(feature) for feature in features]
#print(counts)
cds=[i for i in data.features if i.type=='CDS']
#Getting information on Insertion Sequences
insertion_sequences=[i.qualifiers['product'][0] for i in cds if 'IS' in ''.join(i.qualifiers['product'])[0:2]]
insertion_sequence_names=list(set(insertion_sequences))
#print(counts)
for name in insertion_sequence_names:
counts.append(insertion_sequences.count(name))
features.append(name)
counts.append(len(insertion_sequences))
features.append('Insertion_Sequences')
#print(list(set(insertion_sequences)))
print('Counting Annotated Features.............................')
for feature,count in zip(features,counts):
print("%s:%d"%(feature,count))
get_stats(directory,prefix)