-
Notifications
You must be signed in to change notification settings - Fork 1
/
enzyme.py
executable file
·54 lines (43 loc) · 1.39 KB
/
enzyme.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
'''
scan sequences for restiction enzyme sites
output hits as GFF annotation file
'''
from rjv.gff import *
from rjv.fasta import *
import re,sys
usage=\
'''
usage: ./enzyme.py input.fa pattern_file > output_file.gff
where pattern file is:
<label> && <forward strand regexp> && <reverse strand regexp>
use --none-- if there is no reverse strand regexp
'''
if len(sys.argv) == 1:
print usage
exit()
f = open(sys.argv[2],'rb')
reg = []
for line in f:
reg.append([x.strip() for x in line.split('&&')])
f.close()
ct=0
sys.stdout.write('##gff-version 3' + newline)
for rec in next_fasta(sys.argv[1]):
name = rec['header'].split()[0]
for expr in reg:
#search for matches on the forward strand
for match in re.finditer(expr[1], rec['seq']):
ct += 1
x = make_gff(name,'python', 'enzyme binding',
match.start()+1, match.end(),
ct, expr[0], strand='+')
write_gff3_record(x,sys.stdout)
if expr[2] == '--none--': continue
#search for matches on the reverse strand
for match in re.finditer(expr[2], rec['seq']):
ct += 1
x = make_gff(name,'python', 'enzyme binding',
match.start()+1, match.end(),
ct, expr[0], strand='-')
write_gff3_record(x,sys.stdout)