-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathCASPERQuick.py
163 lines (149 loc) · 5.62 KB
/
CASPERQuick.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""This file is a rapid way to investigate CASPER_Seq_Finder generated files for target locations. Run this python file
AFTER you have created your setup file: Cquicksetup.txt."""
import os
from Algorithms import SeqTranslate
from bioservices import KEGG
import requests
from bs4 import BeautifulSoup
class CasperQuick:
def __init__(self,casper_seq_file, output_file_path, ofa):
self.csffile = casper_seq_file
self.ST = SeqTranslate()
self.allTargets = {}
self.location = tuple()
self.output = output_file_path
self.off_target_all = ofa
def loadGenesandTargets(self, rk):
region_keggs = rk
for region_kegg in region_keggs:
self.allTargets[str(region_kegg)] = list()
if type(region_kegg) == tuple:
self.location = region_kegg
else:
k = Kegg()
self.location = k.gene_locator(region_kegg)
myfy = open(self.csffile)
while True:
line = myfy.readline()
if line == '':
break
if line.find('CHROMOSOME') != -1:
s = line.find("#")
if line[s+1:-1] == str(self.location[0]): # checks to see if it is on the right chromosome
curpos = int()
while curpos < int(self.location[1]):
line = myfy.readline()
curpos = self.ST.decompress64(line.split(',')[0])
while curpos < int(self.location[2]):
line = self.ST.decompress_csf_tuple(myfy.readline()[:-1])
curpos = line[0]
self.allTargets[str(region_kegg)].append(line)
break
myfy.close()
self.printoutresultstofile()
def printoutresultstofile(self):
out = self.output + "quickresults.txt"
f = open(out, 'w')
for item in self.allTargets.keys():
f.write(item)
f.write('\n')
for target in self.allTargets[item]:
insert = str(target[0]) + "," + str(target[1]) + "," + str(target[2]) + '\n'
f.write(insert)
f.close()
class Kegg:
k = KEGG()
location = "http://www.genome.jp/dbget-bin/www_bget?"
def gene_locator(self, gene_id):
res = self.k.get(gene_id)
d = self.k.parse(res)
newstr = d['POSITION']
cstop = newstr.find(':')
if cstop == -1:
chromosome = 1
else:
chrom = newstr[0:cstop]
chromosome = self.translate_chromosome(chrom)
sense = True
if newstr.find('complement') != -1: # it is on the opposite strand of DNA
sense = False
cstop = newstr.find('(')
if newstr.find('join') != -1:
cstop = newstr.find('(')
srt = cstop + 1
bothpos = newstr[srt:len(newstr)-1].split(",")
totpos = []
for i in range(0, len(bothpos)):
spc = bothpos[i].find('..')
spos = bothpos[i][0:spc]
epos = bothpos[i][spc+2:len(bothpos[i])]
totpos.append((spos, epos))
startloc = totpos[0][0]
endloc = totpos[len(bothpos)-1][1]
else:
srt = cstop + 1
spc = newstr.find('..')
startloc = newstr[srt:spc]
if not sense:
endloc = newstr[spc+2:len(newstr)-1]
else:
endloc = newstr[spc+2:len(newstr)]
totloc = (chromosome, int(startloc), int(endloc))
return totloc
def translate_chromosome(self, chr):
numbers = ('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16')
letters = ('A','B','C','D','E','F','G','H')
roman = ('I','II','III','IV','V','VI','VII','VIII','IX','X','XI','XII','XIII', 'XIV',
'XV', 'XVI')
types =[numbers,letters,roman]
for list in types:
if chr in list:
ind = list.index(chr)
return numbers[ind]
return 1
def revcom(self, sequence):
revseq = ""
change = {'A':'T',
'T':'A',
'G':'C',
'C':'G'}
for nt in sequence:
rnt = change[nt]
revseq = rnt + revseq
return revseq
def added_nts(self, seqstart, seqend, vector, orgcode, chromosome):
url = self.location + "FROM=" + seqstart + "&TO=" + seqend + "&VECTOR="\
+ vector + "&ORG=" + orgcode + "&CHR=" + chromosome
source_code = requests.get(url)
plain_text = source_code.text
soup = BeautifulSoup(plain_text, "html.parser")
exons = []
x = soup.find('pre')
for region in soup.findAll('font'):
seq = str(region)
st = seq.find('>') + 1
z = seq.find('/font') - 2
exons.append(seq[st:z])
# get the sequence:
sx = str(x)
start = sx.find("\n", 10) + 1
end = sx.find("/pre") - 2
unfontseq = sx[start:end]
trueseq = ""
ingene = True
i = 0
for nt in unfontseq:
if nt == "<":
ingene = False
if nt == ">":
ingene = True
elif ingene:
trueseq += nt
i += 1
exon_position_tuples = []
for exon in exons:
spos = trueseq.find(exon)
epos = spos + len(exon)
pos = (spos, epos)
exon_position_tuples.append(pos)
return exon_position_tuples