Skip to content

Commit

Permalink
✈Implement new Score&Select (Single only)
Browse files Browse the repository at this point in the history
  • Loading branch information
NatureGeorge committed Oct 11, 2020
1 parent e77275a commit ba5f755
Show file tree
Hide file tree
Showing 9 changed files with 627 additions and 56 deletions.
269 changes: 269 additions & 0 deletions examples/Score&Select.ipynb

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions pdb_profiling/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# @Created Date: 2020-10-10 04:06:02 pm
# @Filename: data.py
# @Email: [email protected]
# @Author: ZeFeng Zhu
# @Last Modified: 2020-10-10 04:06:06 pm
# @Copyright (c) 2020 MinghuiGroup, Soochow University

# Substitution-score lookup table named after the BLOSUM62 matrix.
# Keys are 2-tuples of one-letter residue codes (the 20 standard amino acids
# plus the codes 'B', 'Z' and 'X'); values are integer scores.
# NOTE(review): the table holds 276 entries (= 23 * 24 / 2), so each unordered
# pair appears to be stored in one orientation only -- callers presumably have
# to try both (a, b) and (b, a) when looking up a score; confirm at call sites.
blosum62 = {
('W', 'F'): 1, ('L', 'R'): -2, ('S', 'P'): -1, ('V', 'T'): 0,
('Q', 'Q'): 5, ('N', 'A'): -2, ('Z', 'Y'): -2, ('W', 'R'): -3,
('Q', 'A'): -1, ('S', 'D'): 0, ('H', 'H'): 8, ('S', 'H'): -1,
('H', 'D'): -1, ('L', 'N'): -3, ('W', 'A'): -3, ('Y', 'M'): -1,
('G', 'R'): -2, ('Y', 'I'): -1, ('Y', 'E'): -2, ('B', 'Y'): -3,
('Y', 'A'): -2, ('V', 'D'): -3, ('B', 'S'): 0, ('Y', 'Y'): 7,
('G', 'N'): 0, ('E', 'C'): -4, ('Y', 'Q'): -1, ('Z', 'Z'): 4,
('V', 'A'): 0, ('C', 'C'): 9, ('M', 'R'): -1, ('V', 'E'): -2,
('T', 'N'): 0, ('P', 'P'): 7, ('V', 'I'): 3, ('V', 'S'): -2,
('Z', 'P'): -1, ('V', 'M'): 1, ('T', 'F'): -2, ('V', 'Q'): -2,
('K', 'K'): 5, ('P', 'D'): -1, ('I', 'H'): -3, ('I', 'D'): -3,
('T', 'R'): -1, ('P', 'L'): -3, ('K', 'G'): -2, ('M', 'N'): -2,
('P', 'H'): -2, ('F', 'Q'): -3, ('Z', 'G'): -2, ('X', 'L'): -1,
('T', 'M'): -1, ('Z', 'C'): -3, ('X', 'H'): -1, ('D', 'R'): -2,
('B', 'W'): -4, ('X', 'D'): -1, ('Z', 'K'): 1, ('F', 'A'): -2,
('Z', 'W'): -3, ('F', 'E'): -3, ('D', 'N'): 1, ('B', 'K'): 0,
('X', 'X'): -1, ('F', 'I'): 0, ('B', 'G'): -1, ('X', 'T'): 0,
('F', 'M'): 0, ('B', 'C'): -3, ('Z', 'I'): -3, ('Z', 'V'): -2,
('S', 'S'): 4, ('L', 'Q'): -2, ('W', 'E'): -3, ('Q', 'R'): 1,
('N', 'N'): 6, ('W', 'M'): -1, ('Q', 'C'): -3, ('W', 'I'): -3,
('S', 'C'): -1, ('L', 'A'): -1, ('S', 'G'): 0, ('L', 'E'): -3,
('W', 'Q'): -2, ('H', 'G'): -2, ('S', 'K'): 0, ('Q', 'N'): 0,
('N', 'R'): 0, ('H', 'C'): -3, ('Y', 'N'): -2, ('G', 'Q'): -2,
('Y', 'F'): 3, ('C', 'A'): 0, ('V', 'L'): 1, ('G', 'E'): -2,
('G', 'A'): 0, ('K', 'R'): 2, ('E', 'D'): 2, ('Y', 'R'): -2,
('M', 'Q'): 0, ('T', 'I'): -1, ('C', 'D'): -3, ('V', 'F'): -1,
('T', 'A'): 0, ('T', 'P'): -1, ('B', 'P'): -2, ('T', 'E'): -1,
('V', 'N'): -3, ('P', 'G'): -2, ('M', 'A'): -1, ('K', 'H'): -1,
('V', 'R'): -3, ('P', 'C'): -3, ('M', 'E'): -2, ('K', 'L'): -2,
('V', 'V'): 4, ('M', 'I'): 1, ('T', 'Q'): -1, ('I', 'G'): -4,
('P', 'K'): -1, ('M', 'M'): 5, ('K', 'D'): -1, ('I', 'C'): -1,
('Z', 'D'): 1, ('F', 'R'): -3, ('X', 'K'): -1, ('Q', 'D'): 0,
('X', 'G'): -1, ('Z', 'L'): -3, ('X', 'C'): -2, ('Z', 'H'): 0,
('B', 'L'): -4, ('B', 'H'): 0, ('F', 'F'): 6, ('X', 'W'): -2,
('B', 'D'): 4, ('D', 'A'): -2, ('S', 'L'): -2, ('X', 'S'): 0,
('F', 'N'): -3, ('S', 'R'): -1, ('W', 'D'): -4, ('V', 'Y'): -1,
('W', 'L'): -2, ('H', 'R'): 0, ('W', 'H'): -2, ('H', 'N'): 1,
('W', 'T'): -2, ('T', 'T'): 5, ('S', 'F'): -2, ('W', 'P'): -4,
('L', 'D'): -4, ('B', 'I'): -3, ('L', 'H'): -3, ('S', 'N'): 1,
('B', 'T'): -1, ('L', 'L'): 4, ('Y', 'K'): -2, ('E', 'Q'): 2,
('Y', 'G'): -3, ('Z', 'S'): 0, ('Y', 'C'): -2, ('G', 'D'): -1,
('B', 'V'): -3, ('E', 'A'): -1, ('Y', 'W'): 2, ('E', 'E'): 5,
('Y', 'S'): -2, ('C', 'N'): -3, ('V', 'C'): -1, ('T', 'H'): -2,
('P', 'R'): -2, ('V', 'G'): -3, ('T', 'L'): -1, ('V', 'K'): -2,
('K', 'Q'): 1, ('R', 'A'): -1, ('I', 'R'): -3, ('T', 'D'): -1,
('P', 'F'): -4, ('I', 'N'): -3, ('K', 'I'): -3, ('M', 'D'): -3,
('V', 'W'): -3, ('W', 'W'): 11, ('M', 'H'): -2, ('P', 'N'): -2,
('K', 'A'): -1, ('M', 'L'): 2, ('K', 'E'): 1, ('Z', 'E'): 4,
('X', 'N'): -1, ('Z', 'A'): -1, ('Z', 'M'): -1, ('X', 'F'): -1,
('K', 'C'): -3, ('B', 'Q'): 0, ('X', 'B'): -1, ('B', 'M'): -3,
('F', 'C'): -2, ('Z', 'Q'): 3, ('X', 'Z'): -1, ('F', 'G'): -3,
('B', 'E'): 1, ('X', 'V'): -1, ('F', 'K'): -3, ('B', 'A'): -2,
('X', 'R'): -1, ('D', 'D'): 6, ('W', 'G'): -2, ('Z', 'F'): -3,
('S', 'Q'): 0, ('W', 'C'): -2, ('W', 'K'): -3, ('H', 'Q'): 0,
('L', 'C'): -1, ('W', 'N'): -4, ('S', 'A'): 1, ('L', 'G'): -4,
('W', 'S'): -3, ('S', 'E'): 0, ('H', 'E'): 0, ('S', 'I'): -2,
('H', 'A'): -2, ('S', 'M'): -1, ('Y', 'L'): -1, ('Y', 'H'): 2,
('Y', 'D'): -3, ('E', 'R'): 0, ('X', 'P'): -2, ('G', 'G'): 6,
('G', 'C'): -3, ('E', 'N'): 0, ('Y', 'T'): -2, ('Y', 'P'): -3,
('T', 'K'): -1, ('A', 'A'): 4, ('P', 'Q'): -1, ('T', 'C'): -1,
('V', 'H'): -3, ('T', 'G'): -2, ('I', 'Q'): -3, ('Z', 'T'): -1,
('C', 'R'): -3, ('V', 'P'): -2, ('P', 'E'): -1, ('M', 'C'): -1,
('K', 'N'): 0, ('I', 'I'): 4, ('P', 'A'): -1, ('M', 'G'): -3,
('T', 'S'): 1, ('I', 'E'): -3, ('P', 'M'): -2, ('M', 'K'): -1,
('I', 'A'): -1, ('P', 'I'): -3, ('R', 'R'): 5, ('X', 'M'): -1,
('L', 'I'): 2, ('X', 'I'): -1, ('Z', 'B'): 1, ('X', 'E'): -1,
('Z', 'N'): 0, ('X', 'A'): 0, ('B', 'R'): -1, ('B', 'N'): 3,
('F', 'D'): -3, ('X', 'Y'): -1, ('Z', 'R'): 0, ('F', 'H'): -1,
('B', 'F'): -3, ('F', 'L'): 0, ('X', 'Q'): -1, ('B', 'B'): 4
}
2 changes: 1 addition & 1 deletion pdb_profiling/fetcher/webfetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ async def http_download(cls, method: str, info: Dict, path: str):
cls.logger.debug(f"File has been saved in: {path}")
return path
elif resp.status in (403, 404, 405):
cls.logger.warning(f"403/404/405 for: {info}")
cls.logger.debug(f"403/404/405 for: {info}")
return None
else:
mes = "code={resp.status}, message={resp.reason}, headers={resp.headers}".format(resp=resp)
Expand Down
13 changes: 10 additions & 3 deletions pdb_profiling/pipelines/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class AHP(object):
INI_LIST = [1, 1, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 2, 3, 2]
factor = -1

def __init__(self, weight=None):
self._weight = weight

@property
def weight(self) -> List:
com_list = list(combinations(self.ELE_LIST, 2))
Expand All @@ -93,9 +96,13 @@ def weight(self) -> List:
return select_vector * self.factor

def raw_score(self, array):
    """Return the signed, weighted sum of ``array``.

    The weight vector is the one supplied to the constructor when it is
    not ``None``; otherwise the ``weight`` property is used. It is scaled
    so that its first element equals 1, then every element except the
    first is negated, so the first entry of ``array`` contributes
    positively and all remaining entries contribute negatively.

    Args:
        array: 1-D sequence of values with the same length as the weight
            vector.

    Returns:
        The dot product of ``array`` with the scaled, sign-adjusted weights.
    """
    chosen = self._weight if self._weight is not None else self.weight
    # Coerce to a float ndarray so a plain list/tuple weight also works.
    chosen = np.asarray(chosen, dtype=float)
    # Division allocates a new array, so the in-place sign flip below never
    # touches the caller's (or the property's) original vector.
    signed = chosen / chosen[0]
    signed[1:] = -signed[1:]
    return np.dot(array, signed)

def score(self, array):
'''
Expand Down
3 changes: 2 additions & 1 deletion pdb_profiling/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
PDBInterface,
SIFTS,
Compounds,
PDBs
PDBs,
SIFTSs
)
from pdb_profiling.processors.pdbe.api import PDBeModelServer, PDBArchive,PDBVersioned
from pdb_profiling.processors.uniprot.api import UniProtFASTA
Expand Down
12 changes: 9 additions & 3 deletions pdb_profiling/processors/pdbe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,20 @@ async def process(cls, path: Union[str, Path, Unfuture]):
res = Dict2Tabular.pyexcel_io(traverseSuffixes(suffix, data))
if res is not None:
if isinstance(res, Generator):
one = False
for r in res:
await pipe_out(df=r, path=new_path, format='tsv', mode='a')
if r is not None:
await pipe_out(df=r, path=new_path, format='tsv', mode='a')
one = True
if not one:
cls.logger.debug(f"Without Expected Data ({suffix}): {data}")
return None
else:
await pipe_out(df=res, path=new_path, format='tsv', mode='w')
cls.logger.debug(f'Decoded file in {new_path}')
return new_path
else:
cls.logger.warning(f"Without Expected Data ({suffix}): {data}")
cls.logger.debug(f"Without Expected Data ({suffix}): {data}")
return None


Expand Down Expand Up @@ -309,7 +315,7 @@ def add_tage_to_range(df: pd.DataFrame, tage_name: str):
dfrm['unp_gaps0'] = dfrm.unp_gaps.apply(lambda x: x.count(0))
add_tage_to_range(dfrm, tage_name='sifts_range_tag')
dfrm['repeated'] = dfrm.apply(
lambda x: x['diff-'] > 0 and x['sifts_range_tag'] != 'Insertion (Specail Case)', axis=1)
lambda x: x['diff-'] > 0 and x['sifts_range_tag'] != 'Insertion_Undivided', axis=1)
dfrm['repeated'] = dfrm.apply(
lambda x: True if any(i < 0 for i in x['unp_gaps']) else x['repeated'], axis=1)
dfrm['reversed'] = dfrm.pdb_gaps.apply(lambda x: any(i < 0 for i in x))
Expand Down
20 changes: 1 addition & 19 deletions pdb_profiling/processors/pdbe/neo4j_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,14 @@
from pdb_profiling.utils import (pipe_out, sort_sub_cols, slice_series, to_interval,
lyst22intervel, SEQ_DICT, standardAA, standardNu, range_len,
interval2set, lyst2range, subtract_range,
add_range, overlap_range)
add_range, overlap_range, outside_range_len)
from pdb_profiling.log import Abclog
from pdb_profiling.fetcher.dbfetch import Neo4j
from pdb_profiling.processors.pdbe.sqlite_api import Sqlite_API
import logging
# logging.basicConfig(level=logging.INFO)


def outside_range_len(pdb_range: Union[str, Iterable], seqres_len: int, omit: int = 5) -> int:
    """Count residues lying outside the given ranges, forgiving short overhangs.

    The overhang before the first range is ``first_start - 1`` and the
    overhang after the last range is ``seqres_len - last_end``. Each
    overhang is reduced by ``omit`` and clamped at zero, and the two
    results are summed.

    Args:
        pdb_range: ranges as an iterable of ``(start, end)`` pairs, or a JSON
            string encoding such a list. Only the first element of the first
            pair and the last element of the last pair are read.
            NOTE(review): the ``- 1`` suggests 1-based inclusive positions,
            with ranges sorted ascending -- confirm with callers.
        seqres_len: total sequence length the ranges refer to.
        omit: number of residues tolerated at each end before counting.

    Returns:
        Total number of counted residues outside the ranges (never negative).

    Raises:
        IndexError: if ``pdb_range`` contains no ranges.
    """
    if isinstance(pdb_range, str):
        segments = json.loads(pdb_range)
    else:
        # Materialise so that generators and other non-indexable iterables,
        # which the annotation allows, can be indexed from both ends.
        segments = list(pdb_range)
    head_gap = segments[0][0] - 1
    tail_gap = seqres_len - segments[-1][-1]
    # An overhang of at most ``omit`` residues counts as zero.
    return max(head_gap - omit, 0) + max(tail_gap - omit, 0)


def lyst2dict(lyst: List) -> Dict:
try:
res = dict(lyst)
Expand Down
Loading

0 comments on commit ba5f755

Please sign in to comment.