Skip to content

Commit

Permalink
🌌towards v0.3.2 (#15)
Browse files Browse the repository at this point in the history
* 🦹‍♂️deal with old edge case

* 🧜‍♂️add 1d-coordinates

* 👨‍🌾impl dataclass

* 👮‍♂️fix unchange

* 🏃‍♂️add funcs for Identifier

* 👨‍🎨add pipe_scheduled_ranged_map_res_df

* ⚡change pytest-timeout

* 🦼add demo files

* ☔fix test path

* 🌬fix command.py output path

* 🦽add time for test_single_select

* add auto_assign

* add time
  • Loading branch information
NatureGeorge authored Apr 25, 2021
1 parent 5c00ef6 commit 6df4d11
Show file tree
Hide file tree
Showing 18 changed files with 728 additions and 313 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Profiling Protein Structures from Protein Data Bank and integrate various resour

## Install

> Notice: require Python Environment >= 3.6, Platform Independent
> Notice: requires Python >= 3.7; platform independent.
Install by `pip` command.

Expand Down
2 changes: 1 addition & 1 deletion pdb_profiling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# @Author: ZeFeng Zhu
# @Last Modified: 2020-05-13 08:54:09 pm
# @Copyright (c) 2020 MinghuiGroup, Soochow University
__version__ = '0.2.12'
__version__ = '0.3.2'


def default_config(folder='./'):
Expand Down
10 changes: 5 additions & 5 deletions pdb_profiling/commands/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def do_nothing(dfrm):
@click.option('--column', type=str, default=None)
@click.option('--sep', type=str, default='\t')
@click.option('--chunksize', type=int, help="the chunksize parameter", default=50)
@click.option('--auto_assign/--no-auto_assign', default=True, is_flag=True)
@click.option('--auto_assign/--no-auto_assign', default=False, is_flag=True)
@click.option('--sleep/--no-sleep', default=True, is_flag=True)
@click.pass_context
def id_mapping(ctx, input, column, sep, chunksize, auto_assign, sleep):
Expand Down Expand Up @@ -283,7 +283,7 @@ def residue_mapping(ctx, input, chunksize, output, sleep):
na_values=['NULL', 'null', ''], chunksize=chunksize)
sqlite_api = ctx.obj['custom_db']
if output is not None:
output = Path(output)
output = ctx.obj['folder']/output
done = 0
for df in dfs:
for col in ('new_pdb_range_raw', 'new_unp_range_raw', 'conflict_pdb_index'):
Expand All @@ -294,7 +294,7 @@ def residue_mapping(ctx, input, chunksize, output, sleep):
row.new_unp_range_raw,
row.new_pdb_range_raw,
conflict_pdb_index=row.conflict_pdb_index,
struct_asym_id=row.struct_asym_id) for _, row in df.iterrows()]
struct_asym_id=row.struct_asym_id) for row in df.to_records()]
with Progress(*progress_bar_args) as p:
res = ob.run(p.track).result()
res_mapping_df = concat(res, sort=False, ignore_index=True)
Expand Down Expand Up @@ -442,7 +442,7 @@ def export_residue_remapping(ctx, with_id, sele, output):
if df.shape[0] == 0:
continue
df.rename(columns={'edUniProt': 'UniProt'}).to_csv(
output, index=False, mode='a+', sep='\t', header=not output_path.exists())
output_path, index=False, mode='a+', sep='\t', header=not output_path.exists())
console.log(f'result saved in {output_path}')


Expand Down Expand Up @@ -524,7 +524,7 @@ def export_smr_residue_remapping(ctx, identity_cutoff, length_cutoff, with_id, s
if df.shape[0] == 0:
continue
df.rename(columns={'edUniProt': 'UniProt'}).to_csv(
output, index=False, mode='a+', sep='\t',header=not output_path.exists())
output_path, index=False, mode='a+', sep='\t', header=not output_path.exists())
console.log(f'result saved in {output_path}')
#full_df = read_csv(output_path, sep='\t', keep_default_na=False)
#best_indexes = full_df.groupby(['UniProt','Pos', 'Alt']).select_rank.idxmin()
Expand Down
2 changes: 1 addition & 1 deletion pdb_profiling/cython/cyrange.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ cpdef bint isin_range(object input_range, int value):
return False


cpdef int convert_index(object lrange, object rrange, int site):
cdef int convert_index(object lrange, object rrange, int site) except *:
# convert from rrange to lrange
cdef int lstart, rstart, lend, rend
for (lstart, lend), (rstart, rend) in zip(lrange, rrange):
Expand Down
5 changes: 3 additions & 2 deletions pdb_profiling/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from pdb_profiling.processors.pdbe.record import (
Base,
PDB,
PDBAssemble,
PDBAssembly,
PDBInterface,
SIFTS,
Compounds,
PDBs,
SIFTSs
SIFTSs,
RCSB1DCoordinates,
)
from pdb_profiling.processors.pdbe.api import PDBeModelServer, PDBArchive, PDBVersioned
from pdb_profiling.processors.uniprot.api import UniProtINFO, UniProtAPI
Expand Down
21 changes: 0 additions & 21 deletions pdb_profiling/processors/pdbe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,6 @@
from re import compile as re_compile
from pdb_profiling.processors.database import SqliteDB

# Shared regex prefix: two lookaheads demand at least one ASCII letter and at
# least one digit somewhere in the identifier. The trailing character class is
# deliberately left unquantified so that each entry below appends its own
# repetition count.
common_pat = r'^(?=.*[A-Za-z])(?=.*\d)[A-Za-z\d]'


# Identifier kind -> compiled pattern. Insertion order is the order in which
# `default_id_tag` tries the patterns, so the first matching kind wins.
pats = dict(pdb_id=re_compile(common_pat+r'{4}$'),
            pdb_entity_id=re_compile(common_pat+r'{4}_[0-9]+$'),
            UniProt=re_compile(common_pat+r'{6,}[\-]*[0-9]*$'),
            pdb_complex_id=re_compile(r'PDB-CPX-[0-9]+'))


def default_id_tag(identifier: str, default: str = '', raise_error: bool = False):
    """Return the name of the first pattern in `pats` that fully matches `identifier`.

    Args:
        identifier: The identifier string to classify.
        default: Value returned when no pattern matches and `raise_error`
            is false.
        raise_error: When true, an unmatched identifier raises instead of
            returning `default`.

    Returns:
        One of the keys of `pats` (``'pdb_id'``, ``'pdb_entity_id'``,
        ``'UniProt'``, ``'pdb_complex_id'``), or `default`.

    Raises:
        ValueError: If `identifier` cannot be matched against a string
            pattern at all (e.g. it is ``None``), or if nothing matched and
            `raise_error` is true.
    """
    try:
        for pat_name, pat in pats.items():
            if pat.fullmatch(identifier) is not None:
                return pat_name
    except TypeError as err:
        # `Pattern.fullmatch` raises TypeError for non-string input; keep the
        # ValueError callers expect but preserve the underlying cause.
        raise ValueError(f"Invalid Identifier: {identifier} !") from err
    if raise_error:
        raise ValueError(f'Unexpected Identifiers: {identifier}')
    return default


class PDBeDB(SqliteDB):

Expand Down
17 changes: 13 additions & 4 deletions pdb_profiling/processors/pdbe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from unsync import unsync, Unfuture
from random import choice
from hashlib import sha1
from pdb_profiling.processors.pdbe import default_id_tag
from pdb_profiling.processors.recordbase import IdentifierBase
from pdb_profiling.utils import related_dataframe, flatten_dict, pipe_out, dumpsParams
from pdb_profiling.log import Abclog
from pdb_profiling.fetcher.webfetch import UnsyncFetch
Expand Down Expand Up @@ -43,6 +43,16 @@
FUNCS = []


def mask_ib(i, default='', raise_error=False):
    """Map an identifier object to the column tag used for it.

    Args:
        i: Object exposing `source` and `level` attributes (the call sites
            in this module pass `IdentifierBase` instances).
        default: Value returned for an unrecognised identifier when
            `raise_error` is false.
        raise_error: When true, an unrecognised identifier raises instead of
            returning `default`.

    Returns:
        ``'pdb_id'`` for a PDB identifier at entry level, ``'UniProt'`` for
        any UniProt identifier, otherwise `default`.

    Raises:
        AssertionError: If the identifier is unrecognised and `raise_error`
            is true.
    """
    if i.source == 'PDB' and i.level == 'entry':
        return 'pdb_id'
    if i.source == 'UniProt':
        return 'UniProt'
    if raise_error:
        # Raised explicitly (not via `assert`) so the check survives `-O`;
        # the offending identifier is included to make the failure traceable.
        raise AssertionError(f'Unexpected Case! {i!r}')
    return default

def str_number_converter(x):
try:
return int(x)
Expand Down Expand Up @@ -207,7 +217,7 @@ def yieldCommon(data: Dict) -> Generator:
for key in value:
if isinstance(value[key], (Dict, List)):
value[key] = json.dumps(value[key]).decode('utf-8')
yield values, (default_id_tag(pdb, '_code_'),), (pdb,)
yield values, (mask_ib(IdentifierBase(pdb), '_code_'),), (pdb,)

@staticmethod
@dispatch_on_set('api/pdb/entry/polymer_coverage/')
Expand Down Expand Up @@ -348,8 +358,7 @@ def yieldSIFTSAnnotation(data: Dict) -> Generator:
continue
chain[key] = json.dumps(value).decode(
'utf-8') if isinstance(value, Dict) else value
chain[default_id_tag(
top_root, raise_error=True)] = top_root
chain[mask_ib(IdentifierBase(top_root), raise_error=True)] = top_root
chain[sec_root] = annotation
yield chains, None
elif len(data[top_root].keys()) == 1 and 'PDB' in data[top_root].keys():
Expand Down
Loading

0 comments on commit 6df4d11

Please sign in to comment.