Skip to content

Commit

Permalink
🚦v0.2.7a1
Browse files Browse the repository at this point in the history
  • Loading branch information
NatureGeorge committed Feb 24, 2021
1 parent 34c6c1b commit 7f6e095
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pdb_profiling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# @Author: ZeFeng Zhu
# @Last Modified: 2020-05-13 08:54:09 pm
# @Copyright (c) 2020 MinghuiGroup, Soochow University
__version__ = '0.2.6'
__version__ = '0.2.7'


def default_config(folder='./'):
Expand Down
19 changes: 19 additions & 0 deletions pdb_profiling/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,26 @@ class IDMapping(orm.Model):
Entry = orm.String(max_length=50, primary_key=True)
isoform = orm.String(max_length=50, primary_key=True)
is_canonical = orm.Boolean()

# ORM model for the 'ResidueMapping' table: one row per residue-level
# mapping record. It is bound to the metadata/database of the enclosing
# object (`self`), and rows are inserted by the `residue-mapping` command
# when no `--output` file is given.
# NOTE(review): six columns are flagged primary_key=True (UniProt,
# struct_asym_id, entity_id, pdb_id, residue_number, unp_residue_number);
# presumably the orm layer turns these into one composite key -- confirm.
class ResidueMapping(orm.Model):
    __tablename__ = 'ResidueMapping'
    __metadata__ = self.metadata
    __database__ = self.database
    UniProt = orm.String(max_length=50, primary_key=True)
    # Optional column: may be NULL or blank, defaults to the empty string.
    author_insertion_code = orm.String(max_length=50, allow_null=True, allow_blank=True, default='')
    author_residue_number = orm.Integer()
    chain_id = orm.String(max_length=10)
    struct_asym_id = orm.String(max_length=10, primary_key=True)
    entity_id = orm.Integer(primary_key=True)
    pdb_id = orm.String(max_length=4, primary_key=True)
    residue_number = orm.Integer(primary_key=True)
    unp_residue_number = orm.Integer(primary_key=True)
    residue_name = orm.String(max_length=10)
    observed_ratio = orm.Float()
    # Nullable JSON payload.
    multiple_conformers = orm.JSON(allow_null=True)
    # Nullable code of at most 3 characters.
    conflict_code = orm.String(max_length=3, allow_null=True)


self.Mutation = Mutation
self.IDMapping = IDMapping
self.ResidueMapping = ResidueMapping
15 changes: 10 additions & 5 deletions pdb_profiling/commands/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def id_mapping(ctx, input, column, sep, chunksize):
@click.option('--func', type=str, default='pipe_select_mo')
@click.option('--kwargs', type=str, default='{}')
@click.option('--chunksize', type=int, help="the chunksize parameter", default=200)
@click.option('--entry_filter', type=str, default='(release_date < "20201020") and ((experimental_method in ["X-ray diffraction", "Electron Microscopy"] and resolution <= 3) or experimental_method == "Solution NMR")')
@click.option('--entry_filter', type=str, default='(release_date < "20210101") and ((experimental_method in ["X-ray diffraction", "Electron Microscopy"] and resolution <= 3) or experimental_method == "Solution NMR")')
@click.option('--chain_filter', type=str, default="UNK_COUNT < SEQRES_COUNT and ca_p_only == False and identity >=0.9 and repeated == False and reversed == False and OBS_COUNT > 20")
@click.option('--skip_pdbs', type=str, default='1fc2,6wrg,5jm5,6vnn,2i6l,4zai,5jn1,6bj0,6yth,4fc3,7acu,6lsd,6llc,6xoz,6xp0,6xp1,6xp2,6xp3,6xp4,6xp5,6xp6,6xp7,6xp8,6xpa,6zqz,6t5h,6xwd,6xxc')
@click.option('--omit', type=int, default=0)
Expand Down Expand Up @@ -204,9 +204,9 @@ def get_unp_id(args):
@Interface.command("residue-mapping")
@click.option('--input', type=click.Path())
@click.option('--chunksize', type=int, help="the chunksize parameter", default=10000)
@click.option('--output', type=str)
def residue_mapping(input, chunksize, output):
output = Path(output)
@click.option('--output', type=str, default=None)
@click.pass_context
def residue_mapping(ctx, input, chunksize, output):
dfs = read_csv(input, sep='\t', keep_default_na=False,
na_values=['NULL', 'null'], chunksize=chunksize)
for df in dfs:
Expand All @@ -222,7 +222,12 @@ def residue_mapping(input, chunksize, output):
with Progress(*progress_bar_args) as p:
res = ob.run(p.track).result()
res_mapping_df = concat(res, sort=False, ignore_index=True)
res_mapping_df[sorted(res_mapping_df.columns)].to_csv(output, sep='\t', mode='a+', index=False, header=not output.exists())
if output is not None:
output = Path(output)
res_mapping_df[sorted(res_mapping_df.columns)].to_csv(output, sep='\t', mode='a+', index=False, header=not output.exists())
else:
sqlite_api = ctx.obj['custom_db']
sqlite_api.sync_insert(sqlite_api.ResidueMapping, res_mapping_df.to_dict('records'))
sleep(uniform(0, 1))


Expand Down
106 changes: 102 additions & 4 deletions pdb_profiling/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,37 @@
# @Author: ZeFeng Zhu
# @Last Modified: 2020-10-10 04:06:06 pm
# @Copyright (c) 2020 MinghuiGroup, Soochow University
# from copy import deepcopy

'''
class SwapKeyDict(dict):
def __missing__(self, key):
swap = key[::-1]
if swap not in self:
if hasattr(self, 'gap') and None in key:
return self.gap
raise KeyError(key)
else:
return self[swap]
def set_gap(self, gap):
cur = deepcopy(self)
cur.gap = gap
return cur
'''

def store_swap_key(data_from, data_to=None):
    """Copy ``data_from`` into ``data_to`` under both key orientations.

    Every ``key -> value`` entry of ``data_from`` is written to ``data_to``
    twice: once under ``key`` and once under ``key[::-1]``, so a symmetric
    table can be queried without normalising the order of the pair.

    Args:
        data_from: mapping whose keys can be reversed by slicing (for
            example 2-tuples or strings).
        data_to: mutable mapping to fill and return. A new ``dict`` is
            created when omitted. It may be the same object as
            ``data_from`` to symmetrise a mapping in place.

    Returns:
        ``data_to`` (the very object passed in, when one was given).
    """
    if data_to is None:
        data_to = {}
    entries = data_from.items()
    if data_to is data_from:
        # Inserting the reversed keys while iterating the live items view
        # raises RuntimeError, so iterate over a snapshot when aliased.
        entries = list(entries)
    for key, value in entries:
        data_to[key] = value
        data_to[key[::-1]] = value
    return data_to

'''
Matrix Data From: https://github.com/biopython/biopython/blob/master/Bio/SubsMat/MatrixInfo.py
'''

blosum62 = {
blosum62 = store_swap_key({
('W', 'F'): 1, ('L', 'R'): -2, ('S', 'P'): -1, ('V', 'T'): 0,
('Q', 'Q'): 5, ('N', 'A'): -2, ('Z', 'Y'): -2, ('W', 'R'): -3,
('Q', 'A'): -1, ('S', 'D'): 0, ('H', 'H'): 8, ('S', 'H'): -1,
Expand Down Expand Up @@ -79,7 +104,79 @@
('Z', 'N'): 0, ('X', 'A'): 0, ('B', 'R'): -1, ('B', 'N'): 3,
('F', 'D'): -3, ('X', 'Y'): -1, ('Z', 'R'): 0, ('F', 'H'): -1,
('B', 'F'): -3, ('F', 'L'): 0, ('X', 'Q'): -1, ('B', 'B'): 4
}
}, dict())

# Substitution scores keyed by (residue, residue) one-letter pairs.
# The literal lists each pair in a single orientation; store_swap_key copies
# every entry into a fresh dict under both (a, b) and (b, a), so lookups work
# regardless of the order of the two residues.
# NOTE(review): presumably the BLOSUM95 matrix taken from the same Biopython
# MatrixInfo source as blosum62 -- confirm.
blosum95 = store_swap_key({
    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): -2, ("V", "T"): -1,
    ("Q", "Q"): 7, ("N", "A"): -2, ("Z", "Y"): -4, ("W", "R"): -4,
    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 9, ("S", "H"): -2,
    ("H", "D"): -2, ("L", "N"): -5, ("W", "A"): -4, ("Y", "M"): -3,
    ("G", "R"): -4, ("Y", "I"): -2, ("Y", "E"): -4, ("B", "Y"): -4,
    ("Y", "A"): -3, ("V", "D"): -5, ("B", "S"): -1, ("Y", "Y"): 8,
    ("G", "N"): -1, ("E", "C"): -6, ("Y", "Q"): -3, ("Z", "Z"): 4,
    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
    ("T", "N"): -1, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -3,
    ("Z", "P"): -2, ("V", "M"): 0, ("T", "F"): -3, ("V", "Q"): -3,
    ("K", "K"): 6, ("P", "D"): -3, ("I", "H"): -4, ("I", "D"): -5,
    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -3, ("M", "N"): -3,
    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -3, ("X", "L"): -2,
    ("T", "M"): -1, ("Z", "C"): -5, ("X", "H"): -2, ("D", "R"): -3,
    ("B", "W"): -6, ("X", "D"): -2, ("Z", "K"): 0, ("F", "A"): -3,
    ("Z", "W"): -4, ("F", "E"): -5, ("D", "N"): 1, ("B", "K"): -1,
    ("X", "X"): -2, ("F", "I"): -1, ("B", "G"): -2, ("X", "T"): -1,
    ("F", "M"): -1, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -5, ("Q", "R"): 0,
    ("N", "N"): 7, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -4,
    ("S", "C"): -2, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
    ("W", "Q"): -3, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
    ("N", "R"): -1, ("H", "C"): -5, ("Y", "N"): -3, ("G", "Q"): -3,
    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 0, ("G", "E"): -3,
    ("G", "A"): -1, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
    ("M", "Q"): -1, ("T", "I"): -2, ("C", "D"): -5, ("V", "F"): -2,
    ("T", "A"): 0, ("T", "P"): -2, ("B", "P"): -3, ("T", "E"): -2,
    ("V", "N"): -4, ("P", "G"): -4, ("M", "A"): -2, ("K", "H"): -1,
    ("V", "R"): -4, ("P", "C"): -5, ("M", "E"): -3, ("K", "L"): -3,
    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -6,
    ("P", "K"): -2, ("M", "M"): 7, ("K", "D"): -2, ("I", "C"): -2,
    ("Z", "D"): 0, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): -1,
    ("X", "G"): -3, ("Z", "L"): -4, ("X", "C"): -3, ("Z", "H"): 0,
    ("B", "L"): -5, ("B", "H"): -1, ("F", "F"): 7, ("X", "W"): -4,
    ("B", "D"): 4, ("D", "A"): -3, ("S", "L"): -3, ("X", "S"): -1,
    ("F", "N"): -4, ("S", "R"): -2, ("W", "D"): -6, ("V", "Y"): -3,
    ("W", "L"): -3, ("H", "R"): -1, ("W", "H"): -3, ("H", "N"): 0,
    ("W", "T"): -4, ("T", "T"): 6, ("S", "F"): -3, ("W", "P"): -5,
    ("L", "D"): -5, ("B", "I"): -5, ("L", "H"): -4, ("S", "N"): 0,
    ("B", "T"): -1, ("L", "L"): 5, ("Y", "K"): -3, ("E", "Q"): 2,
    ("Y", "G"): -5, ("Z", "S"): -1, ("Y", "C"): -4, ("G", "D"): -2,
    ("B", "V"): -5, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -2,
    ("P", "R"): -3, ("V", "G"): -5, ("T", "L"): -2, ("V", "K"): -3,
    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -2,
    ("P", "F"): -5, ("I", "N"): -4, ("K", "I"): -4, ("M", "D"): -5,
    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -3, ("P", "N"): -3,
    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 0, ("Z", "E"): 4,
    ("X", "N"): -2, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
    ("K", "C"): -5, ("B", "Q"): -1, ("X", "B"): -2, ("B", "M"): -4,
    ("F", "C"): -3, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -5,
    ("B", "E"): 0, ("X", "V"): -2, ("F", "K"): -4, ("B", "A"): -3,
    ("X", "R"): -2, ("D", "D"): 7, ("W", "G"): -5, ("Z", "F"): -4,
    ("S", "Q"): -1, ("W", "C"): -4, ("W", "K"): -5, ("H", "Q"): 1,
    ("L", "C"): -3, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -5,
    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -3,
    ("H", "A"): -3, ("S", "M"): -3, ("Y", "L"): -2, ("Y", "H"): 1,
    ("Y", "D"): -5, ("E", "R"): -1, ("X", "P"): -3, ("G", "G"): 6,
    ("G", "C"): -5, ("E", "N"): -1, ("Y", "T"): -3, ("Y", "P"): -5,
    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -2,
    ("V", "H"): -4, ("T", "G"): -3, ("I", "Q"): -4, ("Z", "T"): -2,
    ("C", "R"): -5, ("V", "P"): -4, ("P", "E"): -2, ("M", "C"): -3,
    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -4,
    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
    ("I", "A"): -2, ("P", "I"): -4, ("R", "R"): 7, ("X", "M"): -2,
    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -2,
    ("Z", "N"): -1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
    ("F", "D"): -5, ("X", "Y"): -2, ("Z", "R"): -1, ("F", "H"): -2,
    ("B", "F"): -5, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
}, dict())


'''
Expand All @@ -95,7 +192,8 @@
After transforming the distance matrix into a similarity matrix, normalize each value by the highest_similarity.
NOTE: now the maximum similarity is 1
'''
miyata_similarity_matrix = {

miyata_similarity_matrix = store_swap_key({
('A', 'A'): 1.0,
('A', 'C'): -0.112,
('A', 'P'): 0.952,
Expand Down Expand Up @@ -305,4 +403,4 @@
('Y', 'S'): -1.664,
('Y', 'T'): -0.96,
('Y', 'V'): -0.216,
('Y', 'Y'): 1.0}
('Y', 'Y'): 1.0}, dict())
Loading

0 comments on commit 7f6e095

Please sign in to comment.