Skip to content

Commit

Permalink
Multicomponent supports json format input
Browse files Browse the repository at this point in the history
  • Loading branch information
fangning-ren committed Jul 3, 2024
1 parent f4120a4 commit de7d7e6
Show file tree
Hide file tree
Showing 18 changed files with 651 additions and 134 deletions.
92 changes: 73 additions & 19 deletions autosolvate/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,15 @@ def reference_name(self) -> str:
# @dataclass
class Molecule(System):
#constants
_SUPPORT_INPUT_FORMATS = ['pdb', 'xyz']
_SUPPORT_INPUT_FORMATS = ['pdb', 'xyz', 'mol2', 'prep']
# other
def __init__(
self,
xyzfile: str,
charge: int,
multiplicity: int,
name = "",
residue_name = "MOL",
residue_name = "",
folder = WORKING_DIR,
amber_solvent = False,
) -> None:
Expand All @@ -233,15 +233,15 @@ def __init__(
Parameters
----------
xyzfile : str
The file path of the xyz file of the molecule, can also be a pdb file.
The file path of the structure file of the molecule, can be in xyz, pdb, mol2 or prep format.
charge : int
The charge of the molecule.
multiplicity : int
The multiplicity of the molecule.
name : str, optional
The name of the molecule, by default "" will be the basename of the xyzfile.
residue_name : str, optional
The residue name of the molecule, by default "MOL".
The residue name of the molecule, ignored if the xyzfile is provided as mol2 or prep format, by default will be assigned according to the filename.
folder : str, optional
The folder to store all files of the molecule, by default is the current working directory.
amber_solvent : bool, optional
Expand All @@ -260,29 +260,83 @@ def __init__(
self.logger.name = self.__class__.__name__

if not self.amber_solvent:
self.name = process_system_name(name, xyzfile, support_input_format=Molecule._SUPPORT_INPUT_FORMATS)
self.read_coordinate(xyzfile)
if not self.name:
self.name = process_system_name(name, xyzfile, support_input_format=Molecule._SUPPORT_INPUT_FORMATS, check_exist=False)
self.process_input_xyz(xyzfile)
self.generate_pdb()

def read_coordinate(self, fname:str):
ext = os.path.splitext(fname)[-1][1:]
setattr(self, ext, fname)
def process_input_xyz(self, xyzfile:str) -> None:
for e in Molecule._SUPPORT_INPUT_FORMATS:
if e == ext:
continue
newpath = self.reference_name + "." + e
subprocess.run(f"obabel -i {ext} {fname} -o {e} -O {newpath} ---errorlevel 0", shell = True)
setattr(self, e, newpath)
ext = os.path.splitext(xyzfile)[-1]
if e in ext:
self.__setattr__(e, xyzfile)
break
else:
raise ValueError(f"Unsupported input format for the molecule: {xyzfile}")

def generate_pdb(self):
if not self.check_exist("pdb") and self.name != "water":
prep2pdb4amber_solvent(self)
self.logger.info(f"AMBER predefined solvent {self.name} is used.")
self.logger.info(f"Converted the predefined prep file {self.prep} to pdb file {self.pdb}")
elif self.name == "water":
"""
Convert the input structure file to pdb file if the provided coordinate file is not in pdb format.
If the pdb file for the same molecule already exists, it will be ignored.
This behavior is to keep the residue name of the molecule consistent.
"""
if self.name == "water" and self.amber_solvent:
assign_water_pdb(self)
self.logger.info(f"AMBER predefined water is used.")
self.logger.info(f"Write the reference pdb file for {self.name} to {self.pdb}")
self.logger.info(f"Water model used: TIP3P")
elif self.amber_solvent:
prep2pdb(self)
self.logger.info(f"Predefined solvent {self.name} is used.")
self.logger.info(f"Converted the predefined prep file {self.prep} to pdb file {self.pdb}")
elif self.check_exist("prep"):
newpath = self.reference_name + ".pdb"
if self.check_exist("pdb"):
self.logger.warning(f"The existing pdb file {self.pdb} will be ignored as amber prep file is provided.")
shutil.move(self.pdb, self.reference_name + "-bak.pdb")
self.logger.info(f"Converted the prep file {self.prep} to pdb file {self.pdb}")
prep2pdb(self)
elif self.check_exist("mol2"):
newpath = self.reference_name + ".pdb"
if self.check_exist("pdb"):
self.logger.warning(f"The existing pdb file {self.pdb} will be ignored as mol2 file is provided.")
shutil.move(self.pdb, self.reference_name + "-bak.pdb")
self.logger.info(f"Converted the mol2 file {self.mol2} to pdb file {self.pdb}")
subprocess.run(f"obabel -i mol2 {self.mol2} -o pdb -O {newpath} ---errorlevel 0", shell = True)
self.pdb = newpath
elif self.check_exist("pdb"):
pass
elif self.check_exist("xyz"):
newpath = self.reference_name + ".pdb"
subprocess.run(f"obabel -i xyz {self.xyz} -o pdb -O {newpath} ---errorlevel 0", shell = True)
self.pdb = newpath

def get_residue_name(self):
"""
Get the residue name from the provided pdb, mol2, or prep file.
Note that the residue name of the molecule will be automatically assigned if the residue name is not found.
Note if the mol2, or prep file is provided, the 'residue_name' attribute will be updated according to these files instead of the argument 'residue_name'.
"""
new_residue_name = ""
if self.check_exist("prep"): # 这里必须要改。都已经知道residue name了就没有必要从prep文件里获取了。必须找到从prep文件中直接读取residue name的方法,或者找到把prep文件转换成pdb文件并且不借助residue name的方法。
prep2pdb_withexactpath(self.prep, self.reference_name + "-fromprep.pdb", self.residue_name)
new_residue_name = get_residue_name_from_pdb(self.reference_name + "-fromprep.pdb")
elif self.check_exist("mol2"):
newpath = self.reference_name + "-frommol2.pdb"
subprocess.run(f"obabel -i mol2 {self.mol2} -o pdb -O {newpath} ---errorlevel 0", shell = True)
new_residue_name = get_residue_name_from_pdb(newpath)
elif self.residue_name:
new_residue_name = self.residue_name
else:
self.logger.warning(f"Cannot find the residue name of molecule {self.name}, trying to automatically assign one.")
new_residue_name = try_ones_best_to_get_residue_name(self, self.pdb, self.name)
if not new_residue_name:
new_residue_name = "MOL"
if self.residue_name != new_residue_name:
self.logger.warning(f"Residue name of {self.name} changed: {self.residue_name} -> {new_residue_name}")
self.logger.warning(f"This behavior is to keep the residue name of the molecule consistent.")
self.residue_name = new_residue_name
self.logger.info(f"Residue name of {self.name} is set to {self.residue_name}")

def update(self):
if not self.amber_solvent:
Expand Down
13 changes: 6 additions & 7 deletions autosolvate/molecule/molecule_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,11 @@ def __init__(self,
def read_coordinate(self, fname:str):
ext = os.path.splitext(fname)[-1][1:]
setattr(self, ext, fname)
for e in Molecule._SUPPORT_INPUT_FORMATS:
if e == ext:
continue
nname = os.path.splitext(fname)[0] + "." + e
subprocess.run(f"obabel -i {ext} {fname} -o {e} > {nname}", shell = True)
setattr(self, e, nname)
if ext != "pdb":
subprocess.run(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb ---errorlevel 0", shell = True)
# subprocess.run(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb", shell = True)
# os.system(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb")
self.pdb = f"{self.reference_name}.pdb"

def check_protein_fragment(self, fragment_obmol:ob.OBMol, fragment_index = 0):
for j in range(fragment_obmol.NumResidues()):
Expand Down Expand Up @@ -145,7 +144,7 @@ def getFragments(self):
fragres = fragment_obmol.GetResidue(0)
fragresname = fragres.GetName()
# This is a water molecule or an amino acid moleucle.
if fragresname == "HOH" or fragresname in AMINO_ACID_RESIDUES:
if fragresname == "HOH" or fragresname == "WAT" or fragresname in AMINO_ACID_RESIDUES:
self.fragmols.append(ob.OBMol(fragment_obmol))
self.fragresiduenames.append(fragresname)
logger.info(f"Fragment {i} is a known molecule with res name {fragresname}. ")
Expand Down
8 changes: 4 additions & 4 deletions autosolvate/molecule/transition_metal_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ def check_system(self):
self.metal_legand_bonds = self.get_bond_connect_with_metal()
for metal_residue_name in self.metal_residue_names:
if not os.path.exists(f"{self.origin_folder}/{metal_residue_name}.mol2"):
logger.error(f"Metal residue {metal_residue_name}.mol2 does not exist.")
self.logger.error(f"Metal residue {metal_residue_name}.mol2 does not exist.")
raise FileNotFoundError
self.logger.info(f"Find mol2 for {metal_residue_name}: {self.origin_folder}/{metal_residue_name}.mol2")
self.metal_mol2_files.append(f"{self.origin_folder}/{metal_residue_name}.mol2")
for legand_residue_name in self.legand_residue_names:
if not os.path.exists(f"{self.origin_folder}/{legand_residue_name}.mol2"):
logger.error(f"Legand residue {legand_residue_name}.mol2 does not exist.")
self.logger.error(f"Legand residue {legand_residue_name}.mol2 does not exist.")
raise FileNotFoundError
self.logger.info(f"Find mol2 for {legand_residue_name}: {self.origin_folder}/{legand_residue_name}.mol2")
self.legand_mol2_files.append(f"{self.origin_folder}/{legand_residue_name}.mol2")
Expand Down Expand Up @@ -168,8 +168,8 @@ def find_out_real_residue_index(self, solvated_pdb:str):
for key in tmpdict.keys():
self.logger.info(f"The system contains {len(tmpdict[key])} {key}" + f" with indices {tmpdict[key]}")
if len(tmpdict[key]) != len(tmpdict[list(tmpdict.keys())[0]]):
logger.error(f"The number of {key} is different from others!")
logger.error(f"This may indicate an incomplete transition metal complex structure")
self.logger.error(f"The number of {key} is different from others!")
self.logger.error(f"This may indicate an incomplete transition metal complex structure")
raise ValueError
for i in range(len(tmpdict[list(tmpdict.keys())[0]])):
tmc_res_index = {}
Expand Down
Loading

0 comments on commit de7d7e6

Please sign in to comment.