Multicomponent supports json format input

Liu-group · Jul 3, 2024 · de7d7e6 · de7d7e6
1 parent f4120a4
commit de7d7e6
Show file tree

Hide file tree

Showing 18 changed files with 651 additions and 134 deletions.
diff --git a/autosolvate/molecule/molecule.py b/autosolvate/molecule/molecule.py
@@ -215,15 +215,15 @@ def reference_name(self) -> str:
 # @dataclass 
 class Molecule(System): 
     #constants 
-    _SUPPORT_INPUT_FORMATS = ['pdb', 'xyz'] 
+    _SUPPORT_INPUT_FORMATS = ['pdb', 'xyz', 'mol2', 'prep']
     # other
     def __init__(
             self, 
             xyzfile:        str, 
             charge:         int, 
             multiplicity:   int,
             name            = "",
-            residue_name    = "MOL",
+            residue_name    = "",
             folder          = WORKING_DIR,
             amber_solvent   = False,
             ) -> None:
@@ -233,15 +233,15 @@ def __init__(
         Parameters
         ----------
         xyzfile : str
-            The file path of the xyz file of the molecule, can also be a pdb file.
+            The file path of the structure file of the molecule, can be in xyz, pdb, mol2 or prep format.
         charge : int
             The charge of the molecule.
         multiplicity : int
             The multiplicity of the molecule.
         name : str, optional
             The name of the molecule, by default "" will be the basename of the xyzfile.
         residue_name : str, optional
-            The residue name of the molecule, by default "MOL".
+            The residue name of the molecule, ignored if the xyzfile is provided as mol2 or prep format, by default will be assigned according to the filename.
         folder : str, optional
             The folder to store all files of the molecule, by default is the current working directory.
         amber_solvent : bool, optional
@@ -260,29 +260,83 @@ def __init__(
         self.logger.name = self.__class__.__name__
 
         if not self.amber_solvent:
-            self.name           = process_system_name(name, xyzfile, support_input_format=Molecule._SUPPORT_INPUT_FORMATS)
-            self.read_coordinate(xyzfile)
+            if not self.name:
+                self.name = process_system_name(name, xyzfile, support_input_format=Molecule._SUPPORT_INPUT_FORMATS, check_exist=False)  
+            self.process_input_xyz(xyzfile)
+            self.generate_pdb()
 
-    def read_coordinate(self, fname:str):
-        ext = os.path.splitext(fname)[-1][1:]
-        setattr(self, ext, fname)
+    def process_input_xyz(self, xyzfile:str) -> None:
         for e in Molecule._SUPPORT_INPUT_FORMATS:
-            if e == ext:
-                continue
-            newpath = self.reference_name + "." + e
-            subprocess.run(f"obabel -i {ext} {fname} -o {e} -O {newpath} ---errorlevel 0", shell = True)
-            setattr(self, e, newpath)
+            ext = os.path.splitext(xyzfile)[-1]
+            if e in ext:
+                self.__setattr__(e, xyzfile)
+                break
+        else:
+            raise ValueError(f"Unsupported input format for the molecule: {xyzfile}")
 
     def generate_pdb(self):
-        if not self.check_exist("pdb") and self.name != "water":
-            prep2pdb4amber_solvent(self)
-            self.logger.info(f"AMBER predefined solvent {self.name} is used.")
-            self.logger.info(f"Converted the predefined prep file {self.prep} to pdb file {self.pdb}")
-        elif self.name == "water":
+        """
+        Convert the input structure file to pdb file if the provided coordinate file is not in pdb format.
+        If the pdb file for the same molecule already exists, it will be ignored.
+        This behavior is to keep the residue name of the molecule consistent.
+        """
+        if self.name == "water" and self.amber_solvent:
             assign_water_pdb(self)
             self.logger.info(f"AMBER predefined water is used.")
             self.logger.info(f"Write the reference pdb file for {self.name} to {self.pdb}")
             self.logger.info(f"Water model used: TIP3P")
+        elif self.amber_solvent:
+            prep2pdb(self)
+            self.logger.info(f"Predefined solvent {self.name} is used.")
+            self.logger.info(f"Converted the predefined prep file {self.prep} to pdb file {self.pdb}")
+        elif self.check_exist("prep"):
+            newpath = self.reference_name + ".pdb"
+            if self.check_exist("pdb"):
+                self.logger.warning(f"The existing pdb file {self.pdb} will be ignored as amber prep file is provided.")
+                shutil.move(self.pdb, self.reference_name + "-bak.pdb")
+            self.logger.info(f"Converted the prep file {self.prep} to pdb file {self.pdb}")
+            prep2pdb(self)
+        elif self.check_exist("mol2"):
+            newpath = self.reference_name + ".pdb"
+            if self.check_exist("pdb"):
+                self.logger.warning(f"The existing pdb file {self.pdb} will be ignored as mol2 file is provided.")
+                shutil.move(self.pdb, self.reference_name + "-bak.pdb")
+            self.logger.info(f"Converted the mol2 file {self.mol2} to pdb file {self.pdb}")
+            subprocess.run(f"obabel -i mol2 {self.mol2} -o pdb -O {newpath} ---errorlevel 0", shell = True)
+            self.pdb = newpath
+        elif self.check_exist("pdb"):
+            pass
+        elif self.check_exist("xyz"):
+            newpath = self.reference_name + ".pdb"
+            subprocess.run(f"obabel -i xyz {self.xyz} -o pdb -O {newpath} ---errorlevel 0", shell = True)
+            self.pdb = newpath
+
+    def get_residue_name(self):
+        """
+        Get the residue name from the provided pdb, mol2, or prep file.
+        Note that the residue name of the molecule will be automatically assigned if the residue name is not found.
+        Note if the mol2, or prep file is provided, the 'residue_name' attribute will be updated according to these files instead of the argument 'residue_name'.
+        """
+        new_residue_name = ""
+        if self.check_exist("prep"):    # 这里必须要改。都已经知道residue name了就没有必要从prep文件里获取了。必须找到从prep文件中直接读取residue name的方法，或者找到把prep文件转换成pdb文件并且不借助residue name的方法。
+            prep2pdb_withexactpath(self.prep, self.reference_name + "-fromprep.pdb", self.residue_name)
+            new_residue_name = get_residue_name_from_pdb(self.reference_name + "-fromprep.pdb")
+        elif self.check_exist("mol2"):
+            newpath = self.reference_name + "-frommol2.pdb"
+            subprocess.run(f"obabel -i mol2 {self.mol2} -o pdb -O {newpath} ---errorlevel 0", shell = True)
+            new_residue_name = get_residue_name_from_pdb(newpath)
+        elif self.residue_name:
+            new_residue_name = self.residue_name
+        else:
+            self.logger.warning(f"Cannot find the residue name of molecule {self.name}, trying to automatically assign one.")
+            new_residue_name = try_ones_best_to_get_residue_name(self, self.pdb, self.name)
+        if not new_residue_name:
+            new_residue_name = "MOL"
+        if self.residue_name != new_residue_name:
+            self.logger.warning(f"Residue name of {self.name} changed: {self.residue_name} -> {new_residue_name}")
+            self.logger.warning(f"This behavior is to keep the residue name of the molecule consistent.")
+            self.residue_name = new_residue_name
+        self.logger.info(f"Residue name of {self.name} is set to {self.residue_name}")
 
     def update(self):
         if not self.amber_solvent:

diff --git a/autosolvate/molecule/molecule_complex.py b/autosolvate/molecule/molecule_complex.py
@@ -89,12 +89,11 @@ def __init__(self,
     def read_coordinate(self, fname:str):
         ext = os.path.splitext(fname)[-1][1:]
         setattr(self, ext, fname)
-        for e in Molecule._SUPPORT_INPUT_FORMATS:
-            if e == ext:
-                continue
-            nname = os.path.splitext(fname)[0] + "." + e
-            subprocess.run(f"obabel -i {ext} {fname} -o {e} > {nname}", shell = True)
-            setattr(self, e, nname)
+        if ext != "pdb":
+            subprocess.run(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb ---errorlevel 0", shell = True)
+            # subprocess.run(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb", shell = True)
+            # os.system(f"obabel -i {ext} {fname} -o pdb -O {self.reference_name}.pdb")
+            self.pdb = f"{self.reference_name}.pdb"
 
     def check_protein_fragment(self, fragment_obmol:ob.OBMol, fragment_index = 0):
         for j in range(fragment_obmol.NumResidues()):
@@ -145,7 +144,7 @@ def getFragments(self):
             fragres = fragment_obmol.GetResidue(0)
             fragresname = fragres.GetName()
             # This is a water molecule or an amino acid moleucle.
-            if fragresname == "HOH" or fragresname in AMINO_ACID_RESIDUES:
+            if fragresname == "HOH" or fragresname == "WAT" or fragresname in AMINO_ACID_RESIDUES:
                 self.fragmols.append(ob.OBMol(fragment_obmol))
                 self.fragresiduenames.append(fragresname)
                 logger.info(f"Fragment {i} is a known molecule with res name {fragresname}. ")

diff --git a/autosolvate/molecule/transition_metal_complex.py b/autosolvate/molecule/transition_metal_complex.py
@@ -111,13 +111,13 @@ def check_system(self):
         self.metal_legand_bonds = self.get_bond_connect_with_metal()
         for metal_residue_name in self.metal_residue_names:
             if not os.path.exists(f"{self.origin_folder}/{metal_residue_name}.mol2"):
-                logger.error(f"Metal residue {metal_residue_name}.mol2 does not exist.")
+                self.logger.error(f"Metal residue {metal_residue_name}.mol2 does not exist.")
                 raise FileNotFoundError
             self.logger.info(f"Find mol2 for {metal_residue_name}: {self.origin_folder}/{metal_residue_name}.mol2")
             self.metal_mol2_files.append(f"{self.origin_folder}/{metal_residue_name}.mol2")
         for legand_residue_name in self.legand_residue_names:
             if not os.path.exists(f"{self.origin_folder}/{legand_residue_name}.mol2"):
-                logger.error(f"Legand residue {legand_residue_name}.mol2 does not exist.")
+                self.logger.error(f"Legand residue {legand_residue_name}.mol2 does not exist.")
                 raise FileNotFoundError
             self.logger.info(f"Find mol2 for {legand_residue_name}: {self.origin_folder}/{legand_residue_name}.mol2")
             self.legand_mol2_files.append(f"{self.origin_folder}/{legand_residue_name}.mol2")
@@ -168,8 +168,8 @@ def find_out_real_residue_index(self, solvated_pdb:str):
         for key in tmpdict.keys():
             self.logger.info(f"The system contains {len(tmpdict[key])} {key}" + f" with indices {tmpdict[key]}")
             if len(tmpdict[key]) != len(tmpdict[list(tmpdict.keys())[0]]):
-                logger.error(f"The number of {key} is different from others!")
-                logger.error(f"This may indicate an incomplete transition metal complex structure")
+                self.logger.error(f"The number of {key} is different from others!")
+                self.logger.error(f"This may indicate an incomplete transition metal complex structure")
                 raise ValueError
         for i in range(len(tmpdict[list(tmpdict.keys())[0]])):
             tmc_res_index = {}