Merge pull request mala-project#611 from RandomDefaultUser/move_atomi…

…c_density_formula Move the flag `use_atomic_density_energy_formula`
RandomDefaultUser · Nov 21, 2024 · 01320fa · 01320fa
2 parents a61a489 + caab2cc
commit 01320fa
Show file tree

Hide file tree

Showing 8 changed files with 93 additions and 174 deletions.
diff --git a/docs/source/advanced_usage/predictions.rst b/docs/source/advanced_usage/predictions.rst
@@ -81,11 +81,13 @@ Gaussian representation of atomic positions. In this algorithm, most of the
 computational overhead of the total energy calculation is offloaded to the
 computation of this Gaussian representation. This calculation is realized via
 LAMMPS and can therefore be GPU accelerated (parallelized) in the same fashion
-as the bispectrum descriptor calculation. Simply activate this option via
+as the bispectrum descriptor calculation. If a GPU is activated (and LAMMPS
+is available), this option will be used by default. It can also manually be
+activated via
 
     .. code-block:: python
 
-        parameters.descriptors.use_atomic_density_energy_formula = True
+        parameters.use_atomic_density_formula = True
 
 The Gaussian representation algorithm is describe in
 the publication `Predicting electronic structures at any length scale with machine learning <doi.org/10.1038/s41524-023-01070-z>`_.

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -72,7 +72,6 @@
     "scipy",
     "oapackage",
     "matplotlib",
-    "horovod",
     "lammps",
     "total_energy",
     "pqkmeans",

diff --git a/mala/common/parameters.py b/mala/common/parameters.py
@@ -40,6 +40,7 @@ def __init__(
             "openpmd_configuration": {},
             "openpmd_granularity": 1,
             "lammps": True,
+            "atomic_density_formula": False,
         }
         pass
 
@@ -88,6 +89,11 @@ def _update_openpmd_granularity(self, new_granularity):
     def _update_lammps(self, new_lammps):
         self._configuration["lammps"] = new_lammps
 
+    def _update_atomic_density_formula(self, new_atomic_density_formula):
+        self._configuration["atomic_density_formula"] = (
+            new_atomic_density_formula
+        )
+
     @staticmethod
     def _member_to_json(member):
         if isinstance(member, (int, float, type(None), str)):
@@ -322,11 +328,6 @@ class ParametersDescriptors(ParametersBase):
 
     atomic_density_sigma : float
         Sigma used for the calculation of the Gaussian descriptors.
-
-    use_atomic_density_energy_formula : bool
-        If True, Gaussian descriptors will be calculated for the
-        calculation of the Ewald sum as part of the total energy module.
-        Default is False.
     """
 
     def __init__(self):
@@ -356,7 +357,6 @@ def __init__(self):
         # atomic density may be used at the same time, if e.g. bispectrum
         # descriptors are used for a full inference, which then uses the atomic
         # density for the calculation of the Ewald sum.
-        self.use_atomic_density_energy_formula = False
         self.atomic_density_sigma = None
         self.atomic_density_cutoff = None
 
@@ -556,11 +556,6 @@ class ParametersData(ParametersBase):
 
     Attributes
     ----------
-    descriptors_contain_xyz : bool
-        Legacy option. If True, it is assumed that the first three entries of
-        the descriptor vector are the xyz coordinates and they are cut from the
-        descriptor vector. If False, no such cutting is peformed.
-
     snapshot_directories_list : list
         A list of all added snapshots.
 
@@ -1186,9 +1181,6 @@ class Parameters:
     hyperparameters : ParametersHyperparameterOptimization
         Parameters used for hyperparameter optimization.
 
-    debug : ParametersDebug
-        Container for all debugging parameters.
-
     manual_seed: int
         If not none, this value is used as manual seed for the neural networks.
         Can be used to make experiments comparable. Default: None.
@@ -1220,6 +1212,7 @@ def __init__(self):
         # different.
         self.openpmd_granularity = 1
         self.use_lammps = True
+        self.use_atomic_density_formula = False
 
     @property
     def openpmd_granularity(self):
@@ -1271,7 +1264,7 @@ def verbosity(self, value):
 
     @property
     def use_gpu(self):
-        """Control whether or not a GPU is used (provided there is one)."""
+        """Control whether a GPU is used (provided there is one)."""
         return self._use_gpu
 
     @use_gpu.setter
@@ -1286,6 +1279,12 @@ def use_gpu(self, value):
                     "GPU requested, but no GPU found. MALA will "
                     "operate with CPU only."
                 )
+        if self._use_gpu and self.use_lammps:
+            printout(
+                "Enabling atomic density formula because LAMMPS and GPU "
+                "are used."
+            )
+            self.use_atomic_density_formula = True
 
         # Invalidate, will be updated in setter.
         self.device = None
@@ -1298,7 +1297,7 @@ def use_gpu(self, value):
 
     @property
     def use_ddp(self):
-        """Control whether or not dd is used for parallel training."""
+        """Control whether ddp is used for parallel training."""
         return self._use_ddp
 
     @use_ddp.setter
@@ -1349,7 +1348,7 @@ def device(self, value):
 
     @property
     def use_mpi(self):
-        """Control whether or not MPI is used for paralle inference."""
+        """Control whether MPI is used for paralle inference."""
         return self._use_mpi
 
     @use_mpi.setter
@@ -1393,19 +1392,67 @@ def openpmd_configuration(self, value):
 
     @property
     def use_lammps(self):
-        """Control whether or not to use LAMMPS for descriptor calculation."""
+        """Control whether to use LAMMPS for descriptor calculation."""
         return self._use_lammps
 
     @use_lammps.setter
     def use_lammps(self, value):
         self._use_lammps = value
+        if self.use_gpu and value:
+            printout(
+                "Enabling atomic density formula because LAMMPS and GPU "
+                "are used."
+            )
+            self.use_atomic_density_formula = True
         self.network._update_lammps(self.use_lammps)
         self.descriptors._update_lammps(self.use_lammps)
         self.targets._update_lammps(self.use_lammps)
         self.data._update_lammps(self.use_lammps)
         self.running._update_lammps(self.use_lammps)
         self.hyperparameters._update_lammps(self.use_lammps)
 
+    @property
+    def use_atomic_density_formula(self):
+        """Control whether to use the atomic density formula.
+
+        This formula uses as a Gaussian representation of the atomic density
+        to calculate the structure factor and with it, the Ewald energy
+        and parts of the exchange-correlation energy. By using it, one can
+        go from N^2 to NlogN scaling, and offloads most of the computational
+        overhead of energy calculation from QE to LAMMPS. This is beneficial
+        since LAMMPS can benefit from GPU acceleration (QE GPU acceleration
+        is not used in the portion of the QE code MALA employs). If set
+        to True, this means MALA will perform another LAMMPS calculation
+        during inference. The hyperparameters for this atomic density
+        calculation are set via the parameters.descriptors object.
+        Default is False, except for when both use_gpu and use_lammps
+        are True, in which case this value will be set to True as well.
+        """
+        return self._use_atomic_density_formula
+
+    @use_atomic_density_formula.setter
+    def use_atomic_density_formula(self, value):
+        self._use_atomic_density_formula = value
+
+        self.network._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+        self.descriptors._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+        self.targets._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+        self.data._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+        self.running._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+        self.hyperparameters._update_atomic_density_formula(
+            self.use_atomic_density_formula
+        )
+
     def show(self):
         """Print name and values of all attributes of this object."""
         printout(
@@ -1598,6 +1645,18 @@ def load_from_file(
                     ].from_json(json_dict[key])
                     setattr(loaded_parameters, key, sub_parameters)
 
+                    # Backwards compatability:
+                    if key == "descriptors":
+                        if (
+                            "use_atomic_density_energy_formula"
+                            in json_dict[key]
+                        ):
+                            loaded_parameters.use_atomic_density_formula = (
+                                json_dict[key][
+                                    "use_atomic_density_energy_formula"
+                                ]
+                            )
+
             # We iterate a second time, to set global values, so that they
             # are properly forwarded.
             for key in json_dict:
@@ -1611,6 +1670,13 @@ def load_from_file(
                         setattr(loaded_parameters, key, json_dict[key])
             if no_snapshots is True:
                 loaded_parameters.data.snapshot_directories_list = []
+            # Backwards compatability: since the transfer of old property
+            # to new property happens _before_ all children descriptor classes
+            # are instantiated, it is not properly propagated. Thus, we
+            # simply have to set it to its own value again.
+            loaded_parameters.use_atomic_density_formula = (
+                loaded_parameters.use_atomic_density_formula
+            )
         else:
             raise Exception("Unsupported parameter save format.")
 

diff --git a/mala/network/hyper_opt_oat.py b/mala/network/hyper_opt_oat.py
@@ -403,12 +403,6 @@ def __create_checkpointing(self, trial):
 
             self._save_params_and_scaler()
 
-            # Next, we save all the other objects.
-            # Here some horovod stuff would have to go.
-            # But so far, the optuna implementation is not horovod-ready...
-            # if self.params.use_horovod:
-            #     if hvd.rank() != 0:
-            #         return
             # The study only has to be saved if the no RDB storage is used.
             if self.params.hyperparameters.rdb_storage is None:
                 hyperopt_name = (

diff --git a/mala/network/hyper_opt_optuna.py b/mala/network/hyper_opt_optuna.py
@@ -384,12 +384,6 @@ def __create_checkpointing(self, study, trial):
 
             self._save_params_and_scaler()
 
-            # Next, we save all the other objects.
-            # Here some horovod stuff would have to go.
-            # But so far, the optuna implementation is not horovod-ready...
-            # if self.params.use_horovod:
-            #     if hvd.rank() != 0:
-            #         return
             # The study only has to be saved if the no RDB storage is used.
             if self.params.hyperparameters.rdb_storage is None:
                 hyperopt_name = (

diff --git a/mala/targets/density.py b/mala/targets/density.py
@@ -1118,7 +1118,7 @@ def __setup_total_energy_module(
         # instantiate the process with the file.
         positions_for_qe = self.get_scaled_positions_for_qe(atoms_Angstrom)
 
-        if self._parameters_full.descriptors.use_atomic_density_energy_formula:
+        if self.parameters._configuration["atomic_density_formula"]:
             # Calculate the Gaussian descriptors for the calculation of the
             # structure factors.
             barrier()
@@ -1187,8 +1187,8 @@ def __setup_total_energy_module(
         te.set_positions(
             np.transpose(positions_for_qe),
             number_of_atoms,
-            self._parameters_full.descriptors.use_atomic_density_energy_formula,
-            self._parameters_full.descriptors.use_atomic_density_energy_formula,
+            self.parameters._configuration["atomic_density_formula"],
+            self.parameters._configuration["atomic_density_formula"],
         )
         barrier()
         printout(
@@ -1199,7 +1199,7 @@ def __setup_total_energy_module(
         )
         barrier()
 
-        if self._parameters_full.descriptors.use_atomic_density_energy_formula:
+        if self.parameters._configuration["atomic_density_formula"]:
             t0 = time.perf_counter()
             gaussian_descriptors = np.reshape(
                 gaussian_descriptors,