Merge remote-tracking branch 'refs/remotes/fork_lenz/develop' into multielement_multidos
RandomDefaultUser committed Nov 29, 2024
2 parents a0ff258 + 03f6b96 commit 7e4fb74
Showing 73 changed files with 3,408 additions and 2,071 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/cpu-tests.yml
@@ -170,20 +170,27 @@ jobs:
           # install mala package
           pip --no-cache-dir install -e .[opt,test] --no-build-isolation
       - name: Check if Conda environment meets the specified requirements
         shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
         run: |
           # export Conda environment _with_ mala package installed in it (and extra dependencies)
           conda env export -n mala-cpu > env_after.yml
           # This command is necessary because conda includes even editable
           # packages in an export, at least in the versions we recently used.
           # That of course leads to the diff failing, since MALA can never
           # be there before it has been installed.
           sed -i '/materials-learning-algorithms/d' ./env_after.yml
           # if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with
           # `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment
           if diff --brief env_before.yml env_after.yml
           then
             echo "Files env_before.yml and env_after.yml do not differ."
           else
-            diff --side-by-side --color-always env_before.yml env_after.yml
+            diff --side-by-side env_before.yml env_after.yml
           fi
       - name: Download test data repository from RODARE
4 changes: 2 additions & 2 deletions .github/workflows/gh-pages.yml
@@ -26,7 +26,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.12'
+          python-version: '3.10.4'

       - name: Upgrade pip
         run: python3 -m pip install --upgrade pip
@@ -50,7 +50,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.12'
+          python-version: '3.10.4'

       - name: Upgrade pip
         run: python3 -m pip install --upgrade pip
6 changes: 4 additions & 2 deletions docs/source/advanced_usage/predictions.rst
@@ -81,11 +81,13 @@
 Gaussian representation of atomic positions. In this algorithm, most of the
 computational overhead of the total energy calculation is offloaded to the
 computation of this Gaussian representation. This calculation is realized via
 LAMMPS and can therefore be GPU accelerated (parallelized) in the same fashion
-as the bispectrum descriptor calculation. Simply activate this option via
+as the bispectrum descriptor calculation. If a GPU is activated (and LAMMPS
+is available), this option will be used by default. It can also manually be
+activated via

 .. code-block:: python

-    parameters.descriptors.use_atomic_density_energy_formula = True
+    parameters.use_atomic_density_formula = True

 The Gaussian representation algorithm is described in
 the publication `Predicting electronic structures at any length scale with machine learning <doi.org/10.1038/s41524-023-01070-z>`_.
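For context, a minimal sketch of the renamed flag in use; ``parameters.use_atomic_density_formula`` is taken from the diff above, while ``parameters.use_gpu`` is assumed from MALA's example scripts, and defaults may differ between versions:

.. code-block:: python

    import mala

    parameters = mala.Parameters()
    # With a GPU active (and LAMMPS available), the atomic density
    # formula is used by default according to the docs above.
    parameters.use_gpu = True
    # Manual activation, as documented in the diff above.
    parameters.use_atomic_density_formula = True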
61 changes: 52 additions & 9 deletions docs/source/advanced_usage/trainingmodel.rst
@@ -194,22 +194,64 @@
 keyword, you can fine-tune the number of new snapshots being created.
 By default, the same number of snapshots as had been provided will be created
 (if possible).

-Using tensorboard
-******************
+Logging metrics during training
+*******************************

+Training progress in MALA can be visualized via tensorboard or wandb, as also shown
+in the file ``advanced/ex03_tensor_board``. Simply select a logger prior to training as
+
+.. code-block:: python
+
+    parameters.running.logger = "tensorboard"
+    parameters.running.logging_dir = "mala_vis"
+
-Training routines in MALA can be visualized via tensorboard, as also shown
-in the file ``advanced/ex03_tensor_board``. Simply enable tensorboard
-visualization prior to training via
+or

 .. code-block:: python

-    # 0: No visualization, 1: loss and learning rate, 2: like 1,
-    # but additionally weights and biases are saved
-    parameters.running.logging = 1
+    import wandb
+    wandb.init(
+        project="mala_training",
+        entity="your_wandb_entity"
+    )
+    parameters.running.logger = "wandb"
     parameters.running.logging_dir = "mala_vis"

 where ``logging_dir`` specifies some directory in which to save the
-MALA logging data. Afterwards, you can run the training without any
+MALA logging data. You can also select which metrics to record via
+
+.. code-block:: python
+
+    parameters.validation_metrics = ["ldos", "dos", "density", "total_energy"]
+
+Full list of available metrics:
+
+- "ldos": MSE of the LDOS.
+- "band_energy": Band energy.
+- "band_energy_actual_fe": Band energy computed with ground truth Fermi energy.
+- "total_energy": Total energy.
+- "total_energy_actual_fe": Total energy computed with ground truth Fermi energy.
+- "fermi_energy": Fermi energy.
+- "density": Electron density.
+- "density_relative": Electron density (Mean Absolute Percentage Error).
+- "dos": Density of states.
+- "dos_relative": Density of states (Mean Absolute Percentage Error).
+
+To save time and resources, you can specify the logging interval via
+
+.. code-block:: python
+
+    parameters.running.validate_every_n_epochs = 10
+
+If you want to monitor the degree to which the model overfits to the training data,
+you can use the option
+
+.. code-block:: python
+
+    parameters.running.validate_on_training_data = True
+
+MALA will then evaluate the validation metrics on the training set as well as the validation set.
+
+Afterwards, you can run the training without any
 other modifications. Once training is finished (or during training, in case
 you want to use tensorboard to monitor progress), you can launch tensorboard
 via
@@ -221,6 +263,7 @@
 The full path for ``path_to_log_directory`` can be accessed via
 ``trainer.full_logging_path``.

+If you're using wandb, you can monitor the training progress on the wandb website.

 Training in parallel
 ********************
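Pulled together, a minimal logging setup based on the snippets above might look like this (a sketch only; trainer construction is elided and the metric choice is illustrative):

.. code-block:: python

    import mala

    parameters = mala.Parameters()
    parameters.running.logger = "tensorboard"
    parameters.running.logging_dir = "mala_vis"
    # Record a subset of the available metrics ...
    parameters.validation_metrics = ["ldos", "band_energy"]
    # ... every 10th epoch, on both the validation and training data.
    parameters.running.validate_every_n_epochs = 10
    parameters.running.validate_on_training_data = True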
15 changes: 9 additions & 6 deletions docs/source/basic_usage/trainingmodel.rst
@@ -28,7 +28,7 @@ options to train a simple network with example data, namely
 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.network.layer_activations = ["ReLU"]
@@ -43,15 +43,18 @@
 sub-objects dealing with the individual aspects of the workflow. In the first
 two lines, we specify which data scaling MALA should employ. Scaling data greatly
 improves the performance of NN based ML models. Options are

-* ``None``: No normalization is applied.
+* ``None``: No scaling is applied.

-* ``standard``: Standardization (Scale to mean 0, standard deviation 1)
+* ``standard``: Standardization (Scale to mean 0, standard deviation 1) is
+  applied to the entire array.

-* ``normal``: Min-Max scaling (Scale to be in range 0...1)
+* ``minmax``: Min-Max scaling (Scale to be in range 0...1) is applied to the entire array.

-* ``feature-wise-standard``: Row Standardization (Scale to mean 0, standard deviation 1)
+* ``feature-wise-standard``: Standardization (Scale to mean 0, standard
+  deviation 1) is applied to each feature dimension individually.

-* ``feature-wise-normal``: Row Min-Max scaling (Scale to be in range 0...1)
+* ``feature-wise-minmax``: Min-Max scaling (Scale to be in range 0...1) is
+  applied to each feature dimension individually.

 Here, we specify that MALA should standardize the input (=descriptors)
 by feature (i.e., each entry of the vector separately on the grid) and
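For intuition about these options, a short NumPy sketch (illustrative only, not the MALA implementation) contrasting array-wide and feature-wise scaling:

.. code-block:: python

    import numpy as np

    # rows = grid points (samples), columns = feature dimensions
    data = np.array([[1.0, 100.0], [2.0, 200.0], [3.0, 300.0]])

    # "standard": one mean/std computed over the entire array
    standard = (data - data.mean()) / data.std()

    # "feature-wise-standard": mean/std computed per column
    fw_standard = (data - data.mean(axis=0)) / data.std(axis=0)

    # "feature-wise-minmax": each column scaled to the range 0...1
    mins, maxs = data.min(axis=0), data.max(axis=0)
    fw_minmax = (data - mins) / (maxs - mins)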
1 change: 0 additions & 1 deletion docs/source/conf.py
@@ -72,7 +72,6 @@
     "scipy",
     "oapackage",
     "matplotlib",
-    "horovod",
     "lammps",
     "total_energy",
     "pqkmeans",
4 changes: 2 additions & 2 deletions docs/source/install/installing_mala.rst
@@ -4,8 +4,8 @@ Installing MALA
 Prerequisites
 **************

-MALA does not depend on a specific Python version. The most recent Python
-version it has been tested with successfully is Python ``3.10.4``.
+MALA supports any Python version starting from ``3.10.4``. No upper limit on
+Python versions is enforced. The most recent *tested* version is ``3.10.12``.

 MALA requires ``torch`` in order to function. As the installation of torch
 depends highly on the architecture you are using, ``torch`` will not
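A trivial interpreter check against the stated minimum version (an illustrative sketch, not part of the MALA docs):

.. code-block:: python

    import sys

    # MALA states support for Python >= 3.10.4.
    assert sys.version_info >= (3, 10, 4), "MALA requires Python 3.10.4+"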
2 changes: 1 addition & 1 deletion examples/advanced/ex01_checkpoint_training.py
@@ -21,7 +21,7 @@ def initial_setup():
     parameters = mala.Parameters()
     parameters.data.data_splitting_type = "by_snapshot"
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.network.layer_activations = ["ReLU"]
     parameters.running.max_number_epochs = 9
     parameters.running.mini_batch_size = 8
14 changes: 11 additions & 3 deletions examples/advanced/ex03_tensor_board.py
@@ -13,7 +13,7 @@

 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.targets.ldos_gridsize = 11
 parameters.targets.ldos_gridspacing_ev = 2.5
 parameters.targets.ldos_gridoffset_ev = -5
@@ -32,11 +32,19 @@

 data_handler = mala.DataHandler(parameters)
 data_handler.add_snapshot(
-    "Be_snapshot0.in.npy", data_path, "Be_snapshot0.out.npy", data_path, "tr",
+    "Be_snapshot0.in.npy",
+    data_path,
+    "Be_snapshot0.out.npy",
+    data_path,
+    "tr",
     calculation_output_file=os.path.join(data_path, "Be_snapshot0.out"),
 )
 data_handler.add_snapshot(
-    "Be_snapshot1.in.npy", data_path, "Be_snapshot1.out.npy", data_path, "va",
+    "Be_snapshot1.in.npy",
+    data_path,
+    "Be_snapshot1.out.npy",
+    data_path,
+    "va",
     calculation_output_file=os.path.join(data_path, "Be_snapshot1.out"),
 )
 data_handler.prepare_data()
@@ -17,7 +17,7 @@

 def initial_setup():
     parameters = mala.Parameters()
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.running.max_number_epochs = 10
     parameters.running.mini_batch_size = 40
     parameters.running.learning_rate = 0.00001
@@ -24,7 +24,7 @@

 parameters = mala.Parameters()
 # Specify the data scaling.
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.running.max_number_epochs = 5
 parameters.running.mini_batch_size = 40
 parameters.running.learning_rate = 0.00001
@@ -17,7 +17,7 @@ def optimize_hyperparameters(hyper_optimizer):

     parameters = mala.Parameters()
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.running.max_number_epochs = 10
     parameters.running.mini_batch_size = 40
     parameters.running.learning_rate = 0.00001
98 changes: 98 additions & 0 deletions examples/advanced/ex10_convert_numpy_openpmd.py
@@ -0,0 +1,98 @@
import os

import mala
import numpy as np

from mala.datahandling.data_repo import data_path

parameters = mala.Parameters()
parameters.descriptors.descriptors_contain_xyz = False

# First, convert from Numpy files to openPMD.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="numpy",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.npy".format(snapshot)
        ),
        target_input_type="numpy",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.npy".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_numpy_*.h5",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

# Convert those files back to Numpy to verify the data stays the same.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path="converted_from_numpy_{}.in.h5".format(snapshot),
        target_input_type="openpmd",
        target_input_path="converted_from_numpy_{}.out.h5".format(snapshot),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="verify_against_original_numpy_data_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

for snapshot in range(2):
    for i_o in ["in", "out"]:
        original = os.path.join(
            data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
        )
        roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
            snapshot, i_o
        )
        original_a = np.load(original)
        roundtrip_a = np.load(roundtrip)
        np.testing.assert_allclose(original_a, roundtrip_a)

# Now, convert some openPMD data back to Numpy.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.h5".format(snapshot)
        ),
        target_input_type="openpmd",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.h5".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_openpmd_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)
2 changes: 1 addition & 1 deletion examples/basic/ex01_train_network.py
@@ -20,7 +20,7 @@

 # Specify the data scaling. For regular bispectrum and LDOS data,
 # these have proven successful.
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 # Specify the used activation function.
 parameters.network.layer_activations = ["ReLU"]
 # Specify the training parameters.
2 changes: 1 addition & 1 deletion examples/basic/ex04_hyperparameter_optimization.py
@@ -19,7 +19,7 @@

 ####################
 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.running.max_number_epochs = 20
 parameters.running.mini_batch_size = 40
 parameters.running.optimizer = "Adam"