From 1c222ab6f8c1dfbbd15fa8b857b9cce05c9cbebe Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Fri, 25 Oct 2024 15:44:41 +0200
Subject: [PATCH 01/13] Renamed "normal" to "minmax" and fixed docstrings.

---
 docs/source/basic_usage/trainingmodel.rst     | 19 ++++--
 examples/advanced/ex01_checkpoint_training.py |  2 +-
 examples/advanced/ex03_tensor_board.py        | 14 +++-
 ..._checkpoint_hyperparameter_optimization.py |  2 +-
 ...distributed_hyperparameter_optimization.py |  2 +-
 ...07_advanced_hyperparameter_optimization.py |  2 +-
 examples/basic/ex01_train_network.py          |  2 +-
 .../basic/ex04_hyperparameter_optimization.py |  2 +-
 mala/common/parameters.py                     | 48 ++++++++-----
 mala/datahandling/data_scaler.py              | 67 ++++++++++++------
 test/all_lazy_loading_test.py                 | 14 ++--
 test/basic_gpu_test.py                        |  2 +-
 test/checkpoint_hyperopt_test.py              |  2 +-
 test/checkpoint_training_test.py              |  2 +-
 test/complete_interfaces_test.py              |  2 +-
 test/hyperopt_test.py                         | 10 +--
 test/scaling_test.py                          |  4 +-
 test/shuffling_test.py                        |  8 +--
 test/workflow_test.py                         |  2 +-
 19 files changed, 129 insertions(+), 77 deletions(-)

diff --git a/docs/source/basic_usage/trainingmodel.rst b/docs/source/basic_usage/trainingmodel.rst
index e6bc8c967..bfb157c9a 100644
--- a/docs/source/basic_usage/trainingmodel.rst
+++ b/docs/source/basic_usage/trainingmodel.rst
 options to train a simple network with example data, namely

       parameters = mala.Parameters()

       parameters.data.input_rescaling_type = "feature-wise-standard"
-      parameters.data.output_rescaling_type = "normal"
+      parameters.data.output_rescaling_type = "minmax"

       parameters.network.layer_activations = ["ReLU"]

 sub-objects dealing with the individual aspects of the workflow. In the
 first two lines, which data scaling MALA should employ. Scaling data
 greatly improves the performance of NN based ML models. Options are

-* ``None``: No normalization is applied.
+* ``None``: No scaling is applied.

-* ``standard``: Standardization (Scale to mean 0, standard deviation 1)
+* ``standard``: Standardization (Scale to mean 0, standard deviation 1) is
+  applied to the entire array.

-* ``normal``: Min-Max scaling (Scale to be in range 0...1)
+* ``minmax``: Min-Max scaling (Scale to be in range 0...1) is applied to the
+  entire array.

-* ``feature-wise-standard``: Row Standardization (Scale to mean 0, standard deviation 1)
+* ``feature-wise-standard``: Standardization (Scale to mean 0, standard
+  deviation 1) is applied to each feature dimension individually. I.e., if your
+  training data has dimensions (x,y,z,f), then each of the f rows with (x,y,z)
+  entries is scaled individually.

-* ``feature-wise-normal``: Row Min-Max scaling (Scale to be in range 0...1)
+* ``feature-wise-minmax``: Min-Max scaling (Scale to be in range 0...1) is
+  applied to each feature dimension individually. I.e., if your training data
+  has dimensions (x,y,z,f), then each of the f rows with (x,y,z) entries is
+  scaled individually.
 Here, we specify that MALA should standardize the input (=descriptors)
 by feature (i.e., each entry of the vector separately on the grid) and

diff --git a/examples/advanced/ex01_checkpoint_training.py b/examples/advanced/ex01_checkpoint_training.py
index 5222a5232..af8ee5687 100644
--- a/examples/advanced/ex01_checkpoint_training.py
+++ b/examples/advanced/ex01_checkpoint_training.py
 def initial_setup():
     parameters = mala.Parameters()
     parameters.data.data_splitting_type = "by_snapshot"
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.network.layer_activations = ["ReLU"]
     parameters.running.max_number_epochs = 9
     parameters.running.mini_batch_size = 8

diff --git a/examples/advanced/ex03_tensor_board.py b/examples/advanced/ex03_tensor_board.py
index 97bc781cf..cf1e884a7 100644
--- a/examples/advanced/ex03_tensor_board.py
+++ b/examples/advanced/ex03_tensor_board.py
 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.targets.ldos_gridsize = 11
 parameters.targets.ldos_gridspacing_ev = 2.5
 parameters.targets.ldos_gridoffset_ev = -5

 data_handler = mala.DataHandler(parameters)
 data_handler.add_snapshot(
-    "Be_snapshot0.in.npy", data_path, "Be_snapshot0.out.npy", data_path, "tr",
+    "Be_snapshot0.in.npy",
+    data_path,
+    "Be_snapshot0.out.npy",
+    data_path,
+    "tr",
     calculation_output_file=os.path.join(data_path, "Be_snapshot0.out"),
 )
 data_handler.add_snapshot(
-    "Be_snapshot1.in.npy", data_path, "Be_snapshot1.out.npy", data_path, "va",
+    "Be_snapshot1.in.npy",
+    data_path,
+    "Be_snapshot1.out.npy",
+    data_path,
+    "va",
     calculation_output_file=os.path.join(data_path, "Be_snapshot1.out"),
 )
 data_handler.prepare_data()

diff --git a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
index 99a92fa35..7680c7a91 100644
--- a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
+++ b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
 def initial_setup():
     parameters = mala.Parameters()
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.running.max_number_epochs = 10
     parameters.running.mini_batch_size = 40
     parameters.running.learning_rate = 0.00001

diff --git a/examples/advanced/ex06_distributed_hyperparameter_optimization.py b/examples/advanced/ex06_distributed_hyperparameter_optimization.py
index 215dd1ab2..4a6e42f9b 100644
--- a/examples/advanced/ex06_distributed_hyperparameter_optimization.py
+++ b/examples/advanced/ex06_distributed_hyperparameter_optimization.py
 parameters = mala.Parameters()

 # Specify the data scaling.
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.running.max_number_epochs = 5
 parameters.running.mini_batch_size = 40
 parameters.running.learning_rate = 0.00001

diff --git a/examples/advanced/ex07_advanced_hyperparameter_optimization.py b/examples/advanced/ex07_advanced_hyperparameter_optimization.py
index 242ffd7dd..0072ed3a0 100644
--- a/examples/advanced/ex07_advanced_hyperparameter_optimization.py
+++ b/examples/advanced/ex07_advanced_hyperparameter_optimization.py
 def optimize_hyperparameters(hyper_optimizer):
     parameters = mala.Parameters()
     parameters.data.input_rescaling_type = "feature-wise-standard"
-    parameters.data.output_rescaling_type = "normal"
+    parameters.data.output_rescaling_type = "minmax"
     parameters.running.max_number_epochs = 10
     parameters.running.mini_batch_size = 40
     parameters.running.learning_rate = 0.00001

diff --git a/examples/basic/ex01_train_network.py b/examples/basic/ex01_train_network.py
index 1eca8c6b7..c7a5ca782 100644
--- a/examples/basic/ex01_train_network.py
+++ b/examples/basic/ex01_train_network.py
 # Specify the data scaling. For regular bispectrum and LDOS data,
 # these have proven successful.
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 # Specify the used activation function.
 parameters.network.layer_activations = ["ReLU"]
 # Specify the training parameters.

diff --git a/examples/basic/ex04_hyperparameter_optimization.py b/examples/basic/ex04_hyperparameter_optimization.py
index cebb4c42e..3160206c3 100644
--- a/examples/basic/ex04_hyperparameter_optimization.py
+++ b/examples/basic/ex04_hyperparameter_optimization.py
 ####################
 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.running.max_number_epochs = 20
 parameters.running.mini_batch_size = 40
 parameters.running.optimizer = "Adam"

diff --git a/mala/common/parameters.py b/mala/common/parameters.py
index 28840ebec..5b415e9d7 100644
--- a/mala/common/parameters.py
+++ b/mala/common/parameters.py
         Specifies how input quantities are normalized.

         Options:

-        - "None": No normalization is applied.
+        - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in range
-          0...1)
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually. I.e., if your training data has dimensions
+          (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled
+          individually.
+        - "feature-wise-minmax": Row Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (x,y,z,f), then each
+          of the f rows with (x,y,z) entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     output_rescaling_type : string
         Specifies how output quantities are normalized.

         Options:

-        - "None": No normalization is applied.
+        - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in
-          range 0...1)
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually. I.e., if your training data has dimensions
+          (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled
+          individually.
+        - "feature-wise-minmax": Row Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (x,y,z,f), then each
+          of the f rows with (x,y,z) entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     use_lazy_loading : bool
         If True, data is lazily loaded, i.e. only the snapshots that are

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index e3c8a5328..b9867f201 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
 import torch.distributed as dist

 from mala.common.parameters import printout
+from mala.common.parallelizer import parallel_warn

 class DataScaler:

         Specifies how scaling should be performed.

         Options:

-        - "None": No normalization is applied.
+        - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in range
-          0...1)
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually.
+          I.e., if your training data has dimensions (x,y,z,f), then each
+          of the f rows with (x,y,z) entries is scaled individually.
+        - "feature-wise-minmax": Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (x,y,z,f), then each
+          of the f rows with (x,y,z) entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     use_ddp : bool
         If True, the DataScaler will use ddp to check that data is

 def __init__(self, typestring, use_ddp=False):
         self.use_ddp = use_ddp
         self.typestring = typestring
         self.scale_standard = False
-        self.scale_normal = False
+        self.scale_minmax = False
         self.feature_wise = False
         self.cantransform = False
         self.__parse_typestring()

     def __parse_typestring(self):
         """Parse the typestring to class attributes."""
         self.scale_standard = False
-        self.scale_normal = False
+        self.scale_minmax = False
         self.feature_wise = False

         if "standard" in self.typestring:
             self.scale_standard = True
         if "normal" in self.typestring:
-            self.scale_normal = True
+            parallel_warn(
+                "Options 'normal' and 'feature-wise-normal' will be "
+                "deprecated, starting in MALA v1.4.0. Please use 'minmax' and "
+                "'feature-wise-minmax' instead.",
+                min_verbosity=0,
+                category=FutureWarning,
+            )
+            self.scale_minmax = True
+        if "minmax" in self.typestring:
+            self.scale_minmax = True
         if "feature-wise" in self.typestring:
             self.feature_wise = True

-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             printout("No data rescaling will be performed.", min_verbosity=1)
             self.cantransform = True
             return
-        if self.scale_standard is True and self.scale_normal is True:
+        if self.scale_standard is True and self.scale_minmax is True:
             raise Exception("Invalid input data rescaling.")

     def start_incremental_fitting(self):

 def incremental_fit(self, unscaled):
             Data that is to be added to the fit.

         """
-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             return
         else:
             with torch.no_grad():

                     self.stds = new_std
                     self.total_data_count += current_data_count

-                if self.scale_normal:
+                if self.scale_minmax:
                     new_maxs = torch.max(unscaled, 0, keepdim=True)
                     if list(self.maxs.size())[0] > 0:
                         for i in range(list(new_maxs.values.size())[1]):

                     self.total_std = torch.sqrt(self.total_std)
                     self.total_data_count += current_data_count

-                if self.scale_normal:
+                if self.scale_minmax:
                     new_max = torch.max(unscaled)
                     if new_max > self.total_max:
                         self.total_max = new_max

 def fit(self, unscaled):
             Data that on which the scaling will be calculated.

         """
-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             return
         else:
             with torch.no_grad():

                     self.means = torch.mean(unscaled, 0, keepdim=True)
                     self.stds = torch.std(unscaled, 0, keepdim=True)

-                if self.scale_normal:
+                if self.scale_minmax:
                     self.maxs = torch.max(unscaled, 0, keepdim=True).values
                     self.mins = torch.min(unscaled, 0, keepdim=True).values

                     self.total_mean = torch.mean(unscaled)
                     self.total_std = torch.std(unscaled)

-                if self.scale_normal:
+                if self.scale_minmax:
                     self.total_max = torch.max(unscaled)
                     self.total_min = torch.min(unscaled)

 def transform(self, unscaled):
             Scaled data.
         """
         # First we need to find out if we even have to do anything.
-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             pass

         elif self.cantransform is False:

                         unscaled -= self.means
                         unscaled /= self.stds

-                    if self.scale_normal:
+                    if self.scale_minmax:
                         unscaled -= self.mins
                         unscaled /= self.maxs - self.mins

                         unscaled -= self.total_mean
                         unscaled /= self.total_std

-                    if self.scale_normal:
+                    if self.scale_minmax:
                         unscaled -= self.total_min
                         unscaled /= self.total_max - self.total_min

 def inverse_transform(self, scaled, as_numpy=False):

         """
         # First we need to find out if we even have to do anything.
-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             unscaled = scaled

         else:

                     if self.scale_standard:
                         unscaled = (scaled * self.stds) + self.means

-                    if self.scale_normal:
+                    if self.scale_minmax:
                         unscaled = (
                             scaled * (self.maxs - self.mins)
                         ) + self.mins

                     if self.scale_standard:
                         unscaled = (scaled * self.total_std) + self.total_mean

-                    if self.scale_normal:
+                    if self.scale_minmax:
                         unscaled = (
                             scaled * (self.total_max - self.total_min)
                         ) + self.total_min

diff --git a/test/all_lazy_loading_test.py b/test/all_lazy_loading_test.py
index 351c98292..4fcaebaff 100644
--- a/test/all_lazy_loading_test.py
+++ b/test/all_lazy_loading_test.py
     def test_scaling(self):
         ####################
         test_parameters = Parameters()
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.descriptors.bispectrum_twojmax = 11
         test_parameters.targets.ldos_gridsize = 10

         training_tester = []
         for scalingtype in [
             "standard",
-            "normal",
+            "minmax",
             "feature-wise-standard",
-            "feature-wise-normal",
+            "feature-wise-minmax",
         ]:
             comparison = [scalingtype]
             for ll_type in [True, False]:

                         data_handler.output_data_scaler.total_std
                         / data_handler.nr_training_data
                     )
-                elif scalingtype == "normal":
+                elif scalingtype == "minmax":
                     torch.manual_seed(2002)
                     this_result.append(
                         data_handler.input_data_scaler.total_max

                             0
                         ].grid_size
                     )
-                elif scalingtype == "feature-wise-normal":
+                elif scalingtype == "feature-wise-minmax":
                     this_result.append(
                         torch.mean(data_handler.input_data_scaler.maxs)
                     )

     def test_performance_horovod(self):

         ####################
         test_parameters = Parameters()
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.network.layer_activations = ["LeakyReLU"]
         test_parameters.running.max_number_epochs = 20

     def _train_lazy_loading(prefetching):
         test_parameters = Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.manual_seed = 1234
         test_parameters.running.max_number_epochs = 100

diff --git a/test/basic_gpu_test.py b/test/basic_gpu_test.py
index 514a70f21..46a44803f 100644
--- a/test/basic_gpu_test.py
+++ b/test/basic_gpu_test.py
 def __run(use_gpu):
         # Specify the data scaling.
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"

         # Specify the used activation function.
         test_parameters.network.layer_activations = ["ReLU"]

diff --git a/test/checkpoint_hyperopt_test.py b/test/checkpoint_hyperopt_test.py
index a1909f21b..3c64ffa71 100644
--- a/test/checkpoint_hyperopt_test.py
+++ b/test/checkpoint_hyperopt_test.py
 def __original_setup(n_trials):
         # Specify the data scaling.
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"

         # Specify the training parameters.
         test_parameters.running.max_number_epochs = 10

diff --git a/test/checkpoint_training_test.py b/test/checkpoint_training_test.py
index 3bc5e83e3..abb2921f0 100644
--- a/test/checkpoint_training_test.py
+++ b/test/checkpoint_training_test.py
 def __original_setup(
         # Specify the data scaling.
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"

         # Specify the used activation function.
         test_parameters.network.layer_activations = ["ReLU"]

diff --git a/test/complete_interfaces_test.py b/test/complete_interfaces_test.py
index 8aa7da85d..1e219830a 100644
--- a/test/complete_interfaces_test.py
+++ b/test/complete_interfaces_test.py
     def test_ase_calculator(self):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 100
         test_parameters.running.mini_batch_size = 40

diff --git a/test/hyperopt_test.py b/test/hyperopt_test.py
index 77b0b9896..d9f966728 100644
--- a/test/hyperopt_test.py
+++ b/test/hyperopt_test.py
     def test_hyperopt(self):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.running.max_number_epochs = 20
         test_parameters.running.mini_batch_size = 40
         test_parameters.running.learning_rate = 0.00001

     def test_distributed_hyperopt(self):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.running.max_number_epochs = 5
         test_parameters.running.mini_batch_size = 40
         test_parameters.running.learning_rate = 0.00001

     def test_naswot_eigenvalues(self):
         test_parameters.manual_seed = 1234
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.running.max_number_epochs = 10
         test_parameters.running.mini_batch_size = 40
         test_parameters.running.learning_rate = 0.00001

 def __optimize_hyperparameters(hyper_optimizer):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.running.max_number_epochs = 20
         test_parameters.running.mini_batch_size = 40
         test_parameters.running.learning_rate = 0.00001

     def test_hyperopt_optuna_requeue_zombie_trials(self, tmp_path):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.running.max_number_epochs = 2
         test_parameters.running.mini_batch_size = 40
         test_parameters.running.learning_rate = 0.00001

diff --git a/test/scaling_test.py b/test/scaling_test.py
index b7925cd9f..bae56cb82 100644
--- a/test/scaling_test.py
+++ b/test/scaling_test.py
     def test_errors_and_accuracy(self):
             "feature-wise-standard",
             "standard",
             "None",
-            "normal",
-            "feature-wise-normal",
+            "minmax",
+            "feature-wise-minmax",
         ]:
             data = np.load(os.path.join(data_path, "Be_snapshot2.out.npy"))
             data = data.astype(np.float32)

diff --git a/test/shuffling_test.py b/test/shuffling_test.py
index 72d28d6ef..0d1c0073c 100644
--- a/test/shuffling_test.py
+++ b/test/shuffling_test.py
     def test_training(self):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 50
         test_parameters.running.mini_batch_size = 40

         test_parameters.data.shuffling_seed = 1234
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 50
         test_parameters.running.mini_batch_size = 40

     def test_training_openpmd(self):
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 50
         test_parameters.running.mini_batch_size = 40

         test_parameters.data.shuffling_seed = 1234
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 50
         test_parameters.running.mini_batch_size = 40

diff --git a/test/workflow_test.py b/test/workflow_test.py
index 8cc33faf6..e5c1b20da 100644
--- a/test/workflow_test.py
+++ b/test/workflow_test.py
 def __simple_training(
         test_parameters = mala.Parameters()
         test_parameters.data.data_splitting_type = "by_snapshot"
         test_parameters.data.input_rescaling_type = "feature-wise-standard"
-        test_parameters.data.output_rescaling_type = "normal"
+        test_parameters.data.output_rescaling_type = "minmax"
         test_parameters.network.layer_activations = ["ReLU"]
         test_parameters.running.max_number_epochs = 400
         test_parameters.running.mini_batch_size = 40

From 4312c07650e7b4a57af7b2fe3ecd660c90cda52d Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Fri, 25 Oct 2024 16:13:39 +0200
Subject: [PATCH 02/13] Made DataScaler API consistent with sklearn

---
 mala/datahandling/data_scaler.py | 64 ++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index b9867f201..58dc1d294 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
         self.cantransform = True

-    def transform(self, unscaled):
+    def transform(self, unscaled, copy=False):
         """
         Transform data from unscaled to scaled.

         unscaled : torch.Tensor
             Real world data.

+        copy : bool
+            If False, data is modified in-place. If True, a copy of the
+            data is modified. Default is False.
+
         Returns
         -------
         scaled : torch.Tensor
             Scaled data.
         """
+        # Backward compatibility.
+        if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"):
+            self.scale_minmax = self.scale_normal
+
         # First we need to find out if we even have to do anything.
         if self.scale_standard is False and self.scale_minmax is False:
             pass

         elif self.cantransform is False:

         # Perform the actual scaling, but use no_grad to make sure
         # that the next couple of iterations stay untracked.
+        scaled = unscaled if copy is False else unscaled.clone()
+
         with torch.no_grad():

             if self.feature_wise:

                 ##########################

                 if self.scale_standard:
-                    unscaled -= self.means
-                    unscaled /= self.stds
+                    scaled -= self.means
+                    scaled /= self.stds

                 if self.scale_minmax:
-                    unscaled -= self.mins
-                    unscaled /= self.maxs - self.mins
+                    scaled -= self.mins
+                    scaled /= self.maxs - self.mins

             else:

                 ##########################

                 if self.scale_standard:
-                    unscaled -= self.total_mean
-                    unscaled /= self.total_std
+                    scaled -= self.total_mean
+                    scaled /= self.total_std

                 if self.scale_minmax:
-                    unscaled -= self.total_min
-                    unscaled /= self.total_max - self.total_min
+                    scaled -= self.total_min
+                    scaled /= self.total_max - self.total_min

-    def inverse_transform(self, scaled, as_numpy=False):
+        return scaled
+
+    def inverse_transform(self, scaled, copy=False, as_numpy=False):
         """
         Transform data from scaled to unscaled.

         scaled : torch.Tensor
             Scaled data.

         as_numpy : bool
-            If True, a numpy array is returned, otherwsie.
+            If True, a numpy array is returned, otherwise a torch tensor.
+
+        copy : bool
+            If False, data is modified in-place. If True, a copy of the
+            data is modified. Default is False.

         Returns
         -------
         unscaled : torch.Tensor
             Real world data.

         """
+        # Backward compatibility.
+        if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"):
+            self.scale_minmax = self.scale_normal
+
+        # Perform the actual scaling, but use no_grad to make sure
+        # that the next couple of iterations stay untracked.
+        unscaled = scaled if copy is False else scaled.clone()
+
         # First we need to find out if we even have to do anything.
         if self.scale_standard is False and self.scale_minmax is False:
-            unscaled = scaled
+            pass

         else:
             if self.cantransform is False:

                     if self.scale_standard:
-                        unscaled = (scaled * self.stds) + self.means
+                        unscaled *= self.stds
+                        unscaled += self.means

                     if self.scale_minmax:
-                        unscaled = (
-                            scaled * (self.maxs - self.mins)
-                        ) + self.mins
+                        unscaled *= self.maxs - self.mins
+                        unscaled += self.mins

                 else:

                     if self.scale_standard:
-                        unscaled = (scaled * self.total_std) + self.total_mean
+                        unscaled *= self.total_std
+                        unscaled += self.total_mean

                     if self.scale_minmax:
-                        unscaled = (
-                            scaled * (self.total_max - self.total_min)
-                        ) + self.total_min
+                        unscaled *= self.total_max - self.total_min
+                        unscaled += self.total_min
         #
         if as_numpy:
             return unscaled.detach().numpy().astype(np.float64)

From f31f9b9fef0753881583e1f9dad473c071820e48 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Fri, 25 Oct 2024 16:22:35 +0200
Subject: [PATCH 03/13] Made interface more consistent with sklearn

---
 mala/datahandling/data_scaler.py |  2 +-
 test/scaling_test.py             | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index 58dc1d294..c2ae8cd7e 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
                     if self.scale_minmax:
                         unscaled *= self.total_max - self.total_min
                         unscaled += self.total_min
-        #
+
         if as_numpy:
             return unscaled.detach().numpy().astype(np.float64)
         else:

diff --git a/test/scaling_test.py b/test/scaling_test.py
index bae56cb82..eed0c201f 100644
--- a/test/scaling_test.py
+++ b/test/scaling_test.py
             transformed = scaler.inverse_transform(transformed)
             relative_error = torch.sum(np.abs((data2 - transformed) / data2))
             assert relative_error < desired_accuracy
+
+    def test_array_referencing(self):
+        # Asserts that even with the new in-place scaling, data is referenced
+        # and not copied (unless a copy is explicitly requested)
+
+        for scaling in [
+            "feature-wise-standard",
+            "standard",
+            "None",
+            "minmax",
+            "feature-wise-minmax",
+        ]:
+            data = np.load(os.path.join(data_path, "Be_snapshot2.in.npy"))
+            data = data.astype(np.float32)
+            data = data.reshape(
+                [np.prod(np.shape(data)[0:3]), np.shape(data)[3]]
+            )
+            data = torch.from_numpy(data).float()
+
+            scaler = mala.DataScaler(scaling)
+            scaler.fit(data)
+
+            numpy_array = np.expand_dims(np.random.random(94), axis=0)
+            test_data = torch.from_numpy(numpy_array)
+            scaler.transform(test_data)
+            scaler.inverse_transform(test_data)
+            numpy_array *= 2
+            assert np.isclose(
+                np.sum(
+                    test_data.detach().numpy().astype(np.float64)
+                    - numpy_array
+                ),
+                0.0,
+                rtol=1e-16,
+            )

From dc6a8ff491128876abddb4fa5dc722d3abca7b6e Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Fri, 25 Oct 2024 16:28:25 +0200
Subject: [PATCH 04/13] Also made partial_fit consistent with sklearn, but
 have to test this in the CI to check that nothing breaks

---
 mala/datahandling/data_handler.py | 11 ++++-------
 mala/datahandling/data_scaler.py  | 12 +++---------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/mala/datahandling/data_handler.py b/mala/datahandling/data_handler.py
index 7b8fc2a43..e4bcb3dfe 100644
--- a/mala/datahandling/data_handler.py
+++ b/mala/datahandling/data_handler.py
         self.nr_training_snapshots = 0
         self.nr_test_snapshots = 0
         self.nr_validation_snapshots = 0
+        self.input_data_scaler.reset()
+        self.output_data_scaler.reset()
         super(DataHandler, self).clear_data()

     # Preparing data

         # scaling. This should save some performance.

         if self.parameters.use_lazy_loading:
-            self.input_data_scaler.start_incremental_fitting()
             # We need to perform the data scaling over the entirety of the
             # training data.
             for snapshot in self.parameters.snapshot_directories_list:

                         [snapshot.grid_size, self.input_dimension]
                     )
                     tmp = torch.from_numpy(tmp).float()
-                    self.input_data_scaler.incremental_fit(tmp)
-
-            self.input_data_scaler.finish_incremental_fitting()
+                    self.input_data_scaler.partial_fit(tmp)

         else:
             self.__load_data("training", "inputs")

         if self.parameters.use_lazy_loading:
             i = 0
-            self.output_data_scaler.start_incremental_fitting()
             # We need to perform the data scaling over the entirety of the
             # training data.
             for snapshot in self.parameters.snapshot_directories_list:

                         [snapshot.grid_size, self.output_dimension]
                     )
                     tmp = torch.from_numpy(tmp).float()
-                    self.output_data_scaler.incremental_fit(tmp)
+                    self.output_data_scaler.partial_fit(tmp)
                 i += 1
-            self.output_data_scaler.finish_incremental_fitting()

         else:
             self.__load_data("training", "outputs")

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index c2ae8cd7e..9e34fecb6 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
         if self.scale_standard is True and self.scale_minmax is True:
             raise Exception("Invalid input data rescaling.")

-    def start_incremental_fitting(self):
+    def reset(self):
         """
         Start the incremental calculation of scaling parameters.

         This is necessary for lazy loading.
         """
         self.total_data_count = 0

-    def incremental_fit(self, unscaled):
+    def partial_fit(self, unscaled):
         """
         Add data to the incremental calculation of scaling parameters.

         """
         if self.scale_standard is False and self.scale_minmax is False:
+            self.cantransform = True
             return
         else:
             with torch.no_grad():

                 new_min = torch.min(unscaled)
                 if new_min < self.total_min:
                     self.total_min = new_min
-
-    def finish_incremental_fitting(self):
-        """
-        Indicate that all data has been added to the incremental calculation.
-
-        This is necessary for lazy loading.
-        """
         self.cantransform = True

     def fit(self, unscaled):

From 30768ee4bf38ce8a975ca443e3f0738dcf57705e Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Tue, 29 Oct 2024 11:32:12 +0100
Subject: [PATCH 05/13] Fixed docs

---
 mala/common/parameters.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mala/common/parameters.py b/mala/common/parameters.py
index 5b415e9d7..63bda2c1b 100644
--- a/mala/common/parameters.py
+++ b/mala/common/parameters.py
           standard deviation 1) is applied to each feature dimension
           individually. I.e., if your training data has dimensions
           (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled
-          individually. 
+          individually.
         - "feature-wise-minmax": Row Min-Max scaling (Scale to be in range
           0...1) is applied to each feature dimension individually.

           standard deviation 1) is applied to each feature dimension
           individually. I.e., if your training data has dimensions
           (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled
-          individually. 
+          individually.
         - "feature-wise-minmax": Row Min-Max scaling (Scale to be in range
           0...1) is applied to each feature dimension individually.

From 5765b48b869146b9a78f27be46f8aa8dac45e298 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler <37868410+RandomDefaultUser@users.noreply.github.com>
Date: Thu, 14 Nov 2024 17:26:30 +0100
Subject: [PATCH 06/13] Update mala/datahandling/data_scaler.py

Co-authored-by: Steve Schmerler
---
 mala/datahandling/data_scaler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index 9e34fecb6..86852161d 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
         # Perform the actual scaling, but use no_grad to make sure
         # that the next couple of iterations stay untracked.
-        scaled = unscaled if copy is False else unscaled.clone()
+        scaled = unscaled.clone() if copy else unscaled

         with torch.no_grad():
             if self.feature_wise:

From 1ddd3c79b3d3287926689bf723076d1c502f948a Mon Sep 17 00:00:00 2001
From: Lenz Fiedler <37868410+RandomDefaultUser@users.noreply.github.com>
Date: Thu, 14 Nov 2024 17:26:54 +0100
Subject: [PATCH 07/13] Update mala/datahandling/data_scaler.py

Co-authored-by: Steve Schmerler
---
 mala/datahandling/data_scaler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index 86852161d..7483433a4 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
         # Perform the actual scaling, but use no_grad to make sure
         # that the next couple of iterations stay untracked.
-        unscaled = scaled if copy is False else scaled.clone()
+        unscaled = scaled.clone() if copy else scaled

         # First we need to find out if we even have to do anything.
         if self.scale_standard is False and self.scale_minmax is False:

From c0f80ff4c92d8a3b4fd28def0043102397523c6a Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Thu, 14 Nov 2024 17:43:38 +0100
Subject: [PATCH 08/13] Fixed dimensions as given in docstrings, added array
 check in DataScaler

---
 mala/datahandling/data_scaler.py | 44 ++++++++++++++++++++++----------
 test/scaling_test.py             |  6 +++---
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index 9e34fecb6..2505495fe 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
     """Scales input and output data.

     Sort of emulates the functionality of the scikit-learn library, but by
-    implementing the class by ourselves we have more freedom.
+    implementing the class by ourselves we have more freedom. Specifically
+    assumes data of the form (d,f), where d=x*y*z, i.e., the product of spatial
+    dimensions, and f is the feature dimension.

     Parameters
     ----------
     typestring : string
         Specifies how scaling should be performed.

         Options:

         - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
           standard deviation 1) is applied to the entire array.
         - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
           to the entire array.
         - "feature-wise-standard": Standardization (Scale to mean 0,
           standard deviation 1) is applied to each feature dimension
           individually.
-          I.e., if your training data has dimensions (x,y,z,f), then each
-          of the f rows with (x,y,z) entries is scaled individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f rows with d entries is scaled individually.
         - "feature-wise-minmax": Min-Max scaling (Scale to be in range
           0...1) is applied to each feature dimension individually.
-          I.e., if your training data has dimensions (x,y,z,f), then each
-          of the f rows with (x,y,z) entries is scaled individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f rows with d entries is scaled individually.
         - "normal": (DEPRECATED) Old name for "minmax".

             Data that is to be added to the fit.

         """
+        if len(unscaled.size()) != 2:
+            raise ValueError(
+                "The MALA DataScaler is designed for 2D-arrays, "
+                "while a {0}D-array has been provided.".format(
+                    len(unscaled.size())
+                )
+            )
+
         if self.scale_standard is False and self.scale_minmax is False:
             self.cantransform = True
             return

             Data that on which the scaling will be calculated.

         """
+        if len(unscaled.size()) != 2:
+            raise ValueError(
+                "The MALA DataScaler is designed for 2D-arrays, "
+                "while a {0}D-array has been provided.".format(
+                    len(unscaled.size())
+                )
+            )
+
         if self.scale_standard is False and self.scale_minmax is False:
             return
         else:
             with torch.no_grad():

         scaled : torch.Tensor
             Scaled data.
         """
+        if len(unscaled.size()) != 2:
+            raise ValueError(
+                "The MALA DataScaler is designed for 2D-arrays, "
+                "while a {0}D-array has been provided.".format(
+                    len(unscaled.size())
+                )
+            )
+
         # Backward compatibility.
         if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"):
             self.scale_minmax = self.scale_normal

             Real world data.

         """
+        if len(scaled.size()) != 2:
+            raise ValueError(
+                "The MALA DataScaler is designed for 2D-arrays, "
+                "while a {0}D-array has been provided.".format(
+                    len(scaled.size())
+                )
+            )
+
         # Backward compatibility.
         if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"):
             self.scale_minmax = self.scale_normal

         # Perform the actual scaling, but use no_grad to make sure
         # that the next couple of iterations stay untracked.
         unscaled = scaled.clone() if copy else scaled

diff --git a/test/scaling_test.py b/test/scaling_test.py
index eed0c201f..8f5fa4fb4 100644
--- a/test/scaling_test.py
+++ b/test/scaling_test.py
         ]:
             data = np.load(os.path.join(data_path, "Be_snapshot2.in.npy"))
             data = data.astype(np.float32)
-            data = data.reshape(
-                [np.prod(np.shape(data)[0:3]), np.shape(data)[3]]
-            )
+            # data = data.reshape(
+            #     [np.prod(np.shape(data)[0:3]), np.shape(data)[3]]
+            # )
             data = torch.from_numpy(data).float()

             scaler = mala.DataScaler(scaling)
             scaler.fit(data)

From c1dce0c7c73a4ff5922f4f85a79c7e71d33a1d82 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Tue, 19 Nov 2024 13:23:53 +0100
Subject: [PATCH 09/13] Fixed pipeline

---
 mala/datahandling/data_handler.py | 12 ++++++------
 test/scaling_test.py              |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/mala/datahandling/data_handler.py b/mala/datahandling/data_handler.py
index e4bcb3dfe..9f63734fd 100644
--- a/mala/datahandling/data_handler.py
+++ b/mala/datahandling/data_handler.py
     def get_snapshot_calculation_output(self, snapshot_number):

     ######################

     def raw_numpy_to_converted_scaled_tensor(
-        self, numpy_array, data_type, units, convert3Dto1D=False
+        self,
+        numpy_array,
+        data_type,
+        units,
     ):
         """
         Transform a raw numpy array into a scaled torch tensor.

             processed.
         units : string
             Units of the data that is processed.
-        convert3Dto1D : bool
-            If True (default: False), then a (x,y,z,dim) array is transformed
-            into a (x*y*z,dim) array.

         Returns
         -------

         )

         # If desired, the dimensions can be changed.
-        if convert3Dto1D:
+        if len(np.shape(numpy_array)) == 4:
             if data_type == "in":
                 data_dimension = self.input_dimension
             else:
                 data_dimension = self.output_dimension
-            grid_size = np.prod(numpy_array[0:3])
+            grid_size = np.prod(np.shape(numpy_array)[0:3])
             desired_dimensions = [grid_size, data_dimension]
         else:
             desired_dimensions = None

diff --git a/test/scaling_test.py b/test/scaling_test.py
index 8f5fa4fb4..eed0c201f 100644
--- a/test/scaling_test.py
+++ b/test/scaling_test.py
         ]:
             data = np.load(os.path.join(data_path, "Be_snapshot2.in.npy"))
             data = data.astype(np.float32)
-            # data = data.reshape(
-            #     [np.prod(np.shape(data)[0:3]), np.shape(data)[3]]
-            # )
+            data = data.reshape(
+                [np.prod(np.shape(data)[0:3]), np.shape(data)[3]]
+            )
             data = torch.from_numpy(data).float()

             scaler = mala.DataScaler(scaling)

From d7087d3ed9220062d7ae47c383973be18e4c4520 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler <37868410+RandomDefaultUser@users.noreply.github.com>
Date: Fri, 22 Nov 2024 18:38:29 +0100
Subject: [PATCH 10/13] Update mala/datahandling/data_scaler.py

Co-authored-by: Steve Schmerler
---
 mala/datahandling/data_scaler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index 8997ea4ea..d10c5a25b 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
           standard deviation 1) is applied to each feature dimension
           individually.
           I.e., if your training data has dimensions (d,f), then each
-          of the f rows with d entries is scaled individually.
+          of the f columns with d entries is scaled individually.
         - "feature-wise-minmax": Min-Max scaling (Scale to be in range
           0...1) is applied to each feature dimension individually.
           I.e., if your training data has dimensions (d,f), then each
           of the f rows with d entries is scaled individually.
         - "normal": (DEPRECATED) Old name for "minmax".

From ec4777b3ad8965c14e9e28bbb01b490e02811e91 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler <37868410+RandomDefaultUser@users.noreply.github.com>
Date: Fri, 22 Nov 2024 18:38:37 +0100
Subject: [PATCH 11/13] Update mala/datahandling/data_scaler.py

Co-authored-by: Steve Schmerler
---
 mala/datahandling/data_scaler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py
index d10c5a25b..96112d5f0 100644
--- a/mala/datahandling/data_scaler.py
+++ b/mala/datahandling/data_scaler.py
         - "feature-wise-minmax": Min-Max scaling (Scale to be in range
           0...1) is applied to each feature dimension individually.
           I.e., if your training data has dimensions (d,f), then each
-          of the f rows with d entries is scaled individually.
+          of the f columns with d entries is scaled individually.
         - "normal": (DEPRECATED) Old name for "minmax".
         - "feature-wise-normal": (DEPRECATED) Old name for
           "feature-wise-minmax"

From 608ba3907a87be21c5fbba08e0951f3d60f1a84d Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Fri, 22 Nov 2024 18:42:39 +0100
Subject: [PATCH 12/13] Corrected (x,y,z) to (d) in two places

---
 docs/source/basic_usage/trainingmodel.rst |  8 ++------
 mala/common/parameters.py                 | 24 +++++++++++------------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/docs/source/basic_usage/trainingmodel.rst b/docs/source/basic_usage/trainingmodel.rst
index bfb157c9a..53cb8a8df 100644
--- a/docs/source/basic_usage/trainingmodel.rst
+++ b/docs/source/basic_usage/trainingmodel.rst
 improves the performance of NN based ML models. Options are

 * ``minmax``: Min-Max scaling (Scale to be in range 0...1) is applied to the
   entire array.
 * ``feature-wise-standard``: Standardization (Scale to mean 0, standard
-  deviation 1) is applied to each feature dimension individually. I.e., if your
-  training data has dimensions (x,y,z,f), then each of the f rows with (x,y,z)
-  entries is scaled individually.
+  deviation 1) is applied to each feature dimension individually.

 * ``feature-wise-minmax``: Min-Max scaling (Scale to be in range 0...1) is
-  applied to each feature dimension individually. I.e., if your training data
-  has dimensions (x,y,z,f), then each of the f rows with (x,y,z) entries is
-  scaled individually.
+  applied to each feature dimension individually.

 Here, we specify that MALA should standardize the input (=descriptors)
 by feature (i.e., each entry of the vector separately on the grid) and

diff --git a/mala/common/parameters.py b/mala/common/parameters.py
index 63bda2c1b..720d1308a 100644
--- a/mala/common/parameters.py
+++ b/mala/common/parameters.py
           to the entire array.
         - "feature-wise-standard": Standardization (Scale to mean 0,
           standard deviation 1) is applied to each feature dimension
-          individually. I.e., if your training data has dimensions
-          (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled
-          individually.
+          individually.
- I.e., if your training data has dimensions (x,y,z,f), then each - of the f rows with (x,y,z) entries is scaled indiviually. + I.e., if your training data has dimensions (d,f), then each + of the f columns with d entries is scaled indiviually. - "normal": (DEPRECATED) Old name for "minmax". - "feature-wise-normal": (DEPRECATED) Old name for "feature-wise-minmax" @@ -602,13 +602,13 @@ class ParametersData(ParametersBase): to the entire array. - "feature-wise-standard": Standardization (Scale to mean 0, standard deviation 1) is applied to each feature dimension - individually. I.e., if your training data has dimensions - (x,y,z,f), then each of the f rows with (x,y,z) entries is scaled - indiviually. - - "feature-wise-minmax": Row Min-Max scaling (Scale to be in range + individually. + I.e., if your training data has dimensions (d,f), then each + of the f columns with d entries is scaled indiviually. + - "feature-wise-minmax": Min-Max scaling (Scale to be in range 0...1) is applied to each feature dimension individually. - I.e., if your training data has dimensions (x,y,z,f), then each - of the f rows with (x,y,z) entries is scaled indiviually. + I.e., if your training data has dimensions (d,f), then each + of the f columns with d entries is scaled indiviually. - "normal": (DEPRECATED) Old name for "minmax". - "feature-wise-normal": (DEPRECATED) Old name for "feature-wise-minmax" From a98830ba75f20bb007ce9854aab36d4a27bbb5ad Mon Sep 17 00:00:00 2001 From: Lenz Fiedler Date: Fri, 22 Nov 2024 18:47:05 +0100 Subject: [PATCH 13/13] Added note about propagating changes --- mala/datahandling/data_scaler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py index 96112d5f0..5f4491907 100644 --- a/mala/datahandling/data_scaler.py +++ b/mala/datahandling/data_scaler.py @@ -9,6 +9,9 @@ from mala.common.parallelizer import parallel_warn +# IMPORTANT: If you change the docstrings, make sure to also change them +# in the ParametersData subclass, because users do usually not interact +# with this class directly. class DataScaler: """Scales input and output data.