From 17900923664b83a1a831207510b83b9810586962 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Wed, 27 Nov 2024 19:46:04 +0100
Subject: [PATCH] Small final adjustments

---
 mala/network/acsd_analyzer.py                |  4 +--
 mala/network/descriptor_scoring_optimizer.py | 30 ++++++++++++++------
 mala/network/mutual_information_analyzer.py  | 10 +++----
 setup.py                                     |  2 +-
 4 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/mala/network/acsd_analyzer.py b/mala/network/acsd_analyzer.py
index 2f2f0a130..049a1d824 100644
--- a/mala/network/acsd_analyzer.py
+++ b/mala/network/acsd_analyzer.py
@@ -50,8 +50,8 @@ def _update_logging(self, score, index):
             self.best_score = score
             self.best_trial_index = index

-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         return self._study[np.argmin(self._study[:, -1])]

     @staticmethod
diff --git a/mala/network/descriptor_scoring_optimizer.py b/mala/network/descriptor_scoring_optimizer.py
index 3a8c59efc..11608f5d3 100644
--- a/mala/network/descriptor_scoring_optimizer.py
+++ b/mala/network/descriptor_scoring_optimizer.py
@@ -33,7 +33,7 @@ class DescriptorScoringOptimizer(HyperOpt, ABC):
     """
     Base class for all training-free descriptor hyperparameter optimizers.

-    These optimizer use alternative metrics (ACSD, mutual information, etc.
+    These optimizer use alternative metrics ACSD, mutual information, etc.
     to tune descriptor hyperparameters.

     Parameters
@@ -50,6 +50,14 @@ class DescriptorScoringOptimizer(HyperOpt, ABC):
         Target calculator used for parsing/converting target data. If None,
         the target calculator will be created by this object using the
         parameters provided. Default: None
+
+    Attributes
+    ----------
+    best_score : float
+        Score associated with best-performing trial.
+
+    best_trial_index : int
+        Index of best-performing trial
     """

     def __init__(
@@ -199,7 +207,7 @@ def perform_study(
         Perform the study, i.e. the optimization.

         This is done by sampling different descriptors, calculated with
-        different hyperparameters and then calculating the ACSD.
+        different hyperparameters and then calculating some surrogate score.
         """
         # Prepare the hyperparameter lists.
         self._construct_hyperparam_list()
@@ -317,14 +325,20 @@ def perform_study(
         }

     def set_optimal_parameters(self):
+        """
+        Set optimal parameters.
+
+        This function will write the determined hyperparameters directly to
+        MALA parameters object referenced in this class.
+        """
         if get_rank() == 0:
-            best_trial = self.get_best_trial()
+            best_trial = self._get_best_trial()
             minimum_score = self._study[np.argmin(self._study[:, -1])]
             if isinstance(self._descriptor_calculator, Bispectrum):
                 self.params.descriptors.bispectrum_cutoff = best_trial[0]
                 self.params.descriptors.bispectrum_twojmax = int(best_trial[1])
                 printout(
-                    "ACSD analysis finished, optimal parameters: ",
+                    "Descriptor scoring analysis finished, optimal parameters: ",
                 )
                 printout(
                     "Bispectrum twojmax: ",
@@ -338,7 +352,7 @@ def set_optimal_parameters(self):
                 self.params.descriptors.atomic_density_cutoff = best_trial[0]
                 self.params.descriptors.atomic_density_sigma = best_trial[1]
                 printout(
-                    "ACSD analysis finished, optimal parameters: ",
+                    "Descriptor scoring analysis finished, optimal parameters: ",
                 )
                 printout(
                     "Atomic density sigma: ",
@@ -350,8 +364,8 @@ def set_optimal_parameters(self):
             )

     @abstractmethod
-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         pass

     def _construct_hyperparam_list(self):
@@ -486,7 +500,7 @@ def _load_target(
             self.params.descriptors._configuration["mpi"]
             and file_based_communication
         ):
-            memmap = "acsd.out.npy_temp"
+            memmap = "descriptor_scoring.out.npy_temp"

             target_calculator_kwargs = {}

diff --git a/mala/network/mutual_information_analyzer.py b/mala/network/mutual_information_analyzer.py
index 577d93446..f563bd648 100644
--- a/mala/network/mutual_information_analyzer.py
+++ b/mala/network/mutual_information_analyzer.py
@@ -45,8 +45,8 @@ def __init__(
             descriptor_calculator=descriptor_calculator,
         )

-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         return self._study[np.argmax(self._study[:, -1])]

     def _update_logging(self, score, index):
@@ -131,7 +131,7 @@ def _calculate_mutual_information(
         return mi

     @staticmethod
-    def normalize(data):
+    def _normalize(data):
         mean = np.mean(data, axis=0)
         std = np.std(data, axis=0)
         std_nonzero = std > 1e-6
@@ -161,8 +161,8 @@ def _mutual_information(
         dim_X = X.shape[-1]
         rand_subset = np.random.permutation(n)[:n_samples]
         if normalize_data:
-            X = MutualInformationAnalyzer.normalize(X)
-            Y = MutualInformationAnalyzer.normalize(Y)
+            X = MutualInformationAnalyzer._normalize(X)
+            Y = MutualInformationAnalyzer._normalize(Y)
         X = X[rand_subset]
         Y = Y[rand_subset]
         XY = np.concatenate([X, Y], axis=1)
diff --git a/setup.py b/setup.py
index b34c3fef2..7ce1509ff 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
 extras = {
     "dev": ["bump2version"],
-    "opt": ["oapackage"],
+    "opt": ["oapackage", "scikit-learn"],
     "test": ["pytest", "pytest-cov"],
     "doc": open("docs/requirements.txt").read().splitlines(),
     "experimental": ["asap3", "dftpy", "minterpy"],