From 17900923664b83a1a831207510b83b9810586962 Mon Sep 17 00:00:00 2001
From: Lenz Fiedler
Date: Wed, 27 Nov 2024 19:46:04 +0100
Subject: [PATCH] Small final adjustments

---
 mala/network/acsd_analyzer.py                |  4 +--
 mala/network/descriptor_scoring_optimizer.py | 30 ++++++++++++++------
 mala/network/mutual_information_analyzer.py  | 10 +++----
 setup.py                                     |  2 +-
 4 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/mala/network/acsd_analyzer.py b/mala/network/acsd_analyzer.py
index 2f2f0a130..049a1d824 100644
--- a/mala/network/acsd_analyzer.py
+++ b/mala/network/acsd_analyzer.py
@@ -50,8 +50,8 @@ def _update_logging(self, score, index):
             self.best_score = score
             self.best_trial_index = index

-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         return self._study[np.argmin(self._study[:, -1])]

     @staticmethod
diff --git a/mala/network/descriptor_scoring_optimizer.py b/mala/network/descriptor_scoring_optimizer.py
index 3a8c59efc..11608f5d3 100644
--- a/mala/network/descriptor_scoring_optimizer.py
+++ b/mala/network/descriptor_scoring_optimizer.py
@@ -33,7 +33,7 @@ class DescriptorScoringOptimizer(HyperOpt, ABC):
     """
     Base class for all training-free descriptor hyperparameter optimizers.

-    These optimizer use alternative metrics (ACSD, mutual information, etc.
+    These optimizer use alternative metrics ACSD, mutual information, etc.
     to tune descriptor hyperparameters.

     Parameters
@@ -50,6 +50,14 @@ class DescriptorScoringOptimizer(HyperOpt, ABC):
         Target calculator used for parsing/converting target data. If None,
         the target calculator will be created by this object using the
         parameters provided. Default: None
+
+    Attributes
+    ----------
+    best_score : float
+        Score associated with best-performing trial.
+
+    best_trial_index : int
+        Index of best-performing trial
     """

     def __init__(
@@ -199,7 +207,7 @@ def perform_study(
         Perform the study, i.e. the optimization.

         This is done by sampling different descriptors, calculated with
-        different hyperparameters and then calculating the ACSD.
+        different hyperparameters and then calculating some surrogate score.
         """
         # Prepare the hyperparameter lists.
         self._construct_hyperparam_list()
@@ -317,14 +325,20 @@ def perform_study(
         }

     def set_optimal_parameters(self):
+        """
+        Set optimal parameters.
+
+        This function will write the determined hyperparameters directly to
+        MALA parameters object referenced in this class.
+        """
         if get_rank() == 0:
-            best_trial = self.get_best_trial()
+            best_trial = self._get_best_trial()
             minimum_score = self._study[np.argmin(self._study[:, -1])]
             if isinstance(self._descriptor_calculator, Bispectrum):
                 self.params.descriptors.bispectrum_cutoff = best_trial[0]
                 self.params.descriptors.bispectrum_twojmax = int(best_trial[1])
                 printout(
-                    "ACSD analysis finished, optimal parameters: ",
+                    "Descriptor scoring analysis finished, optimal parameters: ",
                 )
                 printout(
                     "Bispectrum twojmax: ",
@@ -338,7 +352,7 @@ def set_optimal_parameters(self):
                 self.params.descriptors.atomic_density_cutoff = best_trial[0]
                 self.params.descriptors.atomic_density_sigma = best_trial[1]
                 printout(
-                    "ACSD analysis finished, optimal parameters: ",
+                    "Descriptor scoring analysis finished, optimal parameters: ",
                 )
                 printout(
                     "Atomic density sigma: ",
@@ -350,8 +364,8 @@ def set_optimal_parameters(self):
             )

     @abstractmethod
-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         pass

     def _construct_hyperparam_list(self):
@@ -486,7 +500,7 @@ def _load_target(
             self.params.descriptors._configuration["mpi"]
             and file_based_communication
         ):
-            memmap = "acsd.out.npy_temp"
+            memmap = "descriptor_scoring.out.npy_temp"

             target_calculator_kwargs = {}

diff --git a/mala/network/mutual_information_analyzer.py b/mala/network/mutual_information_analyzer.py
index 577d93446..f563bd648 100644
--- a/mala/network/mutual_information_analyzer.py
+++ b/mala/network/mutual_information_analyzer.py
@@ -45,8 +45,8 @@ def __init__(
             descriptor_calculator=descriptor_calculator,
         )

-    def get_best_trial(self):
-        """Different from best_trial because of parallelization."""
+    def _get_best_trial(self):
+        """Determine the best trial as given by this study."""
         return self._study[np.argmax(self._study[:, -1])]

     def _update_logging(self, score, index):
@@ -131,7 +131,7 @@ def _calculate_mutual_information(
         return mi

     @staticmethod
-    def normalize(data):
+    def _normalize(data):
         mean = np.mean(data, axis=0)
         std = np.std(data, axis=0)
         std_nonzero = std > 1e-6
@@ -161,8 +161,8 @@ def _mutual_information(
         dim_X = X.shape[-1]
         rand_subset = np.random.permutation(n)[:n_samples]
         if normalize_data:
-            X = MutualInformationAnalyzer.normalize(X)
-            Y = MutualInformationAnalyzer.normalize(Y)
+            X = MutualInformationAnalyzer._normalize(X)
+            Y = MutualInformationAnalyzer._normalize(Y)
         X = X[rand_subset]
         Y = Y[rand_subset]
         XY = np.concatenate([X, Y], axis=1)
diff --git a/setup.py b/setup.py
index b34c3fef2..7ce1509ff 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
 extras = {
     "dev": ["bump2version"],
-    "opt": ["oapackage"],
+    "opt": ["oapackage", "scikit-learn"],
     "test": ["pytest", "pytest-cov"],
     "doc": open("docs/requirements.txt").read().splitlines(),
     "experimental": ["asap3", "dftpy", "minterpy"],