Chore/docstrings (#88)
* Added docstring to DataLoader class and deleted deprecated tutorial

* Docstrings for data_utils.py

* Add docstrings to evaluation.py

* Re-add deleted function to evaluation with docstrings

* Rename vars in evaluation.py for clarity

* Added docstring to visualizer.py

Wait, are we even using this or will this just be deleted anyway?

* Docstrings for classifier.py

* Added docstring to the add_rows_metrics function

This function is very ugly; however, it works and I don't think anyone
should try to reformat it.

* Add Poster.pdf

* Add Poster.pdf

This time for real

* Added poster into Readme as picture

* Added different resolution posters and defaulted to 200dpi

* Clarify general.py

* Delete assets/Poster_100dpi.png

* Delete assets/Poster_200dpi.png

* Delete assets/Poster_400dpi.png

---------

Co-authored-by: Tiago Würthner <[email protected]>
Co-authored-by: Tiago Würthner <[email protected]>
Co-authored-by: Magdalena Lederbauer <[email protected]>
4 people authored Jun 19, 2024
1 parent d1bd519 commit 4868d4c
Showing 10 changed files with 192 additions and 486 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -104,7 +104,9 @@ When the parameter `max_eval` is set to a high value such as 20, expect the whol

# 🖼️Poster

If you were not able to visit our beautiful poster at ETH Zurich on May 30th 2024, you can access our poster [here](TODO)!
If you were not able to visit our beautiful poster at ETH Zurich on May 30th 2024, you can access our poster [here](assets/Poster.pdf)!

![Poster](assets/Poster_200dpi.png)

# 🧑‍💻 Developing

Binary file added assets/Poster.pdf
Binary file not shown.
Binary file added assets/Poster_1000dpi.png
12 changes: 10 additions & 2 deletions nmrcraft/data/data_utils.py
@@ -40,8 +40,16 @@ def filename_to_ligands(dataset: pd.DataFrame):
return dataset


def load_dummy_dataset_locally(datset_path: str = "tests/data.csv"):
dataset = pd.read_csv(datset_path)
def load_dummy_dataset_locally(dataset_path: str = "tests/data.csv"):
"""
Load a dummy dataset from a local CSV file for testing purposes.
Args:
dataset_path (str, optional): The path to the CSV file containing the dataset. Defaults to "tests/data.csv".
Returns:
pandas.DataFrame: The dataset loaded from the CSV file.
"""
dataset = pd.read_csv(dataset_path)
return dataset
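
A minimal usage sketch of the helper documented above, assuming the import path matches the file location and that `tests/data.csv` is present:

```python
from nmrcraft.data.data_utils import load_dummy_dataset_locally  # import path assumed

# Load the dummy CSV used by the test suite (default path "tests/data.csv").
df = load_dummy_dataset_locally()
print(df.shape)
```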


31 changes: 31 additions & 0 deletions nmrcraft/data/dataloader.py
@@ -31,6 +31,37 @@


class DataLoader:
"""
DataLoader is responsible for loading and preparing data for machine learning models
in the `nmrcraft` project.
It supports configuration of various dataset parameters including feature selection,
target column specification, dataset size manipulation, and can return split datasets
tuned for training and testing phases.
Parameters:
feature_columns (list of str): Names of columns to be used as features.
target_columns (str): Name(s) of the column(s) used as targets.
test_size (float): Proportion of the dataset to include in the test split.
random_state (int): Seed used by random number generator for reproducibility.
dataset_size (float): Proportion of the full dataset to use.
complex_geometry (str): Specifies the type of complex geometries to include ('oct', 'spy', 'tbp', or 'all').
include_structural_features (bool): Indicates whether structural features should be included in the dataset.
Returns:
dataloader (DataLoader): dataloader object that is used to load and preprocess the dataset.
Example:
>>> data_loader = DataLoader(
feature_columns=["M_sigma11_ppm", "M_sigma22_ppm"],
target_columns="metal X4_ligand E_ligand",
test_size=0.2,
random_state=42,
dataset_size=0.1,
complex_geometry="all",
include_structural_features=True
)
"""

def __init__(
self,
target_columns: str,
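
A hedged sketch of how the documented loader is typically consumed. The constructor arguments mirror the docstring example, while the `load_data()` call and its four-way unpacking are assumptions based on the `classifier.py` hunk further down:

```python
from nmrcraft.data.dataloader import DataLoader  # import path assumed

data_loader = DataLoader(
    feature_columns=["M_sigma11_ppm", "M_sigma22_ppm"],
    target_columns="metal",
    test_size=0.2,
    random_state=42,
    dataset_size=0.1,
    complex_geometry="all",
    include_structural_features=True,
)
# The exact return signature is not shown in this diff; a train/test split
# of features and targets is assumed here.
X_train, X_test, y_train, y_test = data_loader.load_data()
```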
45 changes: 35 additions & 10 deletions nmrcraft/evaluation/evaluation.py
@@ -65,6 +65,25 @@ def evaluate_model(


def evaluate_bootstrap(X_test, y_test, model, targets, n_times=10):
"""
Perform bootstrap evaluation of a model on test data.
This function repeatedly samples with replacement from the test dataset and evaluates
the model on these samples. It aggregates the performance metrics across all bootstrap
samples to give a robust estimate of the model's generalizability.
Args:
X_test (np.ndarray): The input features of the test data.
y_test (np.ndarray): The true labels of the test data.
model (object): The model that is being evaluated.
targets (List[str]): A list of target variable names.
n_times (int, optional): The number of bootstrap samples to generate.
Returns:
Dict[str, Dict[str, List[float]]]: A dictionary containing the computed metrics
for each target. Each target's value is another dictionary containing lists
of performance scores ('Accuracy' and 'F1') across the bootstrap samples.
"""
bootstrap_metrics: Dict = {}
for _ in range(n_times):
X_test, y_test = resample(
@@ -91,27 +110,33 @@ def evaluate_bootstrap(X_test, y_test, model, targets, n_times=10):

def metrics_statistics(
bootstrapped_metrics,
): # TODO: Handle what to do when there are more than one target -> unify scores or return splitted
"""
Do statistics with the bootsrapped metrics
):
"""Calculate the statistical summary of bootstrapped evaluation metrics with F1 score and Accuracy.
Args:
dict: bootstrapped_metrics
bootstrapped_metrics (dict): A dictionary containing the name of each target with another dictionary
as value, which includes values of the F1 scores and Accuracies of the bootstrapped models.
Returns:
dict: Mean and 95% ci for the bootstrapped values for each target
list: A list containing five elements:
- [0]: List of target names for which metrics are calculated.
- [1]: List of mean accuracies for each target.
- [2]: List of tuples where each tuple consists of the lower and upper bounds of the 95% confidence interval for accuracy for each target.
- [3]: List of mean F1 scores for each target.
- [4]: List of tuples where each tuple consists of the lower and upper bounds of the 95% confidence interval for F1 score for each target.
Each element in the list corresponds to a specific set of statistical values related to the performance metrics (accuracy and F1 score) of the bootstrapped models for each target.
"""
# metrics_stats = pd.DataFrame(columns=["Targets", "Accuracy_mean", "Accuracy_ci", "F1_mean", "F1_ci",])
Targets = []
Accuracy_mean = []
Accuracy_ci = []
F1_mean = []
F1_ci = []

for key, value in bootstrapped_metrics.items():
# calc mean and 95% confidence interval for Accuracy
Targets.append(key)
for target, value in bootstrapped_metrics.items():
Targets.append(target)

# Calculate mean and 95% confidence interval for Accuracy
Accuracy_mean.append(np.mean(value["Accuracy"]))
Accuracy_ci.append(
st.t.interval(
@@ -122,7 +147,7 @@ def metrics_statistics(
)
)

# calc mean and 95% confidence interval for F1 score
# Calculate mean and 95% confidence interval for F1 score
F1_mean.append(np.mean(value["F1"]))
F1_ci.append(
st.t.interval(
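
To make the return layouts described in these docstrings concrete, a purely illustrative sketch of the data shapes (the values are invented; only the nesting follows the two docstrings above):

```python
# Shape returned by evaluate_bootstrap(): one entry per target,
# each holding per-bootstrap-sample scores (illustrative values only).
bootstrap_metrics = {
    "metal": {"Accuracy": [0.90, 0.92, 0.91], "F1": [0.89, 0.91, 0.90]},
    "X4_ligand": {"Accuracy": [0.75, 0.78, 0.77], "F1": [0.72, 0.74, 0.73]},
}

# Shape returned by metrics_statistics(bootstrap_metrics):
stats = [
    ["metal", "X4_ligand"],        # [0] target names
    [0.91, 0.77],                  # [1] mean accuracy per target
    [(0.88, 0.94), (0.73, 0.81)],  # [2] 95% CI (lower, upper) for accuracy
    [0.90, 0.73],                  # [3] mean F1 per target
    [(0.87, 0.93), (0.70, 0.76)],  # [4] 95% CI (lower, upper) for F1
]
```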
15 changes: 15 additions & 0 deletions nmrcraft/evaluation/visualizer.py
@@ -123,6 +123,21 @@ def plot_metric(
title="Title",
filename="Plot.png",
):
"""
Generates a plot for a specified metric against dataset size for different models.
The graph includes error bars representing the standard deviation of the metric.
Args:
data (pd.DataFrame): DataFrame with columns 'model', 'dataset_size', metric, and its standard deviation.
metric (str): Name of the metric to be plotted (e.g., 'accuracy', 'f1_score').
title (str, optional): Plot title. Defaults to "Title".
filename (str, optional): Filename for saving the plot. Defaults to "Plot.png".
Returns:
str: Path where the plot is saved.
"""

for model in data["model"].unique():
model_data = data[data["model"] == model]
std_name = metric + "_std"
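
A usage sketch for the plotting helper, under stated assumptions: the import path and the free-function call are guesses (the hunk does not show whether `plot_metric` is a module-level function or a method of a plotting class), and the column names follow the docstring:

```python
import pandas as pd

from nmrcraft.evaluation.visualizer import plot_metric  # import path assumed

# Toy frame with the columns the docstring describes: 'model', 'dataset_size',
# the metric itself, and its standard deviation.
data = pd.DataFrame({
    "model": ["random_forest"] * 3,
    "dataset_size": [0.1, 0.5, 1.0],
    "accuracy": [0.72, 0.85, 0.90],
    "accuracy_std": [0.04, 0.02, 0.01],
})
saved_path = plot_metric(
    data,
    metric="accuracy",
    title="Accuracy vs. dataset size",
    filename="accuracy.png",
)
```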
68 changes: 63 additions & 5 deletions nmrcraft/models/classifier.py
@@ -18,6 +18,31 @@


class Classifier:
"""
A machine learning classifier for structured data prediction.
This class encapsulates the entire process of model construction, from data loading
and preprocessing, through hyperparameter tuning, to training and evaluation.
Attributes:
model_name (str): Identifier for the model type.
max_evals (int): Maximum number of evaluations for tuning the model's hyperparameters.
target (str): Name of the target variable(s) in the dataset.
dataset_size (float): Size of the dataset to be used.
feature_columns (list, optional): List of feature names to be included in the model. Defaults to a predefined list.
random_state (int, optional): Seed for random number generators for reproducibility. Defaults to 42.
include_structural_features (bool, optional): Flag to include structural features in the data. Defaults to True.
complex_geometry (str, optional): Geometry type associated with the metal complexes. Defaults to 'oct'.
test_size (float, optional): Proportion of the dataset to include in the test split. Defaults to 0.2.
testing (bool, optional): Flag to indicate whether the instance is used for testing, affecting certain behaviors. Defaults to False.
Methods:
hyperparameter_tune: Tunes model parameters using specified algorithms.
train: Fits the model on the training data.
train_bootstrapped: Performs training using bootstrapped samples to gather statistics on models.
evaluate: Assesses model performance on test data.
"""

def __init__(
self,
model_name: str,
@@ -71,6 +96,10 @@ def __init__(
) = data_loader.load_data()

def hyperparameter_tune(self):
"""
Optimizes model parameters using training data and updates the best_params attribute.
"""

log.info(
f"Performing Hyperparameter tuning for the Model ({self.model_name})"
)
@@ -80,15 +109,27 @@ def train(self):
def train(self):
"""
Train the machine learning model using the best hyperparameters.
Returns:
None
"""

all_params = {**self.model_config["model_params"], **self.best_params}
self.model = load_model(self.model_name, **all_params)
self.model.fit(self.X_train, self.y_train)

def train_bootstrapped(self, n_times=10):
"""
Trains the model using bootstrapping to estimate accuracy and F1 score.
This method resamples the training set with replacement 'n_times', trains the model,
and then evaluates it to collect accuracy and F1 scores. It returns a DataFrame containing
the mean and standard deviation of these metrics.
Args:
n_times (int, optional): Number of bootstrap samples to generate. Defaults to 10.
Returns:
pd.DataFrame: DataFrame containing mean and standard deviation of accuracy and F1 score.
"""

accuracy = []
f1_score = []
i = 0
@@ -120,8 +161,16 @@ def evaluate(self) -> pd.DataFrame:
Evaluate the performance of the trained machine learning model.
Returns:
pd.DataFrame: A DataFrame containing evaluation metrics (accuracy, f1_score, roc_auc),
the confusion matrix, false positive rates, and true positive rates for each class.
pd.DataFrame: A single-row DataFrame with the following columns:
- 'accuracy' (float)
- 'accuracy_std' (float)
- 'f1_score' (float)
- 'f1_score_std' (float)
- 'dataset_size' (float)
- 'model' (str)
- 'confusion_matrix' (list of lists)
- 'fpr' (list)
- 'tpr' (list)
"""
y_pred = self.model.predict(self.X_test)
# print(y_pred)
@@ -138,6 +187,15 @@ def evaluate(self) -> pd.DataFrame:
cm = confusion_matrix(self.y_test, y_pred)

def calculate_fpr_fnr(cm):
"""
Calculates the False Positive Rate (FPR) and False Negative Rate (FNR) for each class from a confusion matrix.
Args:
cm (np.ndarray): Confusion matrix.
Returns:
tuple: Two numpy arrays `(FPR, FNR)` containing the FPR and FNR for each class.
"""
FPR = []
FNR = []
num_classes = cm.shape[0]
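
Tying the Classifier docstrings together, a hedged end-to-end sketch of the workflow the Methods section describes; the import path and the exact constructor keywords are assumptions based on the class docstring and the visible `__init__` signature:

```python
from nmrcraft.models.classifier import Classifier  # import path assumed

clf = Classifier(
    model_name="random_forest",  # model identifier; supported names are not shown in this diff
    max_evals=5,
    target="metal",
    dataset_size=0.1,
)

clf.hyperparameter_tune()              # updates clf.best_params (per the docstring)
clf.train()                            # refits with the tuned hyperparameters
boot_df = clf.train_bootstrapped(n_times=10)  # mean/std of accuracy and F1 over resamples
results = clf.evaluate()               # single-row DataFrame: accuracy, f1_score, ...
```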
44 changes: 35 additions & 9 deletions nmrcraft/utils/general.py
@@ -9,21 +9,47 @@ def add_rows_metrics(
model_name: str,
max_evals: int,
):
# Add all the newly generated metrics to the unified dataframe targetwise
"""
Compiles and adds a series of statistical metrics into a unified DataFrame, one row at a time.
Args:
statistical_metrics (list): List of lists containing the mean and confidence intervals of
accuracy and F1-score.
dataset_size (int): Number of samples in the dataset.
include_structural (bool): Indicates whether structural data was included in the analysis.
model_name (str): Name of the model that produced the metrics.
max_evals (int): Number of evaluations conducted in the Hyperparameter tuning.
Returns:
unified_metrics (pd.DataFrame): DataFrame with all metrics containing these columns:
target, model_targets, model, nmr_only, dataset_fraction, max_evals, accuracy_mean,
accuracy_lb, accuracy_hb, f1_mean, f1_lb, f1_hb
"""
# Give meaning to indices
idx_name = 0
idx_accuracy_mean = 1
idx_accuracy_ci = 2
idx_f1score_mean = 3
idx_f1score_ci = 4
idx_lb = 0
idx_hb = 1

# Combine all data into single row and append to dataframe
for i in range(len(statistical_metrics[0])):
new_row = [
statistical_metrics[0][i],
statistical_metrics[0],
statistical_metrics[idx_name][i],
statistical_metrics[idx_name],
model_name,
not include_structural,
dataset_size,
max_evals,
statistical_metrics[1][i],
statistical_metrics[2][i][0],
statistical_metrics[2][i][1],
statistical_metrics[3][i],
statistical_metrics[4][i][0],
statistical_metrics[4][i][1],
statistical_metrics[idx_accuracy_mean][i],
statistical_metrics[idx_accuracy_ci][i][idx_lb],
statistical_metrics[idx_accuracy_ci][i][idx_hb],
statistical_metrics[idx_f1score_mean][i],
statistical_metrics[idx_f1score_ci][i][idx_lb],
statistical_metrics[idx_f1score_ci][i][idx_hb],
]
unified_metrics.loc[len(unified_metrics)] = new_row
return unified_metrics
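
A hedged sketch of how the refactored helper might be driven. The DataFrame column order follows the docstring's Returns section, the `stats` list mirrors the `metrics_statistics` layout shown earlier, and the keyword names (including `unified_metrics`, which is not visible in the truncated signature) are assumptions:

```python
import pandas as pd

from nmrcraft.utils.general import add_rows_metrics  # import path assumed

unified_metrics = pd.DataFrame(columns=[
    "target", "model_targets", "model", "nmr_only", "dataset_fraction",
    "max_evals", "accuracy_mean", "accuracy_lb", "accuracy_hb",
    "f1_mean", "f1_lb", "f1_hb",
])

# One-target example of the metrics_statistics() output (illustrative values).
stats = [["metal"], [0.91], [(0.88, 0.94)], [0.90], [(0.87, 0.93)]]

unified_metrics = add_rows_metrics(
    unified_metrics=unified_metrics,  # parameter name assumed
    statistical_metrics=stats,
    dataset_size=0.1,
    include_structural=True,
    model_name="random_forest",
    max_evals=5,
)
print(unified_metrics)
```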
