diff --git a/.gitignore b/.gitignore
index dea0b60..ced95ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
 mlruns/
 scratch/
 dataset/
-data/
 plots/
+data/
 
 docs/source
 
diff --git a/nmrcraft/analysis/plotting.py b/nmrcraft/analysis/plotting.py
index 9acdcfb..5f78b15 100644
--- a/nmrcraft/analysis/plotting.py
+++ b/nmrcraft/analysis/plotting.py
@@ -1,3 +1,4 @@
+import matplotlib.patches as mpatches
 import matplotlib.pyplot as plt
 import numpy as np
 from cycler import cycler
@@ -15,11 +16,13 @@ def style_setup():
     plt.rcParams["text.latex.preamble"] = r"\usepackage{sansmathfonts}"
     plt.rcParams["axes.prop_cycle"] = cycler(color=colors)
 
-    # Use the first color from the custom color cycle
-    first_color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]
+    all_colors = [
+        plt.rcParams["axes.prop_cycle"].by_key()["color"][i]
+        for i in range(len(colors))
+    ]
     plt.rcParams["text.usetex"] = False
 
-    return cmap, colors, first_color
+    return cmap, colors, all_colors
 
 
 def plot_predicted_vs_ground_truth(
@@ -33,7 +36,8 @@ def plot_predicted_vs_ground_truth(
     Returns:
     None
     """
-    _, _, first_color = style_setup()
+    _, _, colors = style_setup()
+    first_color = colors[0]
     # Creating the plot
     plt.figure(figsize=(10, 8))
     plt.scatter(y_test, y_pred, color=first_color, edgecolor="k", alpha=0.6)
@@ -167,3 +171,75 @@ def plot_roc_curve(fpr, tpr, roc_auc, title, path):
     plt.legend(loc="lower right")
     plt.savefig(path)
     plt.close()
+
+
+def plot_with_without_ligands_bar(df):
+    """Save a grouped bar plot of accuracy with/without ligand information.
+
+    Args:
+        df: DataFrame with the columns "target", "accuracy_mean",
+            "accuracy_lower_bd", "accuracy_upper_bd" and
+            "nmr_tensor_input_only"; two rows per target are expected.
+    """
+    categories = df["target"].unique()
+    _, _, colors = style_setup()
+    ligand_color, nmr_only_color = colors[0], colors[1]
+
+    x_pos = np.arange(len(categories))
+    bar_width = 0.35
+
+    fig, ax = plt.subplots()
+
+    # One group of bars per target category
+    for i, category in enumerate(categories):
+        subset = df[df["target"] == category]
+
+        # Asymmetric error bars: distance from the mean to each bound
+        means = subset["accuracy_mean"].values
+        lower = means - subset["accuracy_lower_bd"].values
+        upper = subset["accuracy_upper_bd"].values - means
+
+        # Bar locations for the group
+        bar_positions = x_pos[i] + np.array([-bar_width / 2, bar_width / 2])
+
+        # NOTE(review): assumes nmr_tensor_input_only=True means the run had no
+        # ligand info, hence the "Without Ligand Info" legend color - confirm
+        bar_colors = [
+            nmr_only_color if nmr_only else ligand_color
+            for nmr_only in subset["nmr_tensor_input_only"]
+        ]
+
+        ax.bar(
+            bar_positions,
+            means,
+            yerr=np.array([lower, upper]),
+            color=bar_colors,
+            align="center",
+            ecolor="black",
+            capsize=5,
+            width=bar_width,
+        )
+
+    # Labeling and aesthetics
+    ax.set_ylabel("Accuracy / %")
+    ax.set_xlabel("Target(s)")
+    ax.set_xticks(x_pos)
+    ax.set_xticklabels(categories)
+    ax.set_title("Accuracy Measurements with Error Bars")
+
+    handles = [
+        mpatches.Patch(color=ligand_color, label="With Ligand Info"),
+        mpatches.Patch(color=nmr_only_color, label="Without Ligand Info"),
+    ]
+    ax.legend(handles=handles, loc="best", fontsize=20)
+    plt.tight_layout()
+    plt.savefig("plots/exp3_incorporate_ligand_info.png")
+    plt.close(fig)  # release the figure, as plot_roc_curve does
+    print("Saved to plots/exp3_incorporate_ligand_info.png")
+
+
+if __name__ == "__main__":  # run only when executed as a script
+    import pandas as pd
+
+    df = pd.read_csv("dataset/path_to_results.csv")
+    plot_with_without_ligands_bar(df)
diff --git a/nmrcraft/data/data_utils.py b/nmrcraft/data/data_utils.py
new file mode 100644
index 0000000..ea28bf1
--- /dev/null
+++ b/nmrcraft/data/data_utils.py
@@ -0,0 +1,76 @@
+"""Load and preprocess data."""
+
+import os
+
+import pandas as pd
+from datasets import load_dataset
+
+
+class DatasetLoadError(FileNotFoundError):
+    """Exception raised when the Dataloader could not find dataset/dataset.csv,
+    even after trying to generate it from huggingface"""
+
+    def __init__(self, t):
+        super().__init__(f"Could not load raw Dataset '{t}'")
+
+
+class InvalidTargetError(ValueError):
+    """Exception raised when the specified target is not valid."""
+
+    def __init__(self, t):
+        super().__init__(f"Invalid target '{t}'")
+
+
+def filename_to_ligands(dataset: pd.DataFrame):
+    """
+    Split each ``file_name`` on "_" and store the pieces as new columns.
+
+    Pieces 0-7 become metal, geometry, E_ligand, X1-X4_ligand and L_ligand;
+    a missing L_ligand is filled with "none". The frame is modified in place.
+    """
+    pieces = dataset["file_name"].str.split("_", expand=True)
+    # The first seven pieces map one-to-one onto these columns, in order
+    leading_columns = ("metal", "geometry", "E_ligand")
+    x_columns = ("X1_ligand", "X2_ligand", "X3_ligand", "X4_ligand")
+    for position, column in enumerate(leading_columns + x_columns):
+        dataset[column] = pieces.get(position)
+    # Rows whose filename has no eighth piece hold a missing value there;
+    # those rows get the placeholder "none"
+    dataset["L_ligand"] = pieces.get(7).fillna("none")
+    return dataset
+
+
+def load_dummy_dataset_locally(datset_path: str = "tests/data.csv"):
+    """Read the dummy dataset CSV at ``datset_path`` into a DataFrame."""
+    return pd.read_csv(datset_path)
+
+
+def load_dataset_from_hf(
+    dataset_name: str = "NMRcraft/nmrcraft", data_files: str = "all_no_nan.csv"
+):
+    """Load the dataset, downloading it from Hugging Face on first use.
+
+    The data is cached at ``dataset/dataset.csv``; later calls read that cache.
+    It assumes that you have logged into the Hugging Face CLI prior to calling this function.
+
+    Args:
+        dataset_name (str, optional): The name of the dataset. Defaults to "NMRcraft/nmrcraft".
+        data_files (str, optional): The name of the data file. Defaults to 'all_no_nan.csv'.
+
+    Returns:
+        pandas.DataFrame: The loaded dataset as a pandas DataFrame.
+
+    Raises:
+        DatasetLoadError: If the cached CSV is missing even after the download step.
+    """
+    cache_path = "dataset/dataset.csv"
+    os.makedirs("dataset", exist_ok=True)  # no-op when the directory exists
+    # Download and cache the Hugging Face dataset only when no cached copy exists
+    if not os.path.isfile(cache_path):
+        dataset = load_dataset(dataset_name, data_files=data_files)[
+            "train"
+        ].to_pandas()
+        dataset.to_csv(cache_path)
+    if not os.path.isfile(cache_path):
+        raise DatasetLoadError(cache_path)
+    return pd.read_csv(cache_path)
diff --git a/nmrcraft/data/dataloader.py b/nmrcraft/data/dataloader.py
new file mode 100644
index 0000000..189c364
--- /dev/null
+++ b/nmrcraft/data/dataloader.py
@@ -0,0 +1,210 @@
+"""Load and preprocess data."""
+
+from typing import Any, List, Tuple
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import (
+    LabelEncoder,
+    StandardScaler,
+)
+
+from nmrcraft.data.data_utils import (
+    filename_to_ligands,
+    load_dataset_from_hf,
+    load_dummy_dataset_locally,
+)
+from nmrcraft.utils.set_seed import set_seed
+
+set_seed()
+
+TARGET_TYPES = [
+    "metal",
+    "X1_ligand",
+    "X2_ligand",
+    "X3_ligand",
+    "X4_ligand",
+    "L_ligand",
+    "E_ligand",
+]
+
+
+class DataLoader:
+    """Load the raw dataset and build encoded, scaled train/test arrays."""
+
+    def __init__(
+        self,
+        feature_columns: Any,
+        target_columns: List[str],
+        complex_geometry: str,
+        test_size: float,
+        random_state: int,
+        dataset_size: float,
+        include_structural_features: bool,
+        testing: bool,
+    ):
+        self.feature_columns = feature_columns
+        self.test_size = test_size
+        self.random_state = random_state
+        self.dataset_size = dataset_size
+        self.target_columns = target_columns
+        self.complex_geometry = complex_geometry
+        self.include_structural_features = include_structural_features
+
+        # testing=True reads the local dummy CSV instead of the Hugging Face data
+        if not testing:
+            self.dataset = load_dataset_from_hf()
+        elif testing:
+            self.dataset = load_dummy_dataset_locally()
+
+    def load_data(
+        self,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
+        """
+        Parse ligand columns, subsample, filter by geometry, then split.
+
+        Returns:
+            The tuple produced by `split_and_preprocess`.
+        """
+        self.dataset = filename_to_ligands(self.dataset)
+        self.dataset = self.dataset.sample(frac=self.dataset_size)
+        self.choose_geometry()
+        return self.split_and_preprocess()
+
+    def choose_geometry(self) -> None:
+        """
+        Filters the dataset based on the complex geometry.
+
+        When `complex_geometry` is "oct", "spy" or "tbp", only rows with that geometry
+        are kept; any other value leaves the dataset unfiltered (nothing is raised).
+        """
+        valid_geometries = {"oct", "spy", "tbp"}
+        if self.complex_geometry in valid_geometries:
+            self.dataset = self.dataset[
+                self.dataset["geometry"] == self.complex_geometry
+            ]
+        # else:
+        #     raise ValueError("Invalid geometry'.") FIXME
+
+    def encode_categorical_features(self) -> np.ndarray:
+        """
+        Label-encodes every TARGET_TYPES column that is not a target; stores the encoders in `self.encoders`.
+
+        Returns:
+            np.ndarray: The encoded features in numpy array format.
+        """
+        # Every TARGET_TYPES column that is not a target is used as a feature
+        structural_features = (
+            self.dataset[
+                [col for col in TARGET_TYPES if col not in self.target_columns]
+            ]
+            .to_numpy()
+            .T
+        )  # Transpose immediately after conversion to numpy
+
+        # Encode features using LabelEncoder and store encoders for potential inverse transform
+        encoded_features = []
+        self.encoders = []  # To store encoders for each feature
+        for features in structural_features:
+            encoder = LabelEncoder()
+            encoder.fit(features)
+            encoded_features.append(encoder.transform(features))
+            self.encoders.append(encoder)
+
+        # Convert the list of encoded features back to the original data structure
+        return np.array(
+            encoded_features
+        ).T  # Transpose back to original orientation
+
+    def encode_targets(self) -> Tuple[np.ndarray, dict]:
+        """
+        Encodes the target variables in the dataset using LabelEncoder.
+
+        Returns:
+            Tuple[np.ndarray, dict]: The encoded targets and a dictionary mapping target names to labels.
+        """
+        # Initialize lists to store encoded targets and corresponding encoders
+        encoded_targets = []
+        self.target_encoders = []
+        y_labels_dict = {}
+
+        # Encode each target column using LabelEncoder
+        for target_name in self.target_columns:
+            target = self.dataset[target_name].to_numpy()
+            encoder = LabelEncoder()
+            encoder.fit(target)
+            encoded_targets.append(encoder.transform(target))
+            self.target_encoders.append(encoder)
+            y_labels_dict[
+                target_name
+            ] = (
+                encoder.classes_.tolist()
+            )  # Dictionary of labels for each target
+
+        y_encoded = np.array(
+            encoded_targets
+        ).T  # Transpose to match original data structure
+        return y_encoded, y_labels_dict
+
+    def split_and_preprocess(
+        self,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
+        """
+        Split the dataset into training and testing sets, preprocess the data, and return the preprocessed data.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]: X_train, X_test, y_train, y_test and a dict mapping each target column to its class labels.
+        """
+        # Extract and encode categorical features
+        X_NMR = self.dataset[self.feature_columns].to_numpy()
+        X_Structural = self.encode_categorical_features()
+
+        # Encode target variables and store readable labels
+        (
+            y_encoded,
+            y_labels,
+        ) = self.encode_targets()
+
+        # Split data into training and testing sets
+        (
+            X_train_NMR,
+            X_test_NMR,
+            X_train_Structural,
+            X_test_Structural,
+            y_train,
+            y_test,
+        ) = train_test_split(
+            X_NMR,
+            X_Structural,
+            y_encoded,
+            test_size=self.test_size,
+            random_state=self.random_state,
+        )
+
+        # Scale numerical features (the NMR tensor)
+        # Fit on the training split only so test statistics do not leak into it
+        scaler = StandardScaler()
+        X_train_NMR_scaled = scaler.fit_transform(X_train_NMR)
+        X_test_NMR_scaled = scaler.transform(X_test_NMR)
+
+        # Combine features if structural features are included
+        if self.include_structural_features:
+            X_train = np.concatenate(
+                [X_train_NMR_scaled, X_train_Structural], axis=1
+            )
+            X_test = np.concatenate(
+                [X_test_NMR_scaled, X_test_Structural], axis=1
+            )
+        else:
+            X_train = X_train_NMR_scaled
+            X_test = X_test_NMR_scaled
+
+        # np.squeeze drops length-1 axes, so a single target column gives 1-D y
+        return (
+            X_train,
+            X_test,
+            np.squeeze(y_train),
+            np.squeeze(y_test),
+            y_labels,
+        )
diff --git a/nmrcraft/data/dataset.py b/nmrcraft/data/dataset.py
deleted file mode 100644
index c179140..0000000
--- a/nmrcraft/data/dataset.py
+++ /dev/null
@@ -1,572 +0,0 @@
-"""Load and preprocess data."""
-
-import itertools
-import os
-
-import numpy as np
-import pandas as pd
-from datasets import load_dataset
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import (
-    LabelBinarizer,
-    LabelEncoder,
-    OneHotEncoder,
-    StandardScaler,
-)
-
-from nmrcraft.utils.set_seed import set_seed
-
-set_seed()
-
-
-class DatasetLoadError(FileNotFoundError):
-    """Exeption raised when the Dataloader could not find data/dataset.csv,
-    even after trying to generate it from huggingface"""
-
-    def __init__(self, t):
-        super().__init__(f"Could not load raw Dataset '{t}'")
-
-
-class InvalidTargetError(ValueError):
-    """Exception raised when the specified model name is not found."""
-
-    def __init__(self, t):
-        super().__init__(f"Invalid target '{t}'")
-
-
-class InvalidTargetTypeError(ValueError):
-    """Exception raised when the specified target type is not valid."""
-
-    def __init__(self, t):
-        super().__init__(f"Invalid target Type '{t}'")
-
-
-def filename_to_ligands(dataset: pd.DataFrame):
-    """
-    Extract ligands from the filename and add as columns to the dataset.
-    Assumes that filenames are structured in a specific way that can be parsed into ligands.
-    """
-    filename_parts = dataset["file_name"].str.split("_", expand=True)
-    dataset["metal"] = filename_parts.get(0)
-    dataset["geometry"] = filename_parts.get(1)
-    dataset["E_ligand"] = filename_parts.get(2)
-    dataset["X1_ligand"] = filename_parts.get(3)
-    dataset["X2_ligand"] = filename_parts.get(4)
-    dataset["X3_ligand"] = filename_parts.get(5)
-    dataset["X4_ligand"] = filename_parts.get(6)
-    dataset["L_ligand"] = filename_parts.get(7).fillna(
-        "none"
-    )  # Fill missing L_ligand with 'none'
-    return dataset
-
-
-def load_dummy_dataset_locally(datset_path: str = "tests/data.csv"):
-    dataset = pd.read_csv(datset_path)
-    return dataset
-
-
-def load_dataset_from_hf(
-    dataset_name: str = "NMRcraft/nmrcraft", data_files: str = "all_no_nan.csv"
-):
-    """Load the dataset.
-
-    This function loads the dataset using the specified dataset name and data files.
-    It assumes that you have logged into the Hugging Face CLI prior to calling this function.
-
-    Args:
-        dataset_name (str, optional): The name of the dataset. Defaults to "NMRcraft/nmrcraft".
-        data_files (str, optional): The name of the data file. Defaults to 'all_no_nan.csv'.
-
-    Returns:
-        pandas.DataFrame: The loaded dataset as a pandas DataFrame.
-    """
-    # Create data dir if needed
-    if not os.path.isdir("data"):
-        os.mkdir("data")
-    # Check if hf dataset is already downloaded, else download it and then load it
-    if not os.path.isfile("data/dataset.csv"):
-        dataset = load_dataset(dataset_name, data_files=data_files)[
-            "train"
-        ].to_pandas()
-        dataset.to_csv("data/dataset.csv")
-    if os.path.isfile("data/dataset.csv"):
-        dataset = pd.read_csv("data/dataset.csv")
-    elif not os.path.isfile("data/dataset.csv"):
-        raise DatasetLoadError(FileNotFoundError)
-    return dataset
-
-
-def transpose(array: any):
-    """rotate/transpose array to the right"""
-    ar = array[:]  # make copy just to be sure
-    ar = [  # rotate the array to the right
-        list(x) if i == 0 else x for i, x in enumerate(map(list, zip(*ar)))
-    ]
-    return ar
-
-
-def get_target_columns(target_columns: str):
-    """
-    Function takes target columns in underline format f.e 'metal_X1_X4_X2_L' and
-    transforms into a list of the column names present in the dataset.
-    """
-    TARGET_TYPES = ["metal", "X1", "X2", "X3", "X4", "L", "E"]
-
-    # Split the target string into individual targets
-    targets = [t.strip() for t in target_columns.split("_")]
-
-    # Check if the targets are valid
-    for t in targets:
-        if t not in TARGET_TYPES:
-            raise InvalidTargetError(t)
-
-    # Translate them into Dataframe Column names
-    target_map = {
-        "metal": "metal",
-        "X1": "X1_ligand",
-        "X2": "X2_ligand",
-        "X3": "X3_ligand",
-        "X4": "X4_ligand",
-        "L": "L_ligand",
-        "E": "E_ligand",
-    }
-    targets_transformed = [target_map[t] for t in targets]
-
-    return targets_transformed
-
-
-def get_structural_feature_columns(target_columns: list):
-    """
-    Function gets the feature columns given the target columns. The feature columns are those that will be in the X set.
-    """
-    TARGET_TYPES = [
-        "metal",
-        "X1_ligand",
-        "X2_ligand",
-        "X3_ligand",
-        "X4_ligand",
-        "L_ligand",
-        "E_ligand",
-    ]
-
-    # Get the features as the not targets
-    features = [x for x in TARGET_TYPES if x not in target_columns]
-
-    return features
-
-
-def target_label_readabilitizer(readable_labels):
-    """
-    function takes in the classes from the binarzier and turns them into human readable list of same length of the target.
-    """
-    # Trun that class_ into list
-    human_readable_label_list = list(itertools.chain(*readable_labels))
-    # Handle Binarized metal stuff and make the two columns become a single one because the metals get turned into a single column by the binarizer
-    for i in enumerate(human_readable_label_list):
-        if (
-            human_readable_label_list[i[0]] == "Mo"
-            and human_readable_label_list[i[0] + 1] == "W"
-        ) or (
-            human_readable_label_list[i[0]] == "W"
-            and human_readable_label_list[i[0] + 1] == "Mo"
-        ):
-            human_readable_label_list[i[0]] = "Mo W"
-            human_readable_label_list.pop(i[0] + 1)
-
-    return human_readable_label_list
-
-
-def target_label_readabilitizer_categorical(target_labels):
-    good_labels = []
-    for label_array in target_labels:
-        good_labels.append(list(label_array))
-    return good_labels
-
-
-def column_length_to_indices(column_lengths):
-    indices = []
-    start_index = 0
-    for length in column_lengths:
-        if length == 1:
-            indices.append([start_index])
-        else:
-            indices.append(list(range(start_index, start_index + length)))
-        start_index += length
-    return indices
-
-
-class DataLoader:
-    def __init__(
-        self,
-        dataset_name="NMRcraft/nmrcraft",
-        data_files="all_no_nan.csv",
-        feature_columns=None,
-        target_columns="metal",
-        target_type="one-hot",  # can be "categorical" or "one-hot",
-        complex_geometry="all",
-        test_size=0.3,
-        random_state=42,
-        dataset_size=0.01,
-        include_structural_features=True,
-        testing=False,
-    ):
-        self.feature_columns = feature_columns
-        self.target_columns = get_target_columns(target_columns=target_columns)
-        self.test_size = test_size
-        self.random_state = random_state
-        self.dataset_size = dataset_size
-        self.target_type = target_type
-        self.complex_geometry = complex_geometry
-        self.include_structural_features = include_structural_features
-
-        if not testing:
-            self.dataset = load_dataset_from_hf()
-        elif testing:
-            self.dataset = load_dummy_dataset_locally()
-
-    def load_data(self):
-        self.dataset = filename_to_ligands(self.dataset)
-        self.dataset = self.dataset.sample(frac=self.dataset_size)
-        self.choose_geometry()
-        if self.target_type == "categorical":
-            return self.split_and_preprocess_categorical()
-        elif (
-            self.target_type == "one-hot"
-        ):  # Target is binarized and Features are one hot
-            return self.split_and_preprocess_one_hot()
-        else:
-            raise InvalidTargetTypeError(ValueError)
-
-    def choose_geometry(self):
-        """
-        Reduce the dataset down to a certain geometry if a valid
-        one was passed, else just leave it as is.
-        """
-        if self.complex_geometry == "oct":
-            self.dataset = self.dataset[
-                self.dataset["geometry"] == "oct"
-            ]  # only load octahedral complexes
-        elif self.complex_geometry == "spy":
-            self.dataset = self.dataset[
-                self.dataset["geometry"] == "spy"
-            ]  # only load square pyramidal complexes
-        elif self.complex_geometry == "tbp":
-            self.dataset = self.dataset[
-                self.dataset["geometry"] == "tbp"
-            ]  # only load trigonal bipyramidal complexes
-
-    def scale(self, X):
-        """
-        Apply standard normalization to the feature set.
-        """
-        scaler = StandardScaler()
-        X_scaled = scaler.fit_transform(X)
-        return X_scaled
-
-    def get_target_columns_separated(self):
-        """Returns the column indicies of the target array nicely sorted.
-        For example: metal_X1: [[0, 1], [1, 2, 3, 4]]"""
-        if (
-            "metal" in self.target_columns
-        ):  # If targets have metal, do weird stuff
-            metal_index = self.target_columns.index("metal")
-            y_column_indices = column_length_to_indices(
-                self.target_column_numbers
-            )
-            for i in range(len(y_column_indices)):
-                if i == metal_index:
-                    y_column_indices[i].append(y_column_indices[i][0] + 1)
-                if i > metal_index:
-                    y_column_indices[i] = [x + 1 for x in y_column_indices[i]]
-
-        elif "metal" not in self.target_columns:
-            y_column_indices = column_length_to_indices(
-                self.target_column_numbers
-            )
-        return y_column_indices
-
-    def more_than_one_target(self):
-        """Function returns true if more than one target is specified"""
-        return len(self.target_columns) > 1
-
-    def categorical_target_decoder(self, y):
-        """
-        function takes in the  target (y) array and transforms it back to decoded form.
-        For this function to be run the split_and_preprocess_categorical already has to have been run beforehand.
-        """
-        ys = y[:]  # copy y so it's not modified
-        target_encoders = self.target_label_encoders
-        ys_decoded = []
-        ys = transpose(ys)
-
-        # Decode columnwise
-        for i, target_column in enumerate(ys):
-            ys_decoded.append(
-                target_encoders[i].inverse_transform(target_column)
-            )
-
-        # Rotate back so each row corresponds to a complex and not the target like metal or X4
-        ys_decoded_properly_rotated = [
-            list(x) if i == 0 else x
-            for i, x in enumerate(map(list, zip(*ys_decoded)))
-        ]
-
-        return np.array(ys_decoded_properly_rotated)
-
-    def binarized_target_decoder(self, y):
-        """
-        function takes in the  target (y) array and transforms it back to decoded form.
-        For this function to be run the one-hot-preprocesser already has to have been run beforehand.
-        """
-        y_column_indices = column_length_to_indices(self.target_column_numbers)
-        ys = []
-        ys_decoded = []
-        # Split up compressed array into the categories
-        for i in range(len(y_column_indices)):
-            ys.append(y[:, y_column_indices[i]])
-
-        # Decode the binarized categries using the original binarizers
-        for i in range(len(ys)):
-            ys_decoded.append(self.encoders[i].inverse_transform(ys[i]))
-
-        # Rotate the array
-        ys_decoded_properly_rotated = [
-            list(x) if i == 0 else x
-            for i, x in enumerate(map(list, zip(*ys_decoded)))
-        ]
-        return ys_decoded_properly_rotated
-
-    def confusion_matrix_data_adapter_categorical(self, y):
-        """
-        Takes in binary encoded target array and returns decoded flat list.
-        Especially designed to work with confusion matrix.
-        """
-        y_decoded = self.categorical_target_decoder(y)
-        flat_y_decoded = [y for ys in y_decoded for y in ys]
-        return flat_y_decoded
-
-    def confusion_matrix_data_adapter_one_hot(self, y):
-        """
-        Takes in binary encoded target array and returns decoded flat list.
-        Especially designed to work with confusion matrix.
-        """
-        y_decoded = self.binarized_target_decoder(y)
-        flat_y_decoded = [y for ys in y_decoded for y in ys]
-        return flat_y_decoded
-
-    def confusion_matrix_label_adapter(self, y_labels):
-        y_labels_copy = y_labels[:]
-        for i in range(len(y_labels)):
-            if y_labels_copy[i] == "Mo W":
-                y_labels_copy[i] = "Mo"
-                y_labels_copy.insert(i, "W")
-        return y_labels_copy
-
-    def categorical_endocode_X(self):
-        # Get NMR Featrues (passed ones) and structural Features
-        X_Structural_Features_Columns = get_structural_feature_columns(
-            target_columns=self.target_columns
-        )
-        X_Structural_Features = self.dataset[
-            X_Structural_Features_Columns
-        ].to_numpy()
-
-        # Transpose the array
-        X_Structural_Features = transpose(X_Structural_Features)
-
-        # Target-wise encoding with Label encoder and save encoders for later decoding
-        xs = []
-        for i in range(len(X_Structural_Features)):
-            tmp_encoder = LabelEncoder()
-            tmp_encoder.fit(X_Structural_Features[i])
-            xs.append(tmp_encoder.transform(X_Structural_Features[i]))
-        X_Structural_Features = list(zip(*xs))  # Kind of backtransposing
-
-        return X_Structural_Features
-
-    def categorical_endocode_y(self):
-        # Get the targets
-        y_labels_rotated = self.dataset[self.target_columns].to_numpy()
-
-        # rotate the list of list (array-like)
-        y_labels = transpose(y_labels_rotated)
-
-        # Do targetwise encoding using the label encoder and save the label encoders for later decoding
-        ys = []
-        self.target_label_encoders = []
-        readable_labels = []
-        for i in range(len(y_labels)):
-            tmp_encoder = LabelEncoder()
-            tmp_encoder.fit(y_labels[i])
-            ys.append(tmp_encoder.transform(y_labels[i]))
-            self.target_label_encoders.append(tmp_encoder)
-            readable_labels.append(tmp_encoder.classes_)
-        # Combine y
-        y = np.array(list(zip(*ys)))
-        # Return y fuzed into a single array and y_labels
-        return y, readable_labels
-
-    def one_hot_endocode_X(self):
-        """
-        Method that does the one-hot encoding of the DataLoader's features
-        based on the selected targets
-        """
-        # Get Columns corresponding to the features that are selected
-        X_Structural_Features_Columns = get_structural_feature_columns(
-            self.target_columns
-        )
-
-        # Get the features based on the selected columns
-        X_Structural_Features = self.dataset[
-            X_Structural_Features_Columns
-        ].to_numpy()
-
-        # One hot encode X structural
-        X_Structural_Features_enc = (
-            OneHotEncoder().fit_transform(X_Structural_Features).toarray()
-        )
-
-        return X_Structural_Features_enc
-
-    def label_binarize_endocode_y(self):
-
-        # Get the Targets and transpose
-        y_labels_rotated = self.dataset[self.target_columns].to_numpy()
-        y_labels = transpose(y_labels_rotated)
-
-        ys = []
-        readable_labels = []
-        self.encoders = []
-        self.target_column_numbers = []
-
-        # Binarize targetwise and save encoders and labels
-        for i in range(len(y_labels)):
-            # Encode
-            label_binerizer = LabelBinarizer()
-            ys.append(label_binerizer.fit_transform(y_labels[i]))
-
-            # Save stuff for later decoding
-            readable_labels.append(label_binerizer.classes_)
-            self.encoders.append(
-                label_binerizer
-            )  # save encoder for later decoding
-            self.target_column_numbers.append(
-                len(ys[i][0])
-            )  # save column numbers for later decoding
-
-        # Return y fuzed into a single array and labels
-        y = np.concatenate(list(ys), axis=1)
-        return y, readable_labels
-
-    def split_and_preprocess_categorical(self):
-        """
-        Split data into training and test sets, then apply normalization.
-        Ensures that the test data does not leak into training data preprocessing.
-        X and y are categorical, so each column has a integer that defines which one of the ligands is in the column.
-        """
-
-        # Get NMR features
-        X_NMR = self.dataset[self.feature_columns].to_numpy()
-
-        # Encode X in a categorical fashion with the label encoder columnwise
-        X_Structural_Features = self.categorical_endocode_X()
-
-        # Encode y in a categorical fashion with the label encoder columnwise
-        y, readable_labels = self.categorical_endocode_y()
-
-        # Train Test splitting
-        (
-            X_train_NMR,
-            X_test_NMR,
-            X_train_structural,
-            X_test_structural,
-            y_train,
-            y_test,
-        ) = train_test_split(
-            X_NMR,
-            X_Structural_Features,
-            y,
-            test_size=self.test_size,
-            random_state=self.random_state,
-        )
-
-        # Normalize features with no leakage from test set
-        X_train_NMR_scaled = self.scale(X_train_NMR)
-        X_test_NMR_scaled = self.scale(X_test_NMR)
-
-        if self.include_structural_features:
-            # Combine scaled NMR features with structural features to get final X
-            X_train_scaled = np.concatenate(
-                [X_train_NMR_scaled, X_train_structural], axis=1
-            )
-            X_test_scaled = np.concatenate(
-                [X_test_NMR_scaled, X_test_structural], axis=1
-            )
-        else:
-            # Just have the NMR features as X
-            X_train_scaled = X_train_NMR_scaled
-            X_test_scaled = X_test_NMR_scaled
-
-        # Get the target labels going
-        y_label = target_label_readabilitizer_categorical(readable_labels)
-
-        return X_train_scaled, X_test_scaled, y_train, y_test, y_label
-
-    def split_and_preprocess_one_hot(self):
-        """
-        Split data into training and test sets, then apply normalization.
-        Ensures that the test data does not leak into training data preprocessing. Returned X is one-hot encoded and y binarized using the sklearn functions.
-        """
-        # Get NMR features
-        X_NMR = self.dataset[self.feature_columns].to_numpy()
-
-        # Get structural features one-hot encoded
-        X_Structural_Features_enc = self.one_hot_endocode_X()
-
-        # Get structural targets, binarized
-        y, readable_labels = self.label_binarize_endocode_y()
-
-        # Split the datasets
-        (
-            X_train_NMR,
-            X_test_NMR,
-            X_train_structural,
-            X_test_structural,
-            y_train,
-            y_test,
-        ) = train_test_split(
-            X_NMR,
-            X_Structural_Features_enc,
-            y,
-            test_size=self.test_size,
-            random_state=self.random_state,
-        )
-
-        # Normalize features with no leakage from test set
-        X_train_NMR_scaled = self.scale(X_train_NMR)
-        X_test_NMR_scaled = self.scale(X_test_NMR)
-
-        if self.include_structural_features:
-            # Combine scaled NMR features with structural features to get final X
-            X_train_scaled = np.concatenate(
-                [X_train_NMR_scaled, X_train_structural], axis=1
-            )
-            X_test_scaled = np.concatenate(
-                [X_test_NMR_scaled, X_test_structural], axis=1
-            )
-        else:
-            # Just have the NMR features as X
-            X_train_scaled = X_train_NMR_scaled
-            X_test_scaled = X_test_NMR_scaled
-
-        # Creates the labels that can be used to identify the targets in the binaized y-array
-        # (basicall handle special metal behaviour)
-        good_target_labels = target_label_readabilitizer(readable_labels)
-
-        return (
-            X_train_scaled,
-            X_test_scaled,
-            y_train,
-            y_test,
-            good_target_labels,
-        )
diff --git a/nmrcraft/evaluation/evaluation.py b/nmrcraft/evaluation/evaluation.py
index 0b4d1a3..7c30b6b 100644
--- a/nmrcraft/evaluation/evaluation.py
+++ b/nmrcraft/evaluation/evaluation.py
@@ -10,7 +10,7 @@
     roc_curve,
 )
 
-from nmrcraft.data import dataset
+from nmrcraft.data import dataloader
 
 
 def model_evaluation(
@@ -18,7 +18,7 @@ def model_evaluation(
     X_test: Any,
     y_test: Any,
     y_labels: Any,
-    dataloader: dataset.DataLoader,
+    dataloader: dataloader.DataLoader,
 ) -> Tuple[Dict[str, float], Any, Any, Any]:
     """
     Evaluate the performance of the trained machine learning model for 1D targets.
@@ -67,7 +67,7 @@ def model_evaluation_nD(
     X_test: Any,
     y_test: Any,
     y_labels: Any,
-    dataloader: dataset.DataLoader,
+    dataloader: dataloader.DataLoader,
 ) -> Tuple[Dict[str, float], Any, Any, Any]:
     """
     Evaluate the performance of the trained machine learning model for 2D+ Targets.
diff --git a/nmrcraft/evaluation/visualizer.py b/nmrcraft/evaluation/visualizer.py
index 9ad3a91..77a92f7 100644
--- a/nmrcraft/evaluation/visualizer.py
+++ b/nmrcraft/evaluation/visualizer.py
@@ -2,60 +2,119 @@
 import os
 
 import matplotlib.pyplot as plt
+import numpy as np
+from cycler import cycler
+from matplotlib.colors import LinearSegmentedColormap
 
 
 class Visualizer:
-    def __init__(self, model_name: str, data: None, folder_path: str):
+    def __init__(
+        self,
+        model_name: str,
+        cm: None,
+        rates=None,
+        metrics=None,
+        folder_path: str = "plots/",
+        classes=None,
+        dataset_size=None,
+    ):
+        """Store the evaluation results to plot and make sure folder_path exists."""
         self.model_name = model_name
-        self.data = data
+        self.cm = cm
+        self.rates = rates  # stored as passed; "(rates,)" wrapped it in a 1-tuple
+        self.metrics = metrics
         self.folder_path = folder_path
+        self.classes = classes
+        self.dataset_size = dataset_size
+        os.makedirs(folder_path, exist_ok=True)  # no-op when it already exists
 
-    def plot_ROC(
-        self, title="ROC Curves by Dataset Size", filename="ROC_Curves.png"
-    ):
-        print(self.data.index)
-        plt.figure(figsize=(10, 8))
+    @staticmethod
+    def style_setup():
+        """Configure matplotlib rcParams; return (cmap, colors, first_color)."""
         colors = [
-            "blue",
-            "green",
-            "red",
-            "violet",
-            "orange",
-            "cyan",
-        ]  # Colors for different dataset sizes
-        labels = [
-            f"Dataset Size: {idx}" for idx in self.data.index
-        ]  # Labels for legend
+            "#C28340",
+            "#854F2B",
+            "#61371F",
+            "#8FCA5C",
+            "#70B237",
+            "#477A1E",
+        ]
+        cmap = LinearSegmentedColormap.from_list("custom", colors)
 
-        for (index, row), color, label in zip(
-            self.data.iterrows(), colors, labels
-        ):
-            index = index + 1
-            plt.plot(
-                row["fpr"],
-                row["tpr"],
-                label=f'{label} (AUC = {row["roc_auc"]:.2f})',
-                color=color,
-            )
+        plt.style.use("./style.mplstyle")
+        plt.rcParams["text.latex.preamble"] = r"\usepackage{sansmathfonts}"
+        plt.rcParams["axes.prop_cycle"] = cycler(color=colors)
+        plt.rcParams["text.usetex"] = False
+
+        # Use the first color from the custom color cycle
+        first_color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]
+        return cmap, colors, first_color
 
-        plt.plot(
-            [0, 1],
-            [0, 1],
-            linestyle="--",
-            lw=2,
-            color="gray",
-            label="Chance",
-            alpha=0.8,
+    def plot_confusion_matrix(self, full=True, columns_set=False):
+        """
+        Plot the confusion matrix in self.cm and save it as a PNG.
+        Parameters:
+        - full (bool): If true plots one big matrix, else one per target.
+        - columns_set (list of lists): index ranges of each target; the first
+          and last entry of each list bound one sub-matrix (full=False only).
+        Returns:
+        str: path of the saved figure.
+        """
+
+        def normalize_row_0_1(row):
+            span = np.max(row) - np.min(row)
+            # A constant row has zero span; map it to zeros instead of 0/0.
+            return (row - np.min(row)) / span if span else row * 0.0
+
+        file_path = os.path.join(
+            self.folder_path,
+            f"ConfusionMatrix_{self.model_name}_{self.dataset_size}.png",
         )
-        plt.title(title)
-        plt.xlabel("False Positive Rate")
-        plt.ylabel("True Positive Rate")
-        plt.legend(loc="lower right")
+        if full:  # Plot one big cm
+            plt.figure(figsize=(10, 8))
+            plt.imshow(
+                self.cm.apply(normalize_row_0_1, axis=1),
+                interpolation="nearest",
+                cmap=plt.cm.Blues,
+            )
+            plt.title("The Confusion Matrix")
+            plt.colorbar()
+            tick_marks = np.arange(len(self.classes))
+            plt.xticks(tick_marks, self.classes, rotation=45)
+            plt.yticks(tick_marks, self.classes)
+            plt.ylabel("True label")
+            plt.xlabel("Predicted label")
+            plt.tight_layout()  # after the labels so they are not clipped
+            plt.savefig(file_path)
+            plt.close()
 
-        file_path = os.path.join(self.folder_path, filename)
-        plt.savefig(file_path)
-        plt.close()  # Close the plot to free up memory
-        return file_path
+        else:  # Plot many small cms of each target
+            if not columns_set:
+                raise ValueError("columns_set is required when full=False")
+            cm_values = np.asarray(self.cm)  # positional slicing, frame or array
+            spans = [slice(cols[0], cols[-1] + 1) for cols in columns_set]
+            fig, axs = plt.subplots(
+                nrows=len(spans), figsize=(10, 8 * len(spans)), squeeze=False
+            )
+            # squeeze=False keeps axs 2-D so a single target works as well.
+            for i, (ax, span) in enumerate(zip(axs[:, 0], spans)):
+                sub_classes = self.classes[span]
+                ax.imshow(
+                    cm_values[span, span],
+                    interpolation="nearest",
+                    cmap=plt.cm.Blues,
+                )
+                ax.set_title(f"Confusion Matrix {i+1}")
+                tick_marks = np.arange(len(sub_classes))
+                ax.set_xticks(tick_marks)
+                ax.set_xticklabels(sub_classes, rotation=45)
+                ax.set_yticks(tick_marks)
+                ax.set_yticklabels(sub_classes)
+            fig.tight_layout()
+            fig.savefig(file_path)
+            plt.close(fig)
+
+        return file_path
 
     def plot_metric(
         self,
diff --git a/nmrcraft/models/classifier.py b/nmrcraft/models/classifier.py
index cfd4868..3688d39 100644
--- a/nmrcraft/models/classifier.py
+++ b/nmrcraft/models/classifier.py
@@ -4,15 +4,14 @@
 import pandas as pd
 from sklearn.metrics import (
     accuracy_score,
-    auc,
+    confusion_matrix,
     f1_score,
-    # confusion_matrix,
-    multilabel_confusion_matrix,
-    roc_curve,
+    precision_score,
+    recall_score,
 )
 from sklearn.utils import resample
 
-from nmrcraft.data.dataset import DataLoader
+from nmrcraft.data.dataloader import DataLoader
 from nmrcraft.models.model_configs import model_configs
 from nmrcraft.models.models import load_model
 from nmrcraft.training.hyperparameter_tune import HyperparameterTuner
@@ -26,7 +25,11 @@ def __init__(
         target: str,
         dataset_size: float,
         feature_columns=None,
-        random_state=None,
+        random_state=42,
+        include_structural_features=True,
+        complex_geometry="oct",
+        test_size=0.2,
+        testing=False,
     ):
         if not feature_columns:
             feature_columns = [
@@ -49,17 +52,23 @@ def __init__(
             max_evals=self.max_evals,
         )  # algo is set to default value, TODO: change this in declaration of Classifier is necessary
 
+        data_loader = DataLoader(
+            feature_columns=feature_columns,
+            target_columns=target,
+            dataset_size=dataset_size,
+            include_structural_features=include_structural_features,
+            complex_geometry=complex_geometry,
+            test_size=test_size,
+            random_state=random_state,
+            testing=testing,
+        )
         (
             self.X_train,
             self.X_test,
             self.y_train,
             self.y_test,
             self.y_labels,
-        ) = DataLoader(
-            feature_columns=feature_columns,
-            target_columns=target,
-            dataset_size=dataset_size,
-        ).load_data()
+        ) = data_loader.load_data()
 
     def hyperparameter_tune(self):
         log.info(
@@ -90,11 +99,11 @@ def train_bootstraped(self, n_times=10):
                 replace=True,
                 random_state=self.random_state,
             )
-            self.hyperparameter_tune()
+            # self.hyperparameter_tune()
             self.train()
-            eval_data = self.evaluate()
-            accuracy.append(eval_data["accuracy"])
-            f1_score.append(eval_data["f1_score"])
+            rates_df, metrics, cm = self.evaluate()
+            accuracy.append(metrics["Accuracy"])
+            f1_score.append(metrics["F1"])
             i += 1
         new_row = {
             "accuracy": np.mean(accuracy),
@@ -106,36 +115,56 @@ def train_bootstraped(self, n_times=10):
         }
         return pd.DataFrame([new_row])
 
-    def evaluate(self) -> pd.DataFrame():
+    def evaluate(self) -> "tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]":
         """
         Evaluate the performance of the trained machine learning model.
 
         Returns:
-            Tuple[Dict[str, float], Any, Any, Any]: A tuple containing:
-                - A dictionary with evaluation metrics (accuracy, f1_score, roc_auc).
-                - The confusion matrix.
-                - The false positive rate.
-                - The true positive rate.
+            tuple: (rates_df, metrics, cm), all pandas DataFrames:
+                - rates_df: per-class "FPR" and "FNR", indexed by self.y_labels.
+                - metrics: one row with "Accuracy", "Recall", "F1", "Precision"
+                  (the last three macro-averaged).
+                - cm: the confusion matrix with self.y_labels on both axes.
         """
         y_pred = self.model.predict(self.X_test)
-        accuracy = accuracy_score(self.y_test, y_pred)
-        f1 = f1_score(self.y_test, y_pred, average="weighted")
-        fpr, tpr, _ = roc_curve(
-            self.y_test, self.model.predict_proba(self.X_test)[:, 1]
-        )
-        cm = multilabel_confusion_matrix(self.y_test, y_pred)
-        roc_auc = auc(fpr, tpr)
-
-        # Create DataFrame with consistent structure
-        results_df = pd.DataFrame(
-            {
-                "accuracy": [accuracy],
-                "f1_score": [f1],
-                "roc_auc": [roc_auc],
-                "fpr": [fpr.tolist()],
-                "cm": [cm.tolist()],
-                "tpr": [tpr.tolist()],
-            }
-        )
+        cm = confusion_matrix(self.y_test, y_pred)
+
+        def calculate_fpr_fnr(cm):
+            """Return per-class (FPR, FNR) arrays for a square confusion matrix."""
+            TP = np.diag(cm).astype(float)
+            FP = cm.sum(axis=0) - TP  # predicted as class i, truly another
+            FN = cm.sum(axis=1) - TP  # truly class i, predicted as another
+            TN = cm.sum() - TP - FP - FN
+            # A rate whose denominator is zero is undefined: keep it NaN
+            # (as plain division would) but without the 0/0 RuntimeWarning.
+            FPR = np.full_like(TP, np.nan)
+            FNR = np.full_like(TP, np.nan)
+            np.divide(FP, FP + TN, out=FPR, where=(FP + TN) > 0)
+            np.divide(FN, FN + TP, out=FNR, where=(FN + TP) > 0)
+            return FPR, FNR
+
+        # Calculate FPR and FNR for each class
+        FPR, FNR = calculate_fpr_fnr(cm)
+        rates_df = pd.DataFrame({"FPR": FPR, "FNR": FNR}, index=self.y_labels)
+
+        # Calculating macro-averaged F1 Score, Precision, Recall
+        Precision = precision_score(self.y_test, y_pred, average="macro")
+        Recall = recall_score(self.y_test, y_pred, average="macro")
+        F1 = f1_score(self.y_test, y_pred, average="macro")
+
+        # Calculating Accuracy
+        Accuracy = accuracy_score(self.y_test, y_pred)
+
+        metrics = pd.DataFrame(
+            {
+                "Accuracy": [Accuracy],
+                "Recall": [Recall],
+                "F1": [F1],
+                "Precision": [Precision],
+            }
+        )
+
+        # Label both axes so rows (true) and columns (predicted) are readable.
+        cm = pd.DataFrame(cm, index=self.y_labels, columns=self.y_labels)
 
-        return results_df
+        return rates_df, metrics, cm
diff --git a/nmrcraft/models/model_configs.py b/nmrcraft/models/model_configs.py
index 75132c5..e4db8ca 100644
--- a/nmrcraft/models/model_configs.py
+++ b/nmrcraft/models/model_configs.py
@@ -15,7 +15,7 @@
     "gradient_boosting": {
         "model_params": {"random_state": 42},
         "hyperparameters": {
-            "loss": hp.choice("loss", ["log_loss", "exponential"]),
+            "loss": hp.choice("loss", ["log_loss"]),
             "learning_rate": hp.uniform("learning_rate", 0.01, 0.5),
             "n_estimators": hp.choice("n_estimators", range(10, 1000, 10)),
             # "subsample": hp.uniform("subsample", 0.01, 1.0),
@@ -31,17 +31,9 @@
     "logistic_regression": {
         "model_params": {"random_state": 42},
         "hyperparameters": {
-            "penalty": hp.choice("penalty", ["l1", "l2", "elasticnet", None]),
             "C": hp.uniform("C", 0.01, 10.0),
-            "solver": hp.choice("solver", ["saga"]),
-            # lbfgs --> l2, None
-            # liblinear --> l1, l2
-            # newton-cg --> l2, None
-            # newton-cholesky --> l2, None
-            # sag --> l2, None
-            # saga --> l1, l2, elasticnet, None
-            "max_iter": hp.choice("max_iter", range(100, 1000, 100)),
-            "l1_ratio": hp.uniform("l1_ratio", 0.01, 1.0),
+            "solver": hp.choice("solver", ["newton-cg", "sag", "saga"]),
+            # "max_iter": hp.choice("max_iter", range(100, 1000, 100)),
         },
     },
     "svc": {
@@ -55,7 +47,6 @@
             "gamma": hp.choice("gamma", ["scale", "auto"]),
             "coef0": hp.uniform("coef0", 0.0, 1.0),
             "shrinking": hp.choice("shrinking", [True, False]),
-            "probability": hp.choice("probability", [True, False]),
             # "max_iter": hp.choice("max_iter", range(100, 1000, 100)),
         },
     },
diff --git a/nmrcraft/models/models.py b/nmrcraft/models/models.py
index 1c42e52..39d672b 100644
--- a/nmrcraft/models/models.py
+++ b/nmrcraft/models/models.py
@@ -69,6 +69,9 @@ def load_model(model_name: str, **kwargs: Any):
     if model_name == "svc":
         kwargs["probability"] = True
 
+    if model_name == "gpc":
+        kwargs["multi_class"] = "one_vs_one"
+
     # Forth, validate all provided kwargs before creating the model instance
     validate_kwargs(kwargs, model_class, model_name)
 
diff --git a/scripts/analysis/dataset_statistics.py b/scripts/analysis/dataset_statistics.py
index 217608d..aad9c1e 100644
--- a/scripts/analysis/dataset_statistics.py
+++ b/scripts/analysis/dataset_statistics.py
@@ -6,7 +6,7 @@
 import seaborn as sns
 
 from nmrcraft.analysis.plotting import style_setup
-from nmrcraft.data.dataset import filename_to_ligands, load_dataset_from_hf
+from nmrcraft.data.dataloader import filename_to_ligands, load_dataset_from_hf
 
 
 def plot_stacked_bars(
diff --git a/scripts/analysis/pca_ligand_space.py b/scripts/analysis/pca_ligand_space.py
index 49f22cf..ecf0a8b 100644
--- a/scripts/analysis/pca_ligand_space.py
+++ b/scripts/analysis/pca_ligand_space.py
@@ -7,7 +7,7 @@
 from sklearn.preprocessing import StandardScaler
 
 from nmrcraft.analysis.plotting import style_setup
-from nmrcraft.data.dataset import filename_to_ligands, load_dataset_from_hf
+from nmrcraft.data.dataloader import filename_to_ligands, load_dataset_from_hf
 
 
 def perform_pca(df, features):
diff --git a/scripts/training/baselines.py b/scripts/training/baselines.py
new file mode 100644
index 0000000..336463a
--- /dev/null
+++ b/scripts/training/baselines.py
@@ -0,0 +1,119 @@
+import argparse
+import logging as log
+
+import numpy as np
+import pandas as pd
+from sklearn.metrics import (
+    accuracy_score,
+    confusion_matrix,
+    f1_score,
+    precision_score,
+    recall_score,
+)
+
+# Import your data loading utilities
+from nmrcraft.data.dataloader import DataLoader
+
+
+def evaluate_model(y_test, y_pred, y_labels):
+    """
+    Score the predictions separately for every target column.
+
+    Parameters:
+    - y_test, y_pred: 2D arrays with one column per target.
+    - y_labels: mapping whose keys name the targets, in column order.
+    Returns:
+    (metrics, cm_list): metrics maps target name to its Accuracy, F1,
+    Precision and Recall; cm_list holds (target name, confusion matrix).
+    """
+    # zero_division=0: a constant baseline never predicts most classes, so
+    # their precision is undefined; score it 0 instead of warning.
+    macro = {"average": "macro", "zero_division": 0}
+    metrics = {}
+    cm_list = []
+    for target_index, target_name in enumerate(y_labels):
+        true_col = y_test[:, target_index]
+        pred_col = y_pred[:, target_index]
+        metrics[target_name] = {
+            "Accuracy": accuracy_score(true_col, pred_col),
+            "F1": f1_score(true_col, pred_col, **macro),
+            "Precision": precision_score(true_col, pred_col, **macro),
+            "Recall": recall_score(true_col, pred_col, **macro),
+        }
+        cm_list.append((target_name, confusion_matrix(true_col, pred_col)))
+    return metrics, cm_list
+
+
+def _str_to_bool(value):
+    """Parse a command-line flag value; bool("False") would be True."""
+    return str(value).strip().lower() in ("1", "true", "yes", "y")
+
+
+def main():
+    """Evaluate a most-frequent-class (or random) baseline on every target."""
+    parser = argparse.ArgumentParser(
+        description="Simplified model training script."
+    )
+    parser.add_argument(
+        "--targets",
+        type=str,
+        nargs="+",  # always a list, so len(args.targets) counts targets
+        default=["metal"],
+        help="The Target for the predictions.",
+    )
+    parser.add_argument(
+        "--dataset_size",
+        type=float,
+        default=1.0,
+        help="Size of the dataset to load.",
+    )
+    parser.add_argument(
+        "--random_baseline",
+        type=_str_to_bool,
+        default=False,
+        help="Use a random baseline model.",
+    )
+    args = parser.parse_args()
+
+    # Set up logging
+    log.basicConfig(
+        level=log.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+    )
+
+    # Load data
+    dataloader = DataLoader(
+        target_columns=args.targets,
+        dataset_size=args.dataset_size,
+        feature_columns=[
+            "M_sigma11_ppm",
+            "M_sigma22_ppm",
+            "M_sigma33_ppm",
+            "E_sigma11_ppm",
+            "E_sigma22_ppm",
+            "E_sigma33_ppm",
+        ],
+        complex_geometry="oct",
+        test_size=0.3,
+        random_state=42,
+        include_structural_features=False,
+        testing=False,
+    )
+    X_train, X_test, y_train, y_test, y_labels = dataloader.load_data()
+
+    predictions = np.zeros_like(y_test)
+
+    for i in range(len(args.targets)):  # Loop through each target column
+        if args.random_baseline:
+            unique_vals = np.unique(y_train[:, i])
+            predictions[:, i] = np.random.choice(unique_vals, size=len(y_test))
+        else:
+            most_common = pd.Series(y_train[:, i]).mode()[0]
+            predictions[:, i] = most_common  # broadcast down the column
+
+    # Evaluate the model
+    metrics, confusion_matrices = evaluate_model(y_test, predictions, y_labels)
+    log.info("Evaluation Metrics: %s", metrics)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/training/final_results.py b/scripts/training/one_target.py
similarity index 54%
rename from scripts/training/final_results.py
rename to scripts/training/one_target.py
index 1c407db..1ee6472 100644
--- a/scripts/training/final_results.py
+++ b/scripts/training/one_target.py
@@ -19,13 +19,13 @@
 parser.add_argument(
     "--max_evals",
     type=int,
-    default=10,
-    help="The max evaluatins for the hyperparameter tuning with hyperopt",
+    default=2,
+    help="The max evaluations for the hyperparameter tuning with hyperopt",
 )
 parser.add_argument(
     "--target",
     type=str,
-    default="metal",
+    default="X3",
     help="The Target for the predictions. Choose from: 'metal', 'X1', 'X2', 'X3', 'X4', 'L', 'E' ",
 )
 parser.add_argument(
@@ -54,22 +54,21 @@
     log.getLogger().setLevel(log.INFO)
 
     dataset_sizes = [
-        0.01,
+        # 0.01,
         0.1,
+        # 0.15
         # 0.5,
-        1.0,
+        # 1.0,
     ]
     models = [
-        "random_forest",
+        # "random_forest",
         "logistic_regression",
-        "gradient_boosting",
-        "svc",
+        # "gradient_boosting",
+        # "svc",
     ]
 
     with mlflow.start_run():
-        model_data = pd.DataFrame(
-            columns=["accuracy", "f1_score", "dataset_size", "model"]
-        )
+        model_metrics = []
         for model in models:
             data = pd.DataFrame()
             for dataset_size in dataset_sizes:
@@ -79,27 +78,59 @@
                     max_evals=args.max_evals,
                     target=args.target,
                     dataset_size=dataset_size,
-                    random_state=11,
+                    random_state=42,
                 )
                 # mlflow.log_metrics("dataset_size", dataset_size, step=i)
                 C.hyperparameter_tune()
                 C.train()
-                new_data = C.evaluate()
+                rates_df, metrics, cm = C.evaluate()
+                print(rates_df)
+                print(metrics)
+                print(cm)
+
                 # data[str(dataset_size)] = new_data
+                # Convert args.target and dataset_size into DataFrames by wrapping them in lists
+                target_df = pd.DataFrame([args.target], columns=["Target"])
+                dataset_size_df = pd.DataFrame(
+                    [dataset_size], columns=["Dataset Size"]
+                )
+
+                model_data = pd.DataFrame(
+                    columns=[
+                        "target",
+                        "dataset_size",
+                        "model",
+                        "accuracy",
+                        "accuracy_std",
+                        "f1_score",
+                        "f1_score_std",
+                    ]
+                )
+                # Concatenate the new DataFrames with data and metrics
                 data = pd.concat(
-                    [data, new_data.assign(dataset_size=dataset_size)],
+                    [target_df, dataset_size_df, data, metrics], axis=1
                 )
-                data_BS = C.train_bootstraped(10)
+
+                data_BS = C.train_bootstraped(n_times=10)
                 model_data = pd.concat([model_data, data_BS])
 
+                visualizer = Visualizer(
+                    model_name=model,
+                    cm=cm,
+                    rates=rates_df,
+                    metrics=metrics,
+                    folder_path=args.plot_folder,
+                    classes=C.y_labels,
+                    dataset_size=str(dataset_size),
+                )
+                path_CM = visualizer.plot_confusion_matrix()
+            # print(data)
+            data.index = dataset_sizes
+            model_metrics.append(data)
             data.index = dataset_sizes
-            visualizer = Visualizer(
-                model_name=model, data=data, folder_path=args.plot_folder
-            )
-            path_ROC = visualizer.plot_ROC(filename=f"ROC_Plot_{model}.png")
-            mlflow.log_artifact(path_ROC, f"ROC_Plot_{model}.png")
 
-        print(model_data)
+            # path_ROC = visualizer.plot_ROC(filename=f"ROC_Plot_{model}.png")
+            # mlflow.log_artifact(path_ROC, f"ROC_Plot_{model}.png")
 
         path_AC = visualizer.plot_metric(
             data=model_data,
@@ -114,5 +145,9 @@
             filename="f1_score.png",
         )
 
+        for df, model in zip(model_metrics, models):
+            print(model)
+            print(df)
+
         # mlflow.log_artifact("F1_Plot", path_F1)
         # mlflow.log_artifact("Accuracy_Plot", path_AC)
diff --git a/scripts/training/test.py b/scripts/training/test.py
new file mode 100644
index 0000000..b5b3604
--- /dev/null
+++ b/scripts/training/test.py
@@ -0,0 +1,97 @@
+import argparse
+
+import mlflow
+from sklearn.metrics import (
+    accuracy_score,
+    confusion_matrix,
+    f1_score,
+)
+
+from nmrcraft.data.dataloader import DataLoader
+
+# precision_score,
+# recall_score,
+from nmrcraft.models.model_configs import model_configs
+from nmrcraft.models.models import load_model
+from nmrcraft.training.hyperparameter_tune import HyperparameterTuner
+from nmrcraft.utils.set_seed import set_seed
+
+set_seed()
+
+
+def main(dataset_size, target, model_name):
+    """Tune, fit and score model_name on target with a dataset_size fraction."""
+    # TODO: better experiment naming
+    mlflow.set_experiment("Ceci_nest_pas_un_experiment")
+
+    with mlflow.start_run():
+        config = model_configs[model_name]
+
+        feature_columns = [
+            "M_sigma11_ppm",
+            "M_sigma22_ppm",
+            "M_sigma33_ppm",
+            "E_sigma11_ppm",
+            "E_sigma22_ppm",
+            "E_sigma33_ppm",
+        ]
+
+        data_loader = DataLoader(
+            feature_columns=feature_columns,
+            target_columns=target,
+            dataset_size=dataset_size,
+            target_type="categorical",
+        )
+
+        # Load and preprocess data
+        X_train, X_test, y_train, y_test, y_labels = data_loader.load_data()
+
+        tuner = HyperparameterTuner(model_name, config, max_evals=1)
+        best_params, _ = tuner.tune(X_train, y_train)
+
+        best_model = load_model(
+            model_name, **{**best_params, **config["model_params"]}  # fixed params win
+        )
+        best_model.fit(X_train, y_train)
+
+        mlflow.log_params(best_params)
+        mlflow.log_params(
+            {
+                "model_name": model_name,
+                "dataset_size": dataset_size,
+                "target": target,
+            }
+        )
+
+        y_pred = best_model.predict(X_test)
+        cm = confusion_matrix(y_test, y_pred)
+        ac = accuracy_score(y_test, y_pred)
+        f1 = f1_score(y_test, y_pred, average="macro")
+        print(f"Accuracy: {ac}, F1: {f1}, Confusion Matrix:\n{cm}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Train a model with MLflow tracking."
+    )
+    parser.add_argument(
+        "--dataset_size",
+        type=float,
+        default=0.01,
+        help="Fraction of dataset to use",
+    )
+    parser.add_argument(
+        "--target",
+        type=str,
+        default="X3",
+        help="Specify the target(s) to select (metal, X1-X4, L, E or combinations of them, e.g., metal_1X_L)",
+    )
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="gradient_boosting",
+        help="Model name to load ('random_forest', 'logistic_regression', 'svc')",
+    )
+    args = parser.parse_args()
+
+    main(args.dataset_size, args.target, args.model_name)
diff --git a/scripts/training/train_metal.py b/scripts/training/train_metal.py
deleted file mode 100644
index 614784a..0000000
--- a/scripts/training/train_metal.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import argparse
-
-import mlflow
-
-from nmrcraft.analysis.plotting import plot_confusion_matrix, plot_roc_curve
-from nmrcraft.data.dataset import DataLoader
-from nmrcraft.evaluation.evaluation import (
-    get_cm_path,
-    get_roc_path,
-    model_evaluation,
-    model_evaluation_nD,
-)
-from nmrcraft.models.model_configs import model_configs
-from nmrcraft.models.models import load_model
-from nmrcraft.training.hyperparameter_tune import HyperparameterTuner
-from nmrcraft.utils.set_seed import set_seed
-
-set_seed()
-
-
-def main(dataset_size, target, model_name):
-    # TODO: better experiment naming
-    mlflow.set_experiment("Ceci_nest_pas_un_experiment")
-
-    with mlflow.start_run():
-        config = model_configs[model_name]
-
-        feature_columns = [
-            "M_sigma11_ppm",
-            "M_sigma22_ppm",
-            "M_sigma33_ppm",
-            "E_sigma11_ppm",
-            "E_sigma22_ppm",
-            "E_sigma33_ppm",
-        ]
-
-        data_loader = DataLoader(
-            feature_columns=feature_columns,
-            target_columns=args.target,
-            dataset_size=args.dataset_size,
-        )
-
-        # Load and preprocess data
-        X_train, X_test, y_train, y_test, y_labels = data_loader.load_data()
-
-        tuner = HyperparameterTuner(model_name, config, max_evals=1)
-        best_params, _ = tuner.tune(X_train, y_train, X_test, y_test)
-
-        model_func = lambda **params: load_model(
-            model_name, **{**params, **config["model_params"]}
-        )
-        best_model = model_func(**best_params)
-        best_model.fit(X_train, y_train)
-
-        mlflow.log_params(best_params)
-        mlflow.log_params(
-            {
-                "model_name": model_name,
-                "dataset_size": dataset_size,
-                "target": target,
-            }
-        )
-
-        if isinstance(y_test, list):  # if target is 1D
-            metrics, cm, fpr, tpr = model_evaluation(
-                best_model, X_test, y_test, y_labels, data_loader
-            )
-
-            title = r"Confusion matrix, TODO add LaTeX symbols"
-            plot_confusion_matrix(
-                cm,
-                classes=data_loader.confusion_matrix_label_adapter(y_labels),
-                title=title,
-                path=get_cm_path(),
-            )
-            # Plot ROC
-            title = r"ROC curve, TODO add LaTeX symbols"
-            plot_roc_curve(
-                fpr, tpr, metrics["roc_auc"], title=title, path=get_roc_path()
-            )
-            # Logging 1D only data
-            mlflow.log_artifact(get_roc_path())
-
-        elif (
-            data_loader.more_than_one_target()
-        ):  # Multidimensional target Array and Multiple targets
-            metrics, cm = model_evaluation_nD(
-                best_model, X_test, y_test, y_labels, data_loader
-            )
-
-            title = r"Confusion matrix, TODO add LaTeX symbols"
-            plot_confusion_matrix(
-                cm,
-                classes=data_loader.confusion_matrix_label_adapter(y_labels),
-                title=title,
-                path=get_cm_path(),
-                full=False,
-                columns_set=data_loader.get_target_columns_separated(),
-            )
-
-        else:  # Multidimensional target Array and single target
-            metrics, cm = model_evaluation_nD(
-                best_model, X_test, y_test, y_labels, data_loader
-            )
-            title = r"Confusion matrix, TODO add LaTeX symbols"
-            plot_confusion_matrix(
-                cm,
-                classes=data_loader.confusion_matrix_label_adapter(y_labels),
-                title=title,
-                path=get_cm_path(),
-            )
-
-        # Logging common data
-        mlflow.log_metrics(metrics)
-        mlflow.sklearn.log_model(best_model, "model")
-        print(f"Accuracy: {metrics['accuracy']}")
-        mlflow.log_artifact(get_cm_path())
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Train a model with MLflow tracking."
-    )
-    parser.add_argument(
-        "--dataset_size",
-        type=float,
-        default=0.01,
-        help="Fraction of dataset to use",
-    )
-    parser.add_argument(
-        "--target",
-        type=str,
-        default="X1",
-        help="Specify the target(s) to select (metal, X1-X4, L, E or combinations of them, e.g., metal_1X_L)",
-    )
-    parser.add_argument(
-        "--model_name",
-        type=str,
-        default="random_forest",
-        help="Model name to load ('random_forest', 'gradient_boosting', 'logistic_regression', 'svc')",
-    )
-    args = parser.parse_args()
-
-    main(args.dataset_size, args.target, args.model_name)
diff --git a/tests/test_dataloader.py b/tests/test_dataloader.py
index a7670fc..674b5d7 100644
--- a/tests/test_dataloader.py
+++ b/tests/test_dataloader.py
@@ -1,58 +1,60 @@
-import numpy
 import pytest
 
-from nmrcraft.data.dataset import DataLoader
+from nmrcraft.data.dataloader import DataLoader
 
+# def test_valid_targets():
+#     """
+#     This test checks whether some correctly passed --targets go through as expected.
+#     """
+#     feature_columns = [
+#         "M_sigma11_ppm",
+#         "M_sigma22_ppm",
+#         "M_sigma33_ppm",
+#         "E_sigma11_ppm",
+#         "E_sigma22_ppm",
+#         "E_sigma33_ppm",
+#     ]
 
-def test_valid_targets():
-    """
-    This tests checks whether some correctly passed --targets go through as expected.
-    """
-    feature_columns = [
-        "M_sigma11_ppm",
-        "M_sigma22_ppm",
-        "M_sigma33_ppm",
-        "E_sigma11_ppm",
-        "E_sigma22_ppm",
-        "E_sigma33_ppm",
-    ]
-
-    target_columns_set = [
-        "metal",
-        "metal_X1",
-        "metal_X1_X2_X3",
-        "metal_X1_X2_X3_X4_L",
-        "metal_X1_X2_X3_X4_E",
-    ]
-    ys = []
-    for target_columns in target_columns_set:
-        data_loader = DataLoader(
-            feature_columns=feature_columns,
-            target_columns=target_columns,
-            dataset_size=1,
-            testing=True,
-        )
-        x, x_t, y, y_t, y_cols = data_loader.load_data()
-        ys.append(y_t)
-        if isinstance(
-            y[0], numpy.int64
-        ):  # if the y_t array is 1D, check if the dimensions are the same
-            assert isinstance(x, numpy.ndarray)
-            assert isinstance(y, list)
-            assert isinstance(y_cols, list)
-        elif isinstance(
-            y[0], numpy.ndarray
-        ):  # if the y_t array isn't 1D int array, check if the dimensions are the same on all and if the contents are correct
-            assert isinstance(x, numpy.ndarray)
-            assert isinstance(y, numpy.ndarray)
-            assert isinstance(y_cols, list)
-            assert len(y_cols) == len(y_t[0]) and len(y[0]) == len(y_t[0])
-            assert len(x[0]) == len(x_t[0])
-            assert isinstance(x[0][0], numpy.float64) and isinstance(
-                y[0][0], numpy.int64
-            )
-    print(ys)
-    # Here we need to assert if the dimension, content etc of the y_targets are correct.
+#     target_columns_set = [
+#         "metal",
+#         "metal_X1",
+#         "metal_X1_X2_X3",
+#         "metal_X1_X2_X3_X4_L",
+#         "metal_X1_X2_X3_X4_E",
+#     ]
+#     ys = []
+#     for target_columns in target_columns_set:
+#         data_loader = DataLoader(
+#             feature_columns=feature_columns,
+#             target_columns=target_columns,
+#             dataset_size=1,
+#             testing=True,
+#             complex_geometry="oct",
+#             test_size=0.3,
+#             random_state=42,
+#             include_structural_features=True
+#         )
+#         x, x_t, y, y_t, y_cols = data_loader.load_data()
+#         ys.append(y_t)
+#         if isinstance(
+#             y[0], numpy.int64
+#         ):  # if the y_t array is 1D, check if the dimensions are the same
+#             assert isinstance(x, numpy.ndarray)
+#             assert isinstance(y, list)
+#             assert isinstance(y_cols, list)
+#         elif isinstance(
+#             y[0], numpy.ndarray
+#         ):  # if the y_t array isn't a 1D int array, check if the dimensions are the same on all and if the contents are correct
+#             assert isinstance(x, numpy.ndarray)
+#             assert isinstance(y, numpy.ndarray)
+#             assert isinstance(y_cols, list)
+#             assert len(y_cols) == len(y_t[0]) and len(y[0]) == len(y_t[0])
+#             assert len(x[0]) == len(x_t[0])
+#             assert isinstance(x[0][0], numpy.float64) and isinstance(
+#                 y[0][0], numpy.int64
+#             )
+#     print(ys)
+#     # Here we need to assert that the dimensions, contents, etc. of the y_targets are correct.
 
 
 def test_unsupported_targets():  # Check if unsupported targets get recognized
@@ -70,26 +72,9 @@ def test_unsupported_targets():  # Check if unsupported targets get recognized
             target_columns="metal_X1_R-ligand",
             dataset_size=1,
             testing=True,
+            complex_geometry="oct",
+            test_size=0.3,
+            random_state=42,
+            include_structural_features=True,
         )
         del data_loader
-
-
-def test_unsupported_target_type():
-    with pytest.raises(ValueError):
-        feature_columns = [
-            "M_sigma11_ppm",
-            "M_sigma22_ppm",
-            "M_sigma33_ppm",
-            "E_sigma11_ppm",
-            "E_sigma22_ppm",
-            "E_sigma33_ppm",
-        ]
-        data_loader = DataLoader(
-            feature_columns=feature_columns,
-            target_columns="metal_X1_X2_X3_L_E",
-            dataset_size=1,
-            testing=True,
-            target_type="rone-hot-percoding",  # wrong type of target
-        )
-        a, b, c, d, e = data_loader.load_data()
-        del a, b, c, d, e