Merge pull request #154 from ImperialCollegeLondon/add-save-capability
Add archive method and PyBaMM simulation importer
tomjholland authored Oct 21, 2024
2 parents df72274 + f0ff9a0 commit f47fb14
Showing 36 changed files with 1,115 additions and 536 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic]
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic, types-toml]
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.35.0
hooks:
10 changes: 7 additions & 3 deletions docs/source/examples/comparing-pyprobe-performance.ipynb
@@ -103,6 +103,12 @@
" csv_time[repeat]= timeit.default_timer() - start_time\n",
" start_time = timeit.default_timer()\n",
" df = pd.read_parquet(data_directory + '/' + file)\n",
" # Add a column to identify the cycle number\n",
" df['Cycle'] = (\n",
" (df['Step'].astype(int) - df['Step'].astype(int).shift() < 0)\n",
" .fillna(0)\n",
" .cumsum()\n",
" )\n",
" cumulative_time[0, repeat] = timeit.default_timer() - start_time\n",
"\n",
" experiment = df[df['Step'].isin([4, 5, 6, 7])]\n",
@@ -240,20 +246,18 @@
"repeated_data = pl.concat([data] * n_repeats)\n",
"\n",
"# Repeat the 'Cycle' and 'Event' columns to match the length of the repeated data\n",
"cycle_repeated = pl.concat([data['Cycle']] * n_repeats)\n",
"event_repeated = pl.concat([data['Event']] * n_repeats)\n",
"step_repeated = pl.concat([data['Step']] * n_repeats)\n",
"time_repeated = pl.concat([data['Time [s]']]* n_repeats)\n",
"\n",
"# Increment the 'Cycle' and 'Event' columns\n",
"cycle_increment = data['Cycle'].max() + 1\n",
"event_increment = data['Event'].max() + 1\n",
"step_increment = data['Step'].max() + 1\n",
"time_increment = data['Time [s]'].max()\n",
"\n",
"\n",
"repeated_data = repeated_data.with_columns([\n",
" (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" # (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + event_repeated).alias('Event'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + step_repeated).alias('Step'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * time_increment + time_repeated).alias('Time [s]'),\n",
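The first notebook hunk now derives the Cycle column in pandas, since the cycler no longer supplies it (see the BaseCycler change at the end of this diff): a decrease in the step number marks the start of a new cycle, and the cumulative sum of those decreases gives a zero-based counter. A minimal standalone sketch of the same idea, using a hypothetical step sequence:

```python
import pandas as pd

# Hypothetical data: two passes through steps 1-3, i.e. two cycles.
df = pd.DataFrame({"Step": [1, 2, 3, 1, 2, 3]})

# A negative step-to-step difference flags the start of a new cycle; the
# cumulative sum of those flags is a zero-based cycle counter.
step = df["Step"].astype(int)
df["Cycle"] = (step.diff() < 0).cumsum()

print(df["Cycle"].tolist())  # [0, 0, 0, 1, 1, 1]
```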
2 changes: 1 addition & 1 deletion pyprobe/__init__.py
@@ -1,5 +1,5 @@
"""The PyProBE package."""
from .cell import Cell, make_cell_list # noqa: F401
from .cell import Cell, __version__, load_archive, make_cell_list # noqa: F401
from .dashboard import launch_dashboard # noqa: F401
from .plot import Plot # noqa: F401
from .result import Result # noqa: F401
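The widened import exposes the package version and the new archive loader at the top level alongside the existing constructors. A round-trip sketch of how the new exports fit together (the cell info and file name are hypothetical; `archive` and `load_archive` are defined in `pyprobe/cell.py` below):

```python
import pyprobe

# Hypothetical cell; in practice procedures would be added before archiving.
cell = pyprobe.Cell(info={"Name": "Cell 1"})

# archive() writes one parquet file per procedure plus a metadata.json;
# a ".zip" suffix zips the directory and removes it afterwards.
cell.archive("cell_1.zip")

# load_archive() rebuilds the Cell, warning if the archive was written by a
# different PyProBE version (compared against pyprobe.__version__).
restored = pyprobe.load_archive("cell_1.zip")
```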
3 changes: 2 additions & 1 deletion pyprobe/analysis/cycling.py
@@ -5,7 +5,7 @@
from pydantic import BaseModel

from pyprobe.analysis.utils import AnalysisValidator
from pyprobe.filters import Experiment
from pyprobe.filters import Experiment, get_cycle_column
from pyprobe.result import Result


@@ -42,6 +42,7 @@ def summary(self, dchg_before_chg: bool = True) -> Result:
AnalysisValidator(
input_data=self.input_data, required_columns=["Capacity [Ah]", "Time [s]"]
)
self.input_data.base_dataframe = get_cycle_column(self.input_data)

self._create_capacity_throughput()
lf_capacity_throughput = self.input_data.base_dataframe.group_by(
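`get_cycle_column` is imported from `pyprobe.filters` but its implementation is not part of the files shown here. A plausible sketch of what it computes, mirroring the cycle expression removed from `BaseCycler` at the end of this diff, is:

```python
import polars as pl


def get_cycle_column(input_data):
    """Sketch only: add a 'Cycle' column to the filtered data on demand."""
    cycle = (
        (pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
        .fill_null(strategy="zero")
        .cum_sum()
        .cast(pl.Int64)
        .alias("Cycle")
    )
    return input_data.base_dataframe.with_columns(cycle)
```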
8 changes: 4 additions & 4 deletions pyprobe/analysis/degradation_mode_analysis.py
@@ -224,10 +224,10 @@ def quantify_degradation_modes(
self.dma_result = electrode_capacity_results[0].clean_copy(
pl.DataFrame(
{
"SOH": SOH,
"LAM_pe": LAM_pe,
"LAM_ne": LAM_ne,
"LLI": LLI,
"SOH": SOH[:, 0],
"LAM_pe": LAM_pe[:, 0],
"LAM_ne": LAM_ne[:, 0],
"LLI": LLI[:, 0],
}
)
)
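The degradation-mode arrays are presumably (n, 1) column vectors, so slicing out the first column yields 1-D arrays and polars builds plain numeric columns. A small illustration with a hypothetical array:

```python
import numpy as np
import polars as pl

# Hypothetical (n, 1) column vector, as an optimisation routine might return.
soh = np.linspace(1.0, 0.9, 5).reshape(-1, 1)

# soh has shape (5, 1); soh[:, 0] has shape (5,), which maps to an ordinary
# Float64 column in the result DataFrame.
df = pl.DataFrame({"SOH": soh[:, 0]})
print(df.dtypes)  # [Float64]
```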
239 changes: 235 additions & 4 deletions pyprobe/cell.py
@@ -1,17 +1,23 @@
"""Module for the Cell class."""
import json
import os
import shutil
import time
import warnings
import zipfile
from typing import Callable, Dict, List, Optional

import distinctipy
import polars as pl
import pybamm.solvers.solution
from pydantic import BaseModel, Field, field_validator, validate_call

from pyprobe.cyclers import arbin, basecycler, basytec, biologic, maccor, neware
from pyprobe.filters import Procedure
from pyprobe.readme_processor import process_readme

__version__ = "1.0.3"


class Cell(BaseModel):
"""A class for a cell in a battery experiment."""
@@ -193,12 +199,9 @@ def add_procedure(
readme = process_readme(readme_path)

self.procedure[procedure_name] = Procedure(
titles=readme.titles,
steps_idx=readme.step_numbers,
readme_dict=readme.experiment_dict,
base_dataframe=base_dataframe,
info=self.info,
pybamm_experiment=readme.pybamm_experiment,
pybamm_experiment_list=readme.pybamm_experiment_list,
)

@staticmethod
@@ -287,6 +290,234 @@ def _get_data_paths(
data_path = os.path.join(folder_path, filename_str)
return data_path

    def import_pybamm_solution(
        self,
        procedure_name: str,
        experiment_names: List[str] | str,
        pybamm_solutions: List[pybamm.solvers.solution] | pybamm.solvers.solution,
        output_data_path: Optional[str] = None,
        optional_variables: Optional[List[str]] = None,
    ) -> None:
        """Import a PyBaMM solution object into a procedure of the cell.
        Filtering a PyBaMM solution object by cycle and step reflects the behaviour of
        the :code:`cycles` and :code:`steps` dictionaries of the PyBaMM solution object.
        Multiple experiments can be imported into the same procedure. This is achieved
        by providing multiple solution objects and experiment names.
        This method optionally writes the data to a parquet file, if a data path is
        provided.
        Args:
            procedure_name (str):
                A name to give the procedure. This will be used when calling
                :code:`cell.procedure[procedure_name]`.
            pybamm_solutions (list or pybamm_solution):
                A list of PyBaMM solution objects or a single PyBaMM solution object.
            experiment_names (list or str):
                A list of experiment names or a single experiment name to assign to the
                PyBaMM solution object.
            output_data_path (str, optional):
                The path to write the parquet file. Defaults to None.
            optional_variables (list, optional):
                A list of variables to import from the PyBaMM solution object in
                addition to the PyProBE required variables. Defaults to None.
        """
        # the minimum required variables to import from the PyBaMM solution object
        required_variables = [
            "Time [s]",
            "Current [A]",
            "Terminal voltage [V]",
            "Discharge capacity [A.h]",
        ]

        # get the list of variables to import from the PyBaMM solution object
        if optional_variables is not None:
            import_variables = required_variables + optional_variables
        else:
            import_variables = required_variables

        # check if the experiment names and PyBaMM solutions are lists
        if isinstance(experiment_names, list) and isinstance(pybamm_solutions, list):
            if len(experiment_names) != len(pybamm_solutions):
                raise ValueError(
                    "The number of experiment names and PyBaMM solutions must be equal."
                )
        elif isinstance(experiment_names, list) != isinstance(pybamm_solutions, list):
            if isinstance(experiment_names, list):
                raise ValueError(
                    "A list of experiment names must be provided with a list of PyBaMM"
                    " solutions."
                )
            else:
                raise ValueError(
                    "A single experiment name must be provided with a single PyBaMM"
                    " solution."
                )
        else:
            experiment_names = [str(experiment_names)]
            pybamm_solutions = [pybamm_solutions]

        lazyframe_created = False
        for experiment_name, pybamm_solution in zip(experiment_names, pybamm_solutions):
            # get the data from the PyBaMM solution object
            pybamm_data = pybamm_solution.get_data_dict(import_variables)
            # convert the PyBaMM data to a polars dataframe and add the experiment name
            # as a column
            solution_data = pl.LazyFrame(pybamm_data).with_columns(
                pl.lit(experiment_name).alias("Experiment")
            )
            if lazyframe_created is False:
                all_solution_data = solution_data
                lazyframe_created = True
            else:
                # join the new solution data with the existing solution data, a right
                # join is used to keep all the data
                all_solution_data = all_solution_data.join(
                    solution_data, on=import_variables + ["Step"], how="right"
                )
                # fill null values where the experiment has been extended with the newly
                # joined experiment name
                all_solution_data = all_solution_data.with_columns(
                    pl.col("Experiment").fill_null(pl.col("Experiment_right"))
                )
        # get the maximum step number for each experiment
        max_steps = (
            all_solution_data.group_by("Experiment")
            .agg(pl.max("Step").alias("Max Step"))
            .sort("Experiment")
            .with_columns(pl.col("Max Step").cum_sum().shift())
        )
        # add the maximum step number from the previous experiment to the step number
        all_solution_data = all_solution_data.join(
            max_steps, on="Experiment", how="left"
        ).with_columns(
            (pl.col("Step") + pl.col("Max Step").fill_null(-1) + 1).alias("Step")
        )
        # get the range of step values for each experiment
        step_ranges = all_solution_data.group_by("Experiment").agg(
            pl.arange(pl.col("Step").min(), pl.col("Step").max() + 1).alias(
                "Step Range"
            )
        )

        # create a dictionary of the experiment names and the step ranges
        experiment_dict = {}
        for row in step_ranges.collect().iter_rows():
            experiment = row[0]
            experiment_dict[experiment] = {"Steps": row[1]}
            experiment_dict[experiment]["Step Descriptions"] = []

        # reformat the data to the PyProBE format
        base_dataframe = all_solution_data.select(
            [
                pl.col("Time [s]"),
                pl.col("Current [A]") * -1,
                pl.col("Terminal voltage [V]").alias("Voltage [V]"),
                (pl.col("Discharge capacity [A.h]") * -1).alias("Capacity [Ah]"),
                pl.col("Step"),
                (
                    (
                        pl.col("Step").cast(pl.Int64)
                        - pl.col("Step").cast(pl.Int64).shift()
                        != 0
                    )
                    .fill_null(strategy="zero")
                    .cum_sum()
                    .alias("Event")
                ),
            ]
        )
        # create the procedure object
        self.procedure[procedure_name] = Procedure(
            base_dataframe=base_dataframe, info=self.info, readme_dict=experiment_dict
        )

        # write the data to a parquet file if a path is provided
        if output_data_path is not None:
            if not output_data_path.endswith(".parquet"):
                output_data_path += ".parquet"
            base_dataframe.collect().write_parquet(output_data_path)

    def archive(self, path: str) -> None:
        """Archive the cell object.
        Args:
            path (str): The path to the archive directory or zip file.
        """
        if path.endswith(".zip"):
            zip = True
            path = path[:-4]
        else:
            zip = False
        if not os.path.exists(path):
            os.makedirs(path)
        metadata = self.dict()
        metadata["PyProBE Version"] = __version__
        for procedure_name, procedure in self.procedure.items():
            if isinstance(procedure.base_dataframe, pl.LazyFrame):
                df = procedure.base_dataframe.collect()
            else:
                df = procedure.base_dataframe
            # write the dataframe to a parquet file
            filename = procedure_name + ".parquet"
            filepath = os.path.join(path, filename)
            df.write_parquet(filepath)
            # update the metadata with the filename
            metadata["procedure"][procedure_name]["base_dataframe"] = filename
        with open(os.path.join(path, "metadata.json"), "w") as f:
            json.dump(metadata, f)

        if zip:
            with zipfile.ZipFile(path + ".zip", "w") as zipf:
                for root, _, files in os.walk(path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname = os.path.relpath(file_path, path)
                        zipf.write(file_path, arcname)
            # Delete the original directory
            shutil.rmtree(path)


def load_archive(path: str) -> Cell:
    """Load a cell object from an archive.
    Args:
        path (str): The path to the archive directory.
    Returns:
        Cell: The cell object.
    """
    if path.endswith(".zip"):
        extract_path = path[:-4]
        with zipfile.ZipFile(path, "r") as zipf:
            zipf.extractall(extract_path)
        # Delete the original zip file
        os.remove(path)
        archive_path = extract_path
    else:
        archive_path = path

    with open(os.path.join(archive_path, "metadata.json"), "r") as f:
        metadata = json.load(f)
    if metadata["PyProBE Version"] != __version__:
        warnings.warn(
            f"The PyProBE version used to archive the cell was "
            f"{metadata['PyProBE Version']}, the current version is "
            f"{__version__}. There may be compatibility"
            f" issues."
        )
    metadata.pop("PyProBE Version")
    for procedure in metadata["procedure"].values():
        procedure["base_dataframe"] = os.path.join(
            archive_path, procedure["base_dataframe"]
        )
    cell = Cell(**metadata)

    return cell


def make_cell_list(
    record_filepath: str,
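A usage sketch for the new importer. The PyBaMM model, experiment, and cell info below are illustrative assumptions; only the `import_pybamm_solution` signature comes from this commit:

```python
import pybamm

from pyprobe import Cell

# Hypothetical PyBaMM simulation used to generate a solution object.
experiment = pybamm.Experiment(
    [
        "Discharge at 1C until 3.0 V",
        "Charge at C/3 until 4.2 V",
        "Hold at 4.2 V until C/20",
    ]
)
model = pybamm.lithium_ion.DFN()
simulation = pybamm.Simulation(model, experiment=experiment)
solution = simulation.solve()

# Import the solution as a procedure; output_data_path optionally writes the
# converted data to a parquet file as well.
cell = Cell(info={"Name": "Simulated cell"})
cell.import_pybamm_solution(
    procedure_name="Simulation",
    experiment_names="Cycling",
    pybamm_solutions=solution,
    output_data_path="simulation.parquet",
)
```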
20 changes: 0 additions & 20 deletions pyprobe/cyclers/basecycler.py
@@ -302,7 +302,6 @@ def _assign_instructions(self) -> None:
"Capacity": self.capacity,
"Temperature": self.temperature,
"Step": self.step,
"Cycle": self.cycle,
"Event": self.event,
}
for quantity in self._column_map.keys():
@@ -353,7 +352,6 @@ def pyprobe_dataframe(self) -> pl.DataFrame:
required_columns = [
self.date if "Date" in self._column_map.keys() else None,
self.time,
self.cycle,
self.step,
self.event,
self.current,
@@ -480,24 +478,6 @@ def step(self) -> pl.Expr:
"""
return pl.col("Step")

@property
def cycle(self) -> pl.Expr:
"""Identify the cycle number.
Cycles are defined by repetition of steps. They are identified by a decrease
in the step number.
Returns:
pl.Expr: A polars expression for the cycle number.
"""
return (
(pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
.fill_null(strategy="zero")
.cum_sum()
.alias("Cycle")
.cast(pl.Int64)
)

@property
def event(self) -> pl.Expr:
"""Identify the event number.
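For contrast with the Event expression that remains (and is reproduced in the cell.py hunk above): Event increments on any change of step number, while the removed Cycle expression increments only on a decrease, i.e. when the programme loops back. A toy comparison, assuming a six-row step sequence:

```python
import polars as pl

step = pl.col("Step").cast(pl.Int64)
event = (step - step.shift() != 0).fill_null(strategy="zero").cum_sum().alias("Event")
cycle = (step - step.shift() < 0).fill_null(strategy="zero").cum_sum().alias("Cycle")

df = pl.DataFrame({"Step": [1, 2, 3, 1, 2, 3]}).with_columns(event, cycle)
print(df)  # Event: 0,1,2,3,4,5  Cycle: 0,0,0,1,1,1
```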