Merge pull request #154 from ImperialCollegeLondon/add-save-capability
Add archive method and PyBaMM simulation importer
tomjholland authored Oct 21, 2024
2 parents df72274 + f0ff9a0 commit f47fb14
Showing 36 changed files with 1,115 additions and 536 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic]
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic, types-toml]
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.35.0
hooks:
10 changes: 7 additions & 3 deletions docs/source/examples/comparing-pyprobe-performance.ipynb
@@ -103,6 +103,12 @@
" csv_time[repeat]= timeit.default_timer() - start_time\n",
" start_time = timeit.default_timer()\n",
" df = pd.read_parquet(data_directory + '/' + file)\n",
" # Add a column to identify the cycle number\n",
" df['Cycle'] = (\n",
" (df['Step'].astype(int) - df['Step'].astype(int).shift() < 0)\n",
" .fillna(0)\n",
" .cumsum()\n",
" )\n",
" cumulative_time[0, repeat] = timeit.default_timer() - start_time\n",
"\n",
" experiment = df[df['Step'].isin([4, 5, 6, 7])]\n",
@@ -240,20 +246,18 @@
"repeated_data = pl.concat([data] * n_repeats)\n",
"\n",
"# Repeat the 'Cycle' and 'Event' columns to match the length of the repeated data\n",
"cycle_repeated = pl.concat([data['Cycle']] * n_repeats)\n",
"event_repeated = pl.concat([data['Event']] * n_repeats)\n",
"step_repeated = pl.concat([data['Step']] * n_repeats)\n",
"time_repeated = pl.concat([data['Time [s]']]* n_repeats)\n",
"\n",
"# Increment the 'Cycle' and 'Event' columns\n",
"cycle_increment = data['Cycle'].max() + 1\n",
"event_increment = data['Event'].max() + 1\n",
"step_increment = data['Step'].max() + 1\n",
"time_increment = data['Time [s]'].max()\n",
"\n",
"\n",
"repeated_data = repeated_data.with_columns([\n",
" (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" # (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + event_repeated).alias('Event'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + step_repeated).alias('Step'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * time_increment + time_repeated).alias('Time [s]'),\n",
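The first notebook hunk now derives the Cycle column in pandas, since the cycler no longer supplies it (see the BaseCycler change at the end of this diff): a decrease in the step number marks the start of a new cycle, and the cumulative sum of those decreases gives a zero-based counter. A minimal standalone sketch of the same idea, using a hypothetical step sequence:

```python
import pandas as pd

# Hypothetical data: two passes through steps 1-3, i.e. two cycles.
df = pd.DataFrame({"Step": [1, 2, 3, 1, 2, 3]})

# A negative step-to-step difference flags the start of a new cycle; the
# cumulative sum of those flags is a zero-based cycle counter.
step = df["Step"].astype(int)
df["Cycle"] = (step.diff() < 0).cumsum()

print(df["Cycle"].tolist())  # [0, 0, 0, 1, 1, 1]
```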
2 changes: 1 addition & 1 deletion pyprobe/__init__.py
@@ -1,5 +1,5 @@
"""The PyProBE package."""
from .cell import Cell, make_cell_list # noqa: F401
from .cell import Cell, __version__, load_archive, make_cell_list # noqa: F401
from .dashboard import launch_dashboard # noqa: F401
from .plot import Plot # noqa: F401
from .result import Result # noqa: F401
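The widened import exposes the package version and the new archive loader at the top level alongside the existing constructors. A round-trip sketch of how the new exports fit together (the cell info and file name are hypothetical; `archive` and `load_archive` are defined in `pyprobe/cell.py` below):

```python
import pyprobe

# Hypothetical cell; in practice procedures would be added before archiving.
cell = pyprobe.Cell(info={"Name": "Cell 1"})

# archive() writes one parquet file per procedure plus a metadata.json;
# a ".zip" suffix zips the directory and removes it afterwards.
cell.archive("cell_1.zip")

# load_archive() rebuilds the Cell, warning if the archive was written by a
# different PyProBE version (compared against pyprobe.__version__).
restored = pyprobe.load_archive("cell_1.zip")
```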
3 changes: 2 additions & 1 deletion pyprobe/analysis/cycling.py
@@ -5,7 +5,7 @@
from pydantic import BaseModel

from pyprobe.analysis.utils import AnalysisValidator
from pyprobe.filters import Experiment
from pyprobe.filters import Experiment, get_cycle_column
from pyprobe.result import Result


@@ -42,6 +42,7 @@ def summary(self, dchg_before_chg: bool = True) -> Result:
AnalysisValidator(
input_data=self.input_data, required_columns=["Capacity [Ah]", "Time [s]"]
)
self.input_data.base_dataframe = get_cycle_column(self.input_data)

self._create_capacity_throughput()
lf_capacity_throughput = self.input_data.base_dataframe.group_by(
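`get_cycle_column` is imported from `pyprobe.filters` but its implementation is not part of the files shown here. A plausible sketch of what it computes, mirroring the cycle expression removed from `BaseCycler` at the end of this diff, is:

```python
import polars as pl


def get_cycle_column(input_data):
    """Sketch only: add a 'Cycle' column to the filtered data on demand."""
    cycle = (
        (pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
        .fill_null(strategy="zero")
        .cum_sum()
        .cast(pl.Int64)
        .alias("Cycle")
    )
    return input_data.base_dataframe.with_columns(cycle)
```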
8 changes: 4 additions & 4 deletions pyprobe/analysis/degradation_mode_analysis.py
@@ -224,10 +224,10 @@ def quantify_degradation_modes(
self.dma_result = electrode_capacity_results[0].clean_copy(
pl.DataFrame(
{
"SOH": SOH,
"LAM_pe": LAM_pe,
"LAM_ne": LAM_ne,
"LLI": LLI,
"SOH": SOH[:, 0],
"LAM_pe": LAM_pe[:, 0],
"LAM_ne": LAM_ne[:, 0],
"LLI": LLI[:, 0],
}
)
)
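The degradation-mode arrays are presumably (n, 1) column vectors, so slicing out the first column yields 1-D arrays and polars builds plain numeric columns. A small illustration with a hypothetical array:

```python
import numpy as np
import polars as pl

# Hypothetical (n, 1) column vector, as an optimisation routine might return.
soh = np.linspace(1.0, 0.9, 5).reshape(-1, 1)

# soh has shape (5, 1); soh[:, 0] has shape (5,), which maps to an ordinary
# Float64 column in the result DataFrame.
df = pl.DataFrame({"SOH": soh[:, 0]})
print(df.dtypes)  # [Float64]
```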
239 changes: 235 additions & 4 deletions pyprobe/cell.py
@@ -1,17 +1,23 @@
"""Module for the Cell class."""
import json
import os
import shutil
import time
import warnings
import zipfile
from typing import Callable, Dict, List, Optional

import distinctipy
import polars as pl
import pybamm.solvers.solution
from pydantic import BaseModel, Field, field_validator, validate_call

from pyprobe.cyclers import arbin, basecycler, basytec, biologic, maccor, neware
from pyprobe.filters import Procedure
from pyprobe.readme_processor import process_readme

__version__ = "1.0.3"


class Cell(BaseModel):
"""A class for a cell in a battery experiment."""
@@ -193,12 +199,9 @@ def add_procedure(
readme = process_readme(readme_path)

self.procedure[procedure_name] = Procedure(
titles=readme.titles,
steps_idx=readme.step_numbers,
readme_dict=readme.experiment_dict,
base_dataframe=base_dataframe,
info=self.info,
pybamm_experiment=readme.pybamm_experiment,
pybamm_experiment_list=readme.pybamm_experiment_list,
)

@staticmethod
@@ -287,6 +290,234 @@ def _get_data_paths(
data_path = os.path.join(folder_path, filename_str)
return data_path

    def import_pybamm_solution(
        self,
        procedure_name: str,
        experiment_names: List[str] | str,
        pybamm_solutions: List[pybamm.solvers.solution] | pybamm.solvers.solution,
        output_data_path: Optional[str] = None,
        optional_variables: Optional[List[str]] = None,
    ) -> None:
        """Import a PyBaMM solution object into a procedure of the cell.
        Filtering a PyBaMM solution object by cycle and step reflects the behaviour of
        the :code:`cycles` and :code:`steps` dictionaries of the PyBaMM solution object.
        Multiple experiments can be imported into the same procedure. This is achieved
        by providing multiple solution objects and experiment names.
        This method optionally writes the data to a parquet file, if a data path is
        provided.
        Args:
            procedure_name (str):
                A name to give the procedure. This will be used when calling
                :code:`cell.procedure[procedure_name]`.
            pybamm_solutions (list or pybamm_solution):
                A list of PyBaMM solution objects or a single PyBaMM solution object.
            experiment_names (list or str):
                A list of experiment names or a single experiment name to assign to the
                PyBaMM solution object.
            output_data_path (str, optional):
                The path to write the parquet file. Defaults to None.
            optional_variables (list, optional):
                A list of variables to import from the PyBaMM solution object in
                addition to the PyProBE required variables. Defaults to None.
        """
        # the minimum required variables to import from the PyBaMM solution object
        required_variables = [
            "Time [s]",
            "Current [A]",
            "Terminal voltage [V]",
            "Discharge capacity [A.h]",
        ]

        # get the list of variables to import from the PyBaMM solution object
        if optional_variables is not None:
            import_variables = required_variables + optional_variables
        else:
            import_variables = required_variables

        # check if the experiment names and PyBaMM solutions are lists
        if isinstance(experiment_names, list) and isinstance(pybamm_solutions, list):
            if len(experiment_names) != len(pybamm_solutions):
                raise ValueError(
                    "The number of experiment names and PyBaMM solutions must be equal."
                )
        elif isinstance(experiment_names, list) != isinstance(pybamm_solutions, list):
            if isinstance(experiment_names, list):
                raise ValueError(
                    "A list of experiment names must be provided with a list of PyBaMM"
                    " solutions."
                )
            else:
                raise ValueError(
                    "A single experiment name must be provided with a single PyBaMM"
                    " solution."
                )
        else:
            experiment_names = [str(experiment_names)]
            pybamm_solutions = [pybamm_solutions]

        lazyframe_created = False
        for experiment_name, pybamm_solution in zip(experiment_names, pybamm_solutions):
            # get the data from the PyBaMM solution object
            pybamm_data = pybamm_solution.get_data_dict(import_variables)
            # convert the PyBaMM data to a polars dataframe and add the experiment name
            # as a column
            solution_data = pl.LazyFrame(pybamm_data).with_columns(
                pl.lit(experiment_name).alias("Experiment")
            )
            if lazyframe_created is False:
                all_solution_data = solution_data
                lazyframe_created = True
            else:
                # join the new solution data with the existing solution data, a right
                # join is used to keep all the data
                all_solution_data = all_solution_data.join(
                    solution_data, on=import_variables + ["Step"], how="right"
                )
                # fill null values where the experiment has been extended with the newly
                # joined experiment name
                all_solution_data = all_solution_data.with_columns(
                    pl.col("Experiment").fill_null(pl.col("Experiment_right"))
                )
        # get the maximum step number for each experiment
        max_steps = (
            all_solution_data.group_by("Experiment")
            .agg(pl.max("Step").alias("Max Step"))
            .sort("Experiment")
            .with_columns(pl.col("Max Step").cum_sum().shift())
        )
        # add the maximum step number from the previous experiment to the step number
        all_solution_data = all_solution_data.join(
            max_steps, on="Experiment", how="left"
        ).with_columns(
            (pl.col("Step") + pl.col("Max Step").fill_null(-1) + 1).alias("Step")
        )
        # get the range of step values for each experiment
        step_ranges = all_solution_data.group_by("Experiment").agg(
            pl.arange(pl.col("Step").min(), pl.col("Step").max() + 1).alias(
                "Step Range"
            )
        )

        # create a dictionary of the experiment names and the step ranges
        experiment_dict = {}
        for row in step_ranges.collect().iter_rows():
            experiment = row[0]
            experiment_dict[experiment] = {"Steps": row[1]}
            experiment_dict[experiment]["Step Descriptions"] = []

        # reformat the data to the PyProBE format
        base_dataframe = all_solution_data.select(
            [
                pl.col("Time [s]"),
                pl.col("Current [A]") * -1,
                pl.col("Terminal voltage [V]").alias("Voltage [V]"),
                (pl.col("Discharge capacity [A.h]") * -1).alias("Capacity [Ah]"),
                pl.col("Step"),
                (
                    (
                        pl.col("Step").cast(pl.Int64)
                        - pl.col("Step").cast(pl.Int64).shift()
                        != 0
                    )
                    .fill_null(strategy="zero")
                    .cum_sum()
                    .alias("Event")
                ),
            ]
        )
        # create the procedure object
        self.procedure[procedure_name] = Procedure(
            base_dataframe=base_dataframe, info=self.info, readme_dict=experiment_dict
        )

        # write the data to a parquet file if a path is provided
        if output_data_path is not None:
            if not output_data_path.endswith(".parquet"):
                output_data_path += ".parquet"
            base_dataframe.collect().write_parquet(output_data_path)

    def archive(self, path: str) -> None:
        """Archive the cell object.
        Args:
            path (str): The path to the archive directory or zip file.
        """
        if path.endswith(".zip"):
            zip = True
            path = path[:-4]
        else:
            zip = False
        if not os.path.exists(path):
            os.makedirs(path)
        metadata = self.dict()
        metadata["PyProBE Version"] = __version__
        for procedure_name, procedure in self.procedure.items():
            if isinstance(procedure.base_dataframe, pl.LazyFrame):
                df = procedure.base_dataframe.collect()
            else:
                df = procedure.base_dataframe
            # write the dataframe to a parquet file
            filename = procedure_name + ".parquet"
            filepath = os.path.join(path, filename)
            df.write_parquet(filepath)
            # update the metadata with the filename
            metadata["procedure"][procedure_name]["base_dataframe"] = filename
        with open(os.path.join(path, "metadata.json"), "w") as f:
            json.dump(metadata, f)

        if zip:
            with zipfile.ZipFile(path + ".zip", "w") as zipf:
                for root, _, files in os.walk(path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname = os.path.relpath(file_path, path)
                        zipf.write(file_path, arcname)
            # Delete the original directory
            shutil.rmtree(path)


def load_archive(path: str) -> Cell:
    """Load a cell object from an archive.
    Args:
        path (str): The path to the archive directory.
    Returns:
        Cell: The cell object.
    """
    if path.endswith(".zip"):
        extract_path = path[:-4]
        with zipfile.ZipFile(path, "r") as zipf:
            zipf.extractall(extract_path)
        # Delete the original zip file
        os.remove(path)
        archive_path = extract_path
    else:
        archive_path = path

    with open(os.path.join(archive_path, "metadata.json"), "r") as f:
        metadata = json.load(f)
    if metadata["PyProBE Version"] != __version__:
        warnings.warn(
            f"The PyProBE version used to archive the cell was "
            f"{metadata['PyProBE Version']}, the current version is "
            f"{__version__}. There may be compatibility"
            f" issues."
        )
    metadata.pop("PyProBE Version")
    for procedure in metadata["procedure"].values():
        procedure["base_dataframe"] = os.path.join(
            archive_path, procedure["base_dataframe"]
        )
    cell = Cell(**metadata)

    return cell


def make_cell_list(
    record_filepath: str,
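A usage sketch for the new importer. The PyBaMM model, experiment, and cell info below are illustrative assumptions; only the `import_pybamm_solution` signature comes from this commit:

```python
import pybamm

from pyprobe import Cell

# Hypothetical PyBaMM simulation used to generate a solution object.
experiment = pybamm.Experiment(
    [
        "Discharge at 1C until 3.0 V",
        "Charge at C/3 until 4.2 V",
        "Hold at 4.2 V until C/20",
    ]
)
model = pybamm.lithium_ion.DFN()
simulation = pybamm.Simulation(model, experiment=experiment)
solution = simulation.solve()

# Import the solution as a procedure; output_data_path optionally writes the
# converted data to a parquet file as well.
cell = Cell(info={"Name": "Simulated cell"})
cell.import_pybamm_solution(
    procedure_name="Simulation",
    experiment_names="Cycling",
    pybamm_solutions=solution,
    output_data_path="simulation.parquet",
)
```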
20 changes: 0 additions & 20 deletions pyprobe/cyclers/basecycler.py
@@ -302,7 +302,6 @@ def _assign_instructions(self) -> None:
"Capacity": self.capacity,
"Temperature": self.temperature,
"Step": self.step,
"Cycle": self.cycle,
"Event": self.event,
}
for quantity in self._column_map.keys():
@@ -353,7 +352,6 @@ def pyprobe_dataframe(self) -> pl.DataFrame:
required_columns = [
self.date if "Date" in self._column_map.keys() else None,
self.time,
self.cycle,
self.step,
self.event,
self.current,
@@ -480,24 +478,6 @@ def step(self) -> pl.Expr:
"""
return pl.col("Step")

@property
def cycle(self) -> pl.Expr:
"""Identify the cycle number.
Cycles are defined by repetition of steps. They are identified by a decrease
in the step number.
Returns:
pl.Expr: A polars expression for the cycle number.
"""
return (
(pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
.fill_null(strategy="zero")
.cum_sum()
.alias("Cycle")
.cast(pl.Int64)
)

@property
def event(self) -> pl.Expr:
"""Identify the event number.
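For contrast with the Event expression that remains (and is reproduced in the cell.py hunk above): Event increments on any change of step number, while the removed Cycle expression increments only on a decrease, i.e. when the programme loops back. A toy comparison, assuming a six-row step sequence:

```python
import polars as pl

step = pl.col("Step").cast(pl.Int64)
event = (step - step.shift() != 0).fill_null(strategy="zero").cum_sum().alias("Event")
cycle = (step - step.shift() < 0).fill_null(strategy="zero").cum_sum().alias("Cycle")

df = pl.DataFrame({"Step": [1, 2, 3, 1, 2, 3]}).with_columns(event, cycle)
print(df)  # Event: 0,1,2,3,4,5  Cycle: 0,0,0,1,1,1
```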