Merge pull request #90 from ImperialCollegeLondon/add-temperature-data
Add methods for importing external data
tomjholland authored Aug 14, 2024
2 parents 8846410 + 0bf97dc commit d3699ea
Showing 20 changed files with 424 additions and 125 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sphinx.yml
@@ -21,8 +21,8 @@ jobs:
       - name: Gather dependencies
         run: |
           pip install -U pip
-          pip install .
           pip install -r requirements-dev.txt
+          pip install .
           pip install ipykernel
           sudo apt-get install pandoc
2 changes: 1 addition & 1 deletion docs/source/developer_guide/developer_installation.rst
@@ -4,7 +4,7 @@ Installation
 To install PyProBE you must be running Python 3.11 or later. It is recommended to use a
 virtual environment to install PyProBE, for example venv or conda.
 
-The steps to install PyProBE wih developer settings are as follows:
+The steps to install PyProBE with developer settings are as follows:
 
 1. Enter a directory in which you wish to install PyProBE:
11 changes: 11 additions & 0 deletions docs/source/user_guide/importing_data.rst
@@ -174,6 +174,17 @@ parametrically. The steps are as follows:
             filename = output_name_generator,
             filename_inputs = ["Cycler", "Channel"])
 
+Adding data not from a cycler
+-----------------------------
+In your battery experiment, it is likely that you will be collecting data from
+sources in addition to your battery cycler. This data can be added to your
+:class:`~pyprobe.filters.Procedure` object after it has been created, with its
+:func:`~pyprobe.filters.Procedure.add_external_data` method.
+
+The data that you provide must be a time series, with a column that can be
+interpreted in DateTime format. This is usually a string of the form
+``"2024-02-29 09:19:58.554"``. PyProBE will interpolate your data onto the
+time axis of the existing cycling data, so it can be filtered as normal.
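
For example, a short usage sketch of the method described above — the file name and source column name here are hypothetical, and `procedure` is assumed to be an existing `pyprobe.filters.Procedure`:

```python
# Hedged sketch: "temperature_log.csv" is a hypothetical CSV with a "Date"
# column and a "Temp (C)" thermocouple column.
procedure.add_external_data(
    filepath="temperature_log.csv",
    importing_columns={"Temp (C)": "Temperature [C]"},  # rename on import
    date_column_name="Date",
)
```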


.. footbibliography::
8 changes: 4 additions & 4 deletions pyprobe/analysis/cycling.py
@@ -47,22 +47,22 @@ def summary(self, dchg_before_chg: bool = True) -> Result:
         )
 
         self._create_capacity_throughput()
-        lf_capacity_throughput = self.input_data.base_dataframe.groupby(
+        lf_capacity_throughput = self.input_data.base_dataframe.group_by(
             "Cycle", maintain_order=True
         ).agg(pl.col("Capacity Throughput [Ah]").first())
-        lf_time = self.input_data.base_dataframe.groupby(
+        lf_time = self.input_data.base_dataframe.group_by(
             "Cycle", maintain_order=True
         ).agg(pl.col("Time [s]").first())
 
         lf_charge = (
             self.input_data.charge()
-            .base_dataframe.groupby("Cycle", maintain_order=True)
+            .base_dataframe.group_by("Cycle", maintain_order=True)
             .agg(pl.col("Capacity [Ah]").max() - pl.col("Capacity [Ah]").min())
             .rename({"Capacity [Ah]": "Charge Capacity [Ah]"})
         )
         lf_discharge = (
             self.input_data.discharge()
-            .base_dataframe.groupby("Cycle", maintain_order=True)
+            .base_dataframe.group_by("Cycle", maintain_order=True)
             .agg(pl.col("Capacity [Ah]").max() - pl.col("Capacity [Ah]").min())
             .rename({"Capacity [Ah]": "Discharge Capacity [Ah]"})
         )
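
The change above tracks polars' rename of `groupby` to `group_by`; a minimal standalone sketch of the new spelling, with made-up data:

```python
import polars as pl

df = pl.DataFrame({"Cycle": [0, 0, 1, 1], "Time [s]": [0.0, 1.0, 2.0, 3.0]})

# group_by (formerly groupby) with maintain_order=True keeps cycles in the
# order they first appear.
first_times = df.group_by("Cycle", maintain_order=True).agg(
    pl.col("Time [s]").first()
)
print(first_times)
```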
39 changes: 35 additions & 4 deletions pyprobe/cyclers/basecycler.py
@@ -37,7 +37,6 @@ def model_post_init(self, __context: Any) -> None:
         """Post initialization method for the BaseModel."""
         dataframe_list = self.get_dataframe_list(self.input_data_path)
         self._imported_dataframe = self.get_imported_dataframe(dataframe_list)
-        self._dataframe_columns = self._imported_dataframe.columns
 
     @staticmethod
     def read_file(filepath: str) -> pl.DataFrame | pl.LazyFrame:
@@ -73,10 +72,10 @@ def get_dataframe_list(
         files = glob.glob(input_data_path)
         files = self._sort_files(files)
         list = [self.read_file(file) for file in files]
-        all_columns = set([col for df in list for col in df.columns])
+        all_columns = set([col for df in list for col in df.collect_schema().names()])
         indices_to_remove = []
         for i in range(len(list)):
-            if len(list[i].columns) < len(all_columns):
+            if len(list[i].collect_schema().names()) < len(all_columns):
                 indices_to_remove.append(i)
                 warnings.warn(
                     f"File {os.path.basename(files[i])} has missing columns, "
@@ -151,8 +150,18 @@ def required_columns(self) -> Dict[str, pl.Expr]:
             "Current [A]": self.current,
             "Voltage [V]": self.voltage,
             "Capacity [Ah]": self.capacity,
+            "Temperature [C]": self.temperature,
         }
 
+    @property
+    def _dataframe_columns(self) -> List[str]:
+        """The columns of the DataFrame.
+
+        Returns:
+            List[str]: The columns.
+        """
+        return self._imported_dataframe.collect_schema().names()
+
     @property
     def pyprobe_dataframe(self) -> pl.DataFrame:
         """The DataFrame containing the required columns.
@@ -171,7 +180,7 @@ def date(self) -> pl.Expr:
         """
         if (
             self._imported_dataframe.dtypes[
-                self._imported_dataframe.columns.index("Date")
+                self._imported_dataframe.collect_schema().names().index("Date")
             ]
             != pl.Datetime
         ):
@@ -302,6 +311,28 @@ def capacity(self) -> pl.Expr:
         else:
             return self.capacity_from_ch_dch
 
+    @property
+    def temperature(self) -> pl.Expr:
+        """Identify and format the temperature column.
+
+        An optional column; if it is not found, a column of null values is returned.
+
+        Returns:
+            pl.Expr: A polars expression for the temperature column.
+        """
+        try:
+            return (
+                self.search_columns(
+                    self._dataframe_columns,
+                    self.column_dict["Temperature"],
+                    self.column_name_pattern,
+                )
+                .to_default_name_and_unit()
+                .cast(pl.Float64)
+            )
+        except ValueError:
+            return pl.lit(None).alias("Temperature [C]")
+
     @property
     def cycle(self) -> pl.Expr:
         """Identify the cycle number.
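
The new `temperature` property is an optional-column pattern: attempt the usual column search, and fall back to a typed null column when nothing matches. A minimal sketch of the fallback, independent of PyProBE's column-search helpers:

```python
import polars as pl

df = pl.DataFrame({"Voltage [V]": [3.7, 3.8]})  # no temperature column present

# Fall back to a null Float64 column so downstream code can always rely on
# "Temperature [C]" existing.
if "Temperature [C]" in df.columns:
    temperature = pl.col("Temperature [C]").cast(pl.Float64)
else:
    temperature = pl.lit(None, dtype=pl.Float64).alias("Temperature [C]")

print(df.with_columns(temperature))
```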
13 changes: 2 additions & 11 deletions pyprobe/cyclers/biologic.py
@@ -27,6 +27,7 @@ class Biologic(BaseCycler):
         "Voltage": "Ecell",
         "Charge Capacity": "Q charge",
         "Discharge Capacity": "Q discharge",
+        "Temperature": "Temperature",
     }
 
     @staticmethod
@@ -54,24 +55,14 @@ def read_file(filepath: str) -> pl.DataFrame | pl.LazyFrame:
         _, value = start_time_line.split(" : ")
         start_time = datetime.strptime(value.strip(), "%m/%d/%Y %H:%M:%S.%f")
 
-        columns_to_read = ["time/", "Ns", "I/", "Ecell/", "Q charge/", "Q discharge/"]
-
-        all_columns = pl.scan_csv(
-            filepath, skip_rows=n_header_lines - 1, separator="\t"
-        ).columns
-        selected_columns = []
-        for substring in columns_to_read:
-            found_columns = [col for col in all_columns if substring in col]
-            selected_columns.extend(found_columns)
-
         dataframe = pl.scan_csv(
             filepath,
             skip_rows=n_header_lines - 1,
             separator="\t",
         )
 
         dataframe = dataframe.with_columns(
-            (pl.col("time/s") * 1000000 + pl.lit(start_time))
+            (pl.col("time/s").cast(pl.Duration) + pl.lit(start_time))
             .cast(pl.Datetime)
             .alias("Date")
         )
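
The `Date` change above offsets the file's start timestamp by the elapsed-seconds column. A standalone sketch of the idea, with an explicit seconds-to-microseconds conversion (the microsecond time unit is an assumption of this sketch):

```python
import polars as pl
from datetime import datetime

start_time = datetime(2024, 2, 29, 9, 19, 58)
df = pl.DataFrame({"time/s": [0.0, 0.5, 1.0]})

# Elapsed seconds -> integer microseconds -> Duration, then offset the start
# timestamp to get an absolute Datetime column.
df = df.with_columns(
    (
        (pl.col("time/s") * 1_000_000)
        .cast(pl.Int64)
        .cast(pl.Duration(time_unit="us"))
        + pl.lit(start_time)
    ).alias("Date")
)
print(df)
```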
1 change: 1 addition & 0 deletions pyprobe/cyclers/neware.py
@@ -23,6 +23,7 @@ class Neware(BaseCycler):
         "Voltage": "Voltage",
         "Charge Capacity": "Chg. Cap.",
         "Discharge Capacity": "DChg. Cap.",
+        "Temperature": "Temperature",
     }
 
     @property
2 changes: 1 addition & 1 deletion pyprobe/dashboard.py
@@ -38,7 +38,7 @@ def dataframe_with_selections(df: pd.DataFrame) -> List[int]:
         df_with_selections,
         hide_index=True,  # Hide the index column
         column_config={"Select": st.column_config.CheckboxColumn(required=True)},
-        disabled=df.columns,
+        disabled=df.collect_schema().names(),
     )
 
     # Filter the dataframe using the temporary column, then drop the column
56 changes: 56 additions & 0 deletions pyprobe/filters.py
@@ -1,4 +1,6 @@
 """A module for the filtering classes."""
+import os
+import warnings
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import polars as pl
@@ -304,6 +306,60 @@ def experiment_names(self) -> List[str]:
         """
         return list(self.titles)
 
+    def add_external_data(
+        self,
+        filepath: str,
+        importing_columns: List[str] | Dict[str, str],
+        date_column_name: str = "Date",
+    ) -> None:
+        """Add data from another source to the procedure.
+
+        The data must be timestamped, with a column that can be interpreted in
+        DateTime format. The data will be interpolated to the procedure's time.
+
+        Args:
+            filepath (str): The path to the external file.
+            importing_columns (List[str] | Dict[str, str]):
+                The columns to import from the external file. If a list, the
+                columns are imported as-is. If a dict, the keys are the column
+                names in the external data and the values are the names to
+                rename them to.
+            date_column_name (str, optional):
+                The name of the date column in the external data. Defaults to
+                "Date".
+        """
+        external_data = self.load_external_file(filepath)
+        if isinstance(importing_columns, dict):
+            external_data = external_data.select(
+                [date_column_name] + list(importing_columns.keys())
+            )
+            external_data = external_data.rename(importing_columns)
+        elif isinstance(importing_columns, list):
+            external_data = external_data.select([date_column_name] + importing_columns)
+        self.add_new_data_columns(external_data, date_column_name)
+
+    def load_external_file(self, filepath: str) -> pl.LazyFrame:
+        """Load an external file into a LazyFrame.
+
+        Supported file types are CSV, Parquet, and Excel. For maximum
+        performance, consider using Parquet files. If you have an Excel file,
+        consider converting it to CSV before loading.
+
+        Args:
+            filepath (str): The path to the external file.
+        """
+        file = os.path.basename(filepath)
+        file_ext = os.path.splitext(file)[1]
+        match file_ext:
+            case ".csv":
+                return pl.scan_csv(filepath)
+            case ".parquet":
+                return pl.scan_parquet(filepath)
+            case ".xlsx":
+                warnings.warn("Excel reading is slow. Consider converting to CSV.")
+                return pl.read_excel(filepath)
+            case _:
+                raise ValueError(f"Unsupported file type: {file_ext}")
+
     @classmethod
     def _flatten(cls, lst: int | List[Any]) -> List[int]:
         """Flatten a list of lists into a single list.
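
Both accepted forms of `importing_columns`, as a hedged usage sketch (`procedure` and the file and column names are hypothetical):

```python
# List form: import the columns under their existing names.
procedure.add_external_data(
    "thermocouple.csv", importing_columns=["Temperature [C]"]
)

# Dict form: select the source columns and rename them on import.
procedure.add_external_data(
    "thermocouple.csv",
    importing_columns={"Temp/degC": "Temperature [C]"},
)
```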
4 changes: 3 additions & 1 deletion pyprobe/rawdata.py
@@ -44,7 +44,9 @@ def _check_required_columns(
     ) -> "RawData":
         """Check if the required columns are present in the input_data."""
         missing_columns = [
-            col for col in required_columns if col not in dataframe.columns
+            col
+            for col in required_columns
+            if col not in dataframe.collect_schema().names()
         ]
         if missing_columns:
             raise ValueError(f"Missing required columns: {missing_columns}")
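
The recurring `.columns` → `collect_schema().names()` substitution throughout this commit keeps column lookups working on both eager and lazy frames; a minimal sketch:

```python
import polars as pl

lf = pl.LazyFrame({"Time [s]": [0.0, 1.0], "Voltage [V]": [3.7, 3.8]})

# collect_schema() resolves the schema without collecting any data; .names()
# then lists the column names.
print(lf.collect_schema().names())  # ['Time [s]', 'Voltage [V]']
```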
62 changes: 57 additions & 5 deletions pyprobe/result.py
@@ -55,7 +55,7 @@ def __call__(self, column_name: str) -> NDArray[np.float64]:
         )
 
         self._check_units(column_name)
-        if column_name not in self.data.columns:
+        if column_name not in self.data.collect_schema().names():
             raise ValueError(f"Column '{column_name}' not in data.")
         else:
             return self.data[column_name].to_numpy()
@@ -72,7 +72,7 @@ def __getitem__(self, *column_name: str) -> "Result":
         column_names = list(column_name)
         for col in column_names:
             self._check_units(col)
-        if not all(col in self.data.columns for col in column_names):
+        if not all(col in self.data.collect_schema().names() for col in column_names):
             raise ValueError("One or more columns not in data.")
         else:
             return Result(base_dataframe=self.data.select(column_names), info=self.info)
@@ -145,7 +145,7 @@ def _get_filtered_array(
     ) -> NDArray[np.float64]:
         for column_name in filtering_column_names:
             self._check_units(column_name)
-            if column_name not in self.base_dataframe.columns:
+            if column_name not in self.base_dataframe.collect_schema().names():
                 raise ValueError(f"Column '{column_name}' not in data.")
         frame_to_return = self.base_dataframe.select(filtering_column_names)
         if isinstance(frame_to_return, pl.LazyFrame):
@@ -160,7 +160,7 @@ def _check_units(self, column_name: str) -> None:
         Args:
             column_name (str): The column name to convert to.
         """
-        if column_name not in self.base_dataframe.columns:
+        if column_name not in self.base_dataframe.collect_schema().names():
             converter_object = Units(column_name)
             if converter_object.input_quantity in self.quantities:
                 instruction = converter_object.from_default_unit()
@@ -193,7 +193,7 @@ def quantities(self) -> List[str]:
     @property
     def column_list(self) -> List[str]:
         """Return a list of the columns in the data."""
-        return self.base_dataframe.columns
+        return self.base_dataframe.collect_schema().names()
 
     def define_column(self, column_name: str, definition: str) -> None:
         """Define a new column when it is added to the dataframe.
@@ -230,6 +230,58 @@ def clean_copy(
             column_definitions=column_definitions,
         )
 
+    def add_new_data_columns(
+        self, new_data: pl.DataFrame | pl.LazyFrame, date_column_name: str
+    ) -> None:
+        """Add new data columns to the result object.
+
+        The data must be time series data with a date column.
+
+        Args:
+            new_data (pl.DataFrame | pl.LazyFrame):
+                The new data to add to the result object.
+            date_column_name (str):
+                The name of the column in the new data containing the date.
+        """
+        # Get the columns of the new data, excluding the date column.
+        new_data_cols = new_data.collect_schema().names()
+        new_data_cols.remove(date_column_name)
+        # Match the laziness of the new data to the base dataframe.
+        is_new_data_lazy = isinstance(new_data, pl.LazyFrame)
+        is_base_dataframe_lazy = isinstance(self.base_dataframe, pl.LazyFrame)
+        if is_new_data_lazy and not is_base_dataframe_lazy:
+            new_data = new_data.collect()
+        elif is_base_dataframe_lazy and not is_new_data_lazy:
+            new_data = new_data.lazy()
+        if (
+            new_data.dtypes[new_data.collect_schema().names().index(date_column_name)]
+            != pl.Datetime
+        ):
+            new_data = new_data.with_columns(
+                pl.col(date_column_name).str.to_datetime()
+            )
+
+        # Ensure both DataFrames have Datetime columns in the same time unit.
+        new_data = new_data.with_columns(
+            pl.col(date_column_name).dt.cast_time_unit("us")
+        )
+        self.base_dataframe = self.base_dataframe.with_columns(
+            pl.col("Date").dt.cast_time_unit("us")
+        )
+
+        new_data = self.base_dataframe.join(
+            new_data,
+            left_on="Date",
+            right_on=date_column_name,
+            how="full",
+            coalesce=True,
+        )
+        new_data = new_data.with_columns(
+            pl.col(new_data_cols).interpolate_by("Date")
+        ).select(pl.col(["Date"] + new_data_cols))
+
+        self.base_dataframe = self.base_dataframe.join(
+            new_data, on="Date", how="left", coalesce=True
+        )
+
     @classmethod
     def build(
         cls,
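
A toy end-to-end sketch of the join-and-interpolate pattern used by `add_new_data_columns`, with made-up readings:

```python
import polars as pl
from datetime import datetime

base = pl.DataFrame({
    "Date": [datetime(2024, 1, 1, 0, 0, s) for s in range(5)],
    "Voltage [V]": [3.70, 3.71, 3.72, 3.73, 3.74],
})
external = pl.DataFrame({
    "Date": [datetime(2024, 1, 1, 0, 0, 0), datetime(2024, 1, 1, 0, 0, 4)],
    "Temperature [C]": [25.0, 27.0],
})

# A full join aligns both time axes, interpolate_by fills the external column
# at the cycler's timestamps, and a final left join keeps only cycler rows.
aligned = (
    base.join(external, on="Date", how="full", coalesce=True)
    .with_columns(pl.col("Temperature [C]").interpolate_by("Date"))
    .select(["Date", "Temperature [C]"])
)
merged = base.join(aligned, on="Date", how="left", coalesce=True)
print(merged)
```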
2 changes: 2 additions & 0 deletions pyprobe/units.py
@@ -17,6 +17,7 @@ class Units:
         "Ah": "Capacity",
         "A.h": "Capacity",
         "s": "Time",
+        "C": "Temperature",
     }
 
     def __init__(
@@ -47,6 +48,7 @@ def _get_default_unit(self, unit: str) -> Tuple[Optional[str], str]:
         Returns:
             Tuple[Optional[str], str]: The prefix and default unit.
         """
+        unit = re.sub(r"[^a-zA-Z]", "", unit)  # Remove non-alphabetic characters
         if unit in self.time_unit_dict.keys():
             return None, "s"
         if unit[0] in self.prefix_dict:
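
The added `re.sub` line normalises unit strings before lookup; a quick standalone check of what it strips (the sample inputs are assumptions):

```python
import re

# Keep only letters, so decorated unit strings match the plain lookup keys.
for raw in ["°C", "(C)", "A.h", "s"]:
    print(repr(re.sub(r"[^a-zA-Z]", "", raw)))
# -> 'C', 'C', 'Ah', 's'
```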