Skip to content

Commit

Permalink
implemented unit check for extensions char
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Aug 12, 2024
1 parent 2b3f875 commit 4b13d37
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 17 deletions.
61 changes: 46 additions & 15 deletions pymrio/core/mriosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1934,7 +1934,10 @@ def characterize(

def convert(self, df_map, extension_name,
agg_func="sum",
drop_not_bridged_index=True):
drop_not_bridged_index=True,
unit_column_orig="unit_orig",
unit_column_new="unit_new",
ignore_columns=None):
""" Apply the convert function to all dataframes in the extension
Parameters
Expand Down Expand Up @@ -2001,27 +2004,55 @@ def convert(self, df_map, extension_name,
make a bridge column for the ones to be dropped and map all to the same name.
Then drop this index level after the conversion.
unit_column_orig : str, optional
Name of the column in df_map with the original unit.
This will be used to check if the unit matches the original unit in the extension.
Default is "unit_orig", if None, no check is performed.
unit_column_new : str, optional
Name of the column in df_map with the new unit to be assigned to the new extension.
Default is "unit_new". If None, the same unit as in df_orig is used (TODO: explain this better and consider raising a warning).
ignore_columns : list, optional
List of column names in df_map which should be ignored.
These could be columns with additional information, etc.
The unit columns given in unit_column_orig and unit_column_new
are ignored by default.
TODO: remove after explain
Extension for extensions:
extension ... extension name
unit_orig ... the original unit (optional, for double check with the unit)
unit_new ... the new unit to be set for the extension
"""
# unit, unit_new, unit_orig
if "unit_orig" in df_map.columns:
# check if the units in the extensions confirm the units
pass
if "unit" in df_map.columns or "unit_new" in df_map.columns:
pass
# set the new unit column
else:
pass
# set unit to "undef" and raise warning

for df in self.get_DataFrame:
pass

if not ignore_columns:
ignore_columns = []

if unit_column_orig:
ignore_columns.append(unit_column_orig)
for entry in df_map.iterrows():
# need fullmatch here as the same is used in ioutil.convert
corresponding_rows = self.fullmatch(**entry[1].to_dict())
for row in corresponding_rows:
if self.unit.loc[row].unit != entry[1][unit_column_orig]:
raise ValueError(
f"Unit in extension does not match the unit in mapping for row {row}"
)
if unit_column_new:
ignore_columns.append(unit_column_new)


# for df in self.get_DataFrame:
# CONT:
#
# 1) run with one and check output
# 2) check unit aggregation if unit_column_new is not given
# - unit aggregate with str join of unique entries
# 3) make dict and build new extension
# # run convert of all dataframe and build new extension
#

class IOSystem(BaseSystem):
"""Class containing a whole EE MRIO System
Expand Down
16 changes: 14 additions & 2 deletions pymrio/tools/ioutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,11 @@ def check_df_map(df_orig, df_map):
pass


def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True):
def convert(df_orig,
df_map,
agg_func="sum",
drop_not_bridged_index=True,
ignore_columns = None):
"""Convert a DataFrame to a new classification
Parameters
Expand Down Expand Up @@ -1082,6 +1086,10 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True):
make a bridge column for the ones to be dropped and map all to the same name.
Then drop this index level after the conversion.
ignore_columns : list, optional
List of column names in df_map which should be ignored.
These could be columns with additional information, unit columns, etc.
Extension for extensions:
extension ... extension name
Expand All @@ -1098,6 +1106,9 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True):
bridge_components = namedtuple("bridge_components", ["new", "orig", "raw"])
bridges = []

if not ignore_columns:
ignore_columns = []

if isinstance(df_orig, pd.Series):
df_orig = pd.DataFrame(df_orig)

Expand Down Expand Up @@ -1125,7 +1136,8 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True):
stacked_columns = []
orig_column_index = df_orig.columns
for col in df_map.columns:
if col in ["factor", "unit"]:
# TODO: other names for unit should be allowed
if col in (["factor"] + ignore_columns):
continue
if col not in df_orig.index.names:
if col in df_orig.columns.names:
Expand Down
35 changes: 35 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,41 @@ def test_characterize_extension(fix_testmrio):
check_names=False,
)

def test_extension_convert(fix_testmrio):
    """Prepare the mapping table for the extension ``convert`` test.

    This is still a stub: only the mapping DataFrame is assembled, no
    conversion is run and nothing is asserted yet.
    """
    map_columns = [
        "stressor",
        "compartment",
        "total__stressor",
        "factor",
        "unit_orig",
        "unit_new",
    ]
    # One mapping rule per row, in the column order given above.
    map_rows = [
        ["emis.*", "air|water", "total_regex", 1000, "kg", "t"],
        ["emission_type[1|2]", ".*", "total_sum", 1e-3, "kg", "g"],
        ["emission_type1", ".*", "air_emissions", 1000, "kg", "t"],
    ]
    df_map = pd.DataFrame(data=map_rows, columns=map_columns)

def test_extension_convert_test_unit(fix_testmrio):
    """Check that ``convert`` raises on a unit mismatch in the mapping.

    Two mappings are tried against the ``emissions`` extension of the test
    MRIO system:

    * ``df_fail1`` declares "g" in the default ``unit_orig`` column.
    * ``df_fail2`` declares "t" in a custom ``unit_emis`` column, which is
      passed to ``convert`` through ``unit_column_orig``.

    Both are expected to differ from the unit stored in the extension
    (presumably "kg" -- verify against the test data), so each call must
    raise the unit-mismatch ``ValueError``.
    """
    # Pin the error message: a bare ``pytest.raises(ValueError)`` would also
    # pass on any unrelated ValueError raised during matching.
    unit_error = "Unit in extension does not match the unit in mapping"

    df_fail1 = pd.DataFrame(
        columns=[
            "stressor",
            "compartment",
            "total__stressor",
            "factor",
            "unit_orig",
            "unit_new",
        ],
        data=[
            ["emis.*", "air|water", "total_regex", 1000, "g", "t"],
        ],
    )

    df_fail2 = pd.DataFrame(
        columns=[
            "stressor",
            "compartment",
            "total__stressor",
            "factor",
            "unit_emis",
            "unit_new",
        ],
        data=[
            ["emission_type1", "air", "total_regex", 1000, "t", "t"],
        ],
    )

    with pytest.raises(ValueError, match=unit_error):
        fix_testmrio.testmrio.emissions.convert(
            df_fail1, extension_name="emissions_new"
        )

    with pytest.raises(ValueError, match=unit_error):
        fix_testmrio.testmrio.emissions.convert(
            df_fail2,
            extension_name="emissions_new",
            unit_column_orig="unit_emis",
        )


def test_reset_to_flows(fix_testmrio):
tt = fix_testmrio.testmrio
Expand Down

0 comments on commit 4b13d37

Please sign in to comment.