diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index ce9919f4..3a76b063 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -1934,7 +1934,10 @@ def characterize( def convert(self, df_map, extension_name, agg_func="sum", - drop_not_bridged_index=True): + drop_not_bridged_index=True, + unit_column_orig="unit_orig", + unit_column_new="unit_new", + ignore_columns=None): """ Apply the convert function to all dataframes in the extension Parameters @@ -2001,27 +2004,55 @@ def convert(self, df_map, extension_name, make a bridge column for the ones to be dropped and map all to the same name. Then drop this index level after the conversion. + unit_column_orig : str, optional + Name of the column in df_map with the original unit. + This will be used to check if the unit matches the original unit in the extension. + Default is "unit_orig", if None, no check is performed. + unit_column_new : str, optional + Name of the column in df_map with the new unit to be assigned to the new extension. + Default is "unit_new", if None same unit as in df_orig TODO EXPLAIN BETTER, THINK WARNING + + ignore_columns : list, optional + List of column names in df_map which should be ignored. + These could be columns with additional information, etc. + The unit columns given in unit_column_orig and unit_column_new + are ignored by default. + + + TODO: remove after explain Extension for extensions: extension ... extension name unit_orig ... the original unit (optional, for double check with the unit) unit_new ... the new unit to be set for the extension """ - # unit, unit_new, unit_orig - if "unit_orig" in df_map.columns: - # check if the units in the extensions confirm the units - pass - if "unit" in df_map.columns or "unit_new" in df_map.columns: - pass - # set the new unit column - else: - pass - # set unit to "undef" and raise warning - - for df in self.get_DataFrame: - pass - + if not ignore_columns: + ignore_columns = [] + + if unit_column_orig: + ignore_columns.append(unit_column_orig) + for entry in df_map.iterrows(): + # need fullmatch here as the same is used in ioutil.convert + corresponding_rows = self.fullmatch(**entry[1].to_dict()) + for row in corresponding_rows: + if self.unit.loc[row].unit != entry[1][unit_column_orig]: + raise ValueError( + f"Unit in extension does not match the unit in mapping for row {row}" + ) + if unit_column_new: + ignore_columns.append(unit_column_new) + + + # for df in self.get_DataFrame: + # CONT: + # + # 1) run with one and check output + # 2) check unit aggregation if unit_column_new is not given + # - unit aggregate with str join of unique entries + # 3) make dict and build new extension + # # run convert of all dataframe and build new extension + # class IOSystem(BaseSystem): """Class containing a whole EE MRIO System diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py index f0605cc6..4ffe4e17 100644 --- a/pymrio/tools/ioutil.py +++ b/pymrio/tools/ioutil.py @@ -1007,7 +1007,11 @@ def check_df_map(df_orig, df_map): pass -def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True): +def convert(df_orig, + df_map, + agg_func="sum", + drop_not_bridged_index=True, + ignore_columns = None): """Convert a DataFrame to a new classification Parameters @@ -1082,6 +1086,10 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True): make a bridge column for the ones to be dropped and map all to the same name. Then drop this index level after the conversion. + ignore_columns : list, optional + List of column names in df_map which should be ignored. + These could be columns with additional information, unit columns, etc. + Extension for extensions: extension ... extension name @@ -1098,6 +1106,9 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True): bridge_components = namedtuple("bridge_components", ["new", "orig", "raw"]) bridges = [] + if not ignore_columns: + ignore_columns = [] + if isinstance(df_orig, pd.Series): df_orig = pd.DataFrame(df_orig) @@ -1125,7 +1136,8 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged_index=True): stacked_columns = [] orig_column_index = df_orig.columns for col in df_map.columns: - if col in ["factor", "unit"]: + # TODO: other names for unit should be allowed + if col in (["factor"] + ignore_columns): continue if col not in df_orig.index.names: if col in df_orig.columns.names: diff --git a/tests/test_core.py b/tests/test_core.py index 91e69677..759e6e04 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -552,6 +552,41 @@ def test_characterize_extension(fix_testmrio): check_names=False, ) +def test_extension_convert(fix_testmrio): + + df_map = pd.DataFrame( + columns=["stressor", "compartment", "total__stressor", "factor", "unit_orig", "unit_new"], + data=[ + ["emis.*", "air|water", "total_regex", 1000, "kg", "t"], + ["emission_type[1|2]", ".*", "total_sum", 1E-3, "kg", "g"], + ["emission_type1", ".*", "air_emissions", 1000, "kg", "t"], + ], + ) + + pass + +def test_extension_convert_test_unit(fix_testmrio): + + df_fail1 = pd.DataFrame( + columns=["stressor", "compartment", "total__stressor", "factor", "unit_orig", "unit_new"], + data=[ + ["emis.*", "air|water", "total_regex", 1000, "g", "t"], + ], + ) + + df_fail2 = pd.DataFrame( + columns=["stressor", "compartment", "total__stressor", "factor", "unit_emis", "unit_new"], + data=[ + ["emission_type1", "air", "total_regex", 1000, "t", "t"], + ], + ) + + with pytest.raises(ValueError): + fix_testmrio.testmrio.emissions.convert(df_fail1, extension_name="emissions_new") + + with pytest.raises(ValueError): + fix_testmrio.testmrio.emissions.convert(df_fail2, extension_name="emissions_new", unit_column_orig="unit_emis") + def test_reset_to_flows(fix_testmrio): tt = fix_testmrio.testmrio