Skip to content

Commit

Permalink
fix empty entries in df_map
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Aug 30, 2024
1 parent 91d53f5 commit 549afe7
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
5 changes: 5 additions & 0 deletions pymrio/tools/ioutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,10 @@ def convert(
"""

bridge_columns = [col for col in df_map.columns if "__" in col]

# groupby breaks with NaNs or None, fix it here
df_map.loc[:, bridge_columns] = df_map.loc[:, bridge_columns].fillna("")

unique_new_index = (
df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index
)
Expand All @@ -1111,6 +1115,7 @@ def convert(
if isinstance(df_orig, pd.Series):
df_orig = pd.DataFrame(df_orig)


# some consistency checks of arguments and restructuring if everything is ok
if len(bridge_columns) == 0:
raise ValueError("No columns with '__' in the mapping DataFrame")
Expand Down
53 changes: 52 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def test_convert_rename_spread_index():
rename_bridge_indexed.index.names = ["flow", "class", "class2"]
pdt.assert_index_equal(renamed_simple.index, rename_bridge_indexed.index)

# TEST WITH REGIONAL SPECS
# TEST WITH COLUMN SPECS

rename_bridge_with_reg_spec = pd.DataFrame(
columns=[
Expand Down Expand Up @@ -534,6 +534,56 @@ def test_convert_rename_spread_index():
99,
)


# TEST WITH EMPTY INDEX


rename_bridge_missing_string = pd.DataFrame(
columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"],
data=[
["em1", "emission1", "to_air", "to_air (unspecified)"],
["em2", "emission2", "to_air", "to_air (specified)"],
["em3", "emission3", "to_water",],
],
)

rename_bridge_missing_nan = pd.DataFrame(
columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"],
data=[
["em1", "emission1", "to_air", "to_air (unspecified)"],
["em2", "emission2", "to_air", "to_air (specified)"],
["em3", "emission3", "to_water", np.nan],
],
)

rename_bridge_missing_none = pd.DataFrame(
columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"],
data=[
["em1", "emission1", "to_air", "to_air (unspecified)"],
["em2", "emission2", "to_air", "to_air (specified)"],
["em3", "emission3", "to_water", None],
],
)


renamed_missing_string = convert(to_char, rename_bridge_missing_string)
renamed_missing_nan = convert(to_char, rename_bridge_missing_nan)
renamed_missing_none = convert(to_char, rename_bridge_missing_none)

renamed_missing_none

pdt.assert_frame_equal(renamed_missing_string, renamed_missing_nan)
pdt.assert_frame_equal(renamed_missing_string, renamed_missing_none)

assert all(renamed_simple.columns == to_char.columns)
rename_bridge_indexed = rename_bridge_simple.set_index(
["flow__stressor", "class__stressor", "class2__stressor"]
)
rename_bridge_indexed.index.names = ["flow", "class", "class2"]
pdt.assert_index_equal(renamed_simple.index, rename_bridge_indexed.index)



# TEST WITH RENAME IN MULTIINDEX

to_char_multi = pd.DataFrame(
Expand Down Expand Up @@ -856,6 +906,7 @@ def test_convert_characterize():
char5_res.T.groupby(level="region").sum().T, char4_calc_nostack.astype("float")
)

# TODO: test case for multiindex characterization on one of the inner levels - does not work in the GLAM example

def test_convert_wrong_inputs():
to_char = pd.DataFrame(
Expand Down

0 comments on commit 549afe7

Please sign in to comment.