From 7fdaf1c2d62245fe7a1c06c59528925446837900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20H=C3=B6rsch?= Date: Thu, 25 May 2023 18:39:19 +0200 Subject: [PATCH] Make extractlevel work with singlelevel indices (#18) --- CHANGELOG.rst | 4 ++++ src/pandas_indexing/core.py | 19 ++++++++++++++++--- tests/test_core.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e8b4905..7a41cd1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,10 @@ Changelog ========= +v0.2.6 (2023-05-25) +------------------------------------------------------------ +* :func:`~core.extractlevel` can be used on non-multiindex, like + f.ex. ``extractlevel(df, "{sector}|{gas}")`` :pull:`17` * :func:`~selectors.isin` accepts callable filters :pull:`16`, f.ex. ``df.loc[isin(year=lambda s: s>2000)]`` * New function :func:`~core.concat` makes concatenation level aware :pull:`14` diff --git a/src/pandas_indexing/core.py b/src/pandas_indexing/core.py index 364970e..62c61aa 100644 --- a/src/pandas_indexing/core.py +++ b/src/pandas_indexing/core.py @@ -554,11 +554,16 @@ def semijoin( def _extractlevel( - index: Index, drop: bool = False, **templates: str + index: Index, template: Optional[str] = None, drop: bool = False, **templates: str ) -> Tuple[Index, list[str]]: index = ensure_multiindex(index) all_identifiers = set() + if template is not None: + if len(index.names) > 1: + raise ValueError("``template`` may only be non-null for single index level") + templates[index.names[0]] = template + for dim, template in templates.items(): identifiers = re.findall(r"\{([a-zA-Z_]+)\}", template) all_identifiers.update(identifiers) @@ -594,6 +599,8 @@ def _extractlevel( ) def extractlevel( index_or_data: T, + template: Optional[str] = None, + *, drop: bool = False, dropna: bool = True, axis: Axis = 0, @@ -605,6 +612,8 @@ def extractlevel( Parameters ----------\ {index_or_data} + template : str, optional + Extraction template for a single level drop : bool, default False Whether to keep the split dimension dropna : bool, default True @@ -622,12 +631,16 @@ def extractlevel( ------ ValueError If ``dim`` is not a dimension of ``index_or_series`` + ValueError + If ``template`` is given, while index has more than one level """ if isinstance(index_or_data, Index): - index_or_data, identifiers = _extractlevel(index_or_data, drop, **templates) + index_or_data, identifiers = _extractlevel( + index_or_data, template, drop, **templates + ) else: index, identifiers = _extractlevel( - get_axis(index_or_data, axis), drop, **templates + get_axis(index_or_data, axis), template, drop, **templates ) index_or_data = index_or_data.set_axis(index, axis=axis) diff --git a/tests/test_core.py b/tests/test_core.py index 98dbdf6..1e2134e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -228,6 +228,24 @@ def test_extractlevel_options(mdf): extractlevel(mdf, var="{e}|{typ}") +def test_extractlevel_single(midx): + sidx = Index(["e|foo", "e|bar", "bar"]) + assert_index_equal( + extractlevel(sidx, "{e}|{typ}", drop=True), + MultiIndex.from_arrays([["e", "e"], ["foo", "bar"]], names=["e", "typ"]), + ) + + sidx = Index(["e|foo", "e|bar", "bar"], name="named") + assert_index_equal( + extractlevel(sidx, "{e}|{typ}", drop=True), + MultiIndex.from_arrays([["e", "e"], ["foo", "bar"]], names=["e", "typ"]), + ) + + with pytest.raises(ValueError): + # MultiIndex input with single level template + extractlevel(midx, "{e}|{typ}") + + def test_concat(mdf): assert_frame_equal(concat([mdf.iloc[:1], mdf.iloc[1:]]), mdf)