From d769d80983298a55b8a51412694c00ce01592124 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcos=20V=C3=A1zquez?=
Date: Wed, 29 Nov 2023 12:01:35 +0100
Subject: [PATCH] Pandas API Addons: add_prefix and add_suffix

---
 docs/user-guide/advanced/Pandas_API.ipynb | 122 +++++++++++++++++++++-
 src/pykx/pandas_api/pandas_indexing.py    |  82 +++++++++++++++
 tests/test_pandas_api.py                  |  56 ++++++++++
 3 files changed, 259 insertions(+), 1 deletion(-)

diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index cb98590..5262d0f 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -1519,6 +1519,126 @@
     "tab.rename(columns={'y': 'symbol', 'z': 'price'}).head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table.add_prefix()\n",
+    "\n",
+    "```\n",
+    "Table.add_prefix(prefix, axis=0)\n",
+    "```\n",
+    "\n",
+    "Add a prefix to the column names of a table and return the resulting Table object.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name | Type | Description | Default |\n",
+    "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n",
+    "| prefix | str | The string that will be prepended to the name of each column | _required_ |\n",
+    "| axis | int | Axis to add the prefix on: 0 for the columns, 1 for the key of a keyed table. | 0 |\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    "| Type | Description |\n",
+    "| :---: | :----------------------------------------------------------------- |\n",
+    "| Table | A table whose column names have the prefix added. |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Examples:**\n",
+    "\n",
+    "The initial table to which a prefix will be added to its columns:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add \"col_\" to table columns:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.add_prefix(prefix=\"col_\").head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table.add_suffix()\n",
+    "\n",
+    "```\n",
+    "Table.add_suffix(suffix, axis=0)\n",
+    "```\n",
+    "\n",
+    "Add a suffix to the column names of a table and return the resulting Table object.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name | Type | Description | Default |\n",
+    "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n",
+    "| suffix | str | The string that will be appended to the name of each column | _required_ |\n",
+    "| axis | int | Axis to add the suffix on: 0 for the columns, 1 for the key of a keyed table. | 0 |\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    "| Type | Description |\n",
+    "| :---: | :----------------------------------------------------------------- |\n",
+    "| Table | A table whose column names have the suffix added. |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Examples:**\n",
+    "\n",
+    "The initial table to which a suffix will be added to its columns:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add \"_col\" to table columns:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.add_suffix(suffix=\"_col\").head()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "10582eaa",
@@ -2989,7 +3109,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.11.5"
   }
  },
  "nbformat": 4,
diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py
index 954896d..a310efd 100644
--- a/src/pykx/pandas_api/pandas_indexing.py
+++ b/src/pykx/pandas_api/pandas_indexing.py
@@ -328,6 +328,36 @@ def _rename_columns(tab, labels):
                  tab, labels)  # noqa
     else:
         return q('{c:cols x; c:@[c;c?key y;y]; c xcol x}', tab, labels)
+
+
+def _pre_suf_fix_columns(tab, fix, suf=True):
+    """Add `fix` to the name of every non-key column of `tab`.
+
+    `fix` is appended when `suf` is true and prepended otherwise.
+    """
+    # q each-left (`,\:`) appends to every name; each-right (`,/:`) prepends.
+    rename = ("c: `$(string c) ,\\: string y;" if suf
+              else "c: `$(string y) ,/: string c;")
+    if "Keyed" in str(type(tab)):
+        # Only the value columns are renamed; the key columns are kept as they are.
+        return q("{c:cols value x;" + rename + "key[x]!c xcol value x}",
+                 tab, fix)
+    return q("{c:cols x;" + rename + "c xcol x}", tab, fix)
+
+
+def _pre_suf_fix_index(tab, fix, suf=True):
+    """Add `fix` to every value of the first key column of keyed table `tab`.
+
+    `fix` is appended when `suf` is true and prepended otherwise.
+    Raises `ValueError` when `tab` is not a keyed table (not yet implemented).
+    """
+    if "Keyed" not in str(type(tab)):
+        raise ValueError('nyi')
+    relabel = ("idx: `$(string idx) ,\\: string y;" if suf
+               else "idx: `$(string y) ,/: string idx;")
+    # NOTE(review): the result is keyed on a single column named `idx`, whatever
+    # the key columns of `tab` were called - confirm that this is intended.
+    return q("{idx:first flip key x;" + relabel + "([] idx)!value x}", tab, fix)
 
 
 class PandasIndexing:
@@ -453,6 +483,58 @@ def rename(self, labels=None, index=None, columns=None, axis=0,
             t = _rename_columns(t, columns)
 
         return t
+
+    def add_suffix(self, suffix=None, axis=0):
+        """Add `suffix` to the end of the column names or key values.
+
+        Parameters:
+            suffix: The string appended to each label.
+            axis: 0 to relabel the columns, 1 to relabel the key of a
+                keyed table.
+
+        Returns:
+            A table with the relabelled columns or key values.
+
+        Raises:
+            ValueError: If `suffix` is not given, if `axis` is neither 0
+                nor 1, or if `axis` is 1 and the table is not keyed.
+        """
+        # Reject only a missing argument so that '' stays a usable suffix.
+        if suffix is None:
+            raise ValueError("missing 1 required positional argument: 'suffix'")
+        # NOTE(review): pandas uses axis=0 for the index and axis=1 for the
+        # columns; confirm that the opposite mapping here is intended.
+        if axis == 0:
+            return _pre_suf_fix_columns(self, suffix, suf=True)
+        if axis == 1:
+            return _pre_suf_fix_index(self, suffix, suf=True)
+        raise ValueError(f'No axis named {axis}')
+
+    def add_prefix(self, prefix=None, axis=0):
+        """Add `prefix` to the start of the column names or key values.
+
+        Parameters:
+            prefix: The string prepended to each label.
+            axis: 0 to relabel the columns, 1 to relabel the key of a
+                keyed table.
+
+        Returns:
+            A table with the relabelled columns or key values.
+
+        Raises:
+            ValueError: If `prefix` is not given, if `axis` is neither 0
+                nor 1, or if `axis` is 1 and the table is not keyed.
+        """
+        # Reject only a missing argument so that '' stays a usable prefix.
+        if prefix is None:
+            raise ValueError("missing 1 required positional argument: 'prefix'")
+        # NOTE(review): pandas uses axis=0 for the index and axis=1 for the
+        # columns; confirm that the opposite mapping here is intended.
+        if axis == 0:
+            return _pre_suf_fix_columns(self, prefix, suf=False)
+        if axis == 1:
+            return _pre_suf_fix_index(self, prefix, suf=False)
+        raise ValueError(f'No axis named {axis}')
 
     def sample(self, n=None, frac=None, replace=False, weights=None,
                random_state=None, axis=None, ignore_index=False):
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index acfe55f..b5d190f 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -1402,6 +1402,62 @@ def test_df_rename(kx, q):
         t.rename(columns={'x': 'xXx'}, errors='raise')
 
 
+def test_df_add_prefix(kx, q):
+    q('sym:`aaa`bbb`ccc')
+    t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+
+    rez = t.add_prefix("col_")
+
+    assert q('{x~y}', rez, t.pd().add_prefix("col_"))
+
+    kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")')
+    kt_res = kx.q('([idx: `col_0`col_1`col_2`col_3`col_4] til 5)')
+
+    rez = kt.add_prefix("col_")
+    assert q('{x~y}', rez, kt.pd().add_prefix("col_"))
+
+    rez = kt.add_prefix("col_", axis=1)
+    assert q('{x~y}', kx.q("{(0!x) `idx}", rez), kx.q("{(0!x) `idx}", kt_res))
+
+    with pytest.raises(ValueError):
+        t.add_prefix()
+
+    # Prefixing the index of an unkeyed table is not implemented.
+    with pytest.raises(ValueError):
+        t.add_prefix("col_", axis=1)
+
+    with pytest.raises(ValueError):
+        t.add_prefix("col_", axis=2)
+
+
+def test_df_add_suffix(kx, q):
+    q('sym:`aaa`bbb`ccc')
+    t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+
+    rez = t.add_suffix("_col")
+
+    assert q('{x~y}', rez, t.pd().add_suffix("_col"))
+
+    kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")')
+    kt_res = kx.q('([idx: `0_col`1_col`2_col`3_col`4_col] til 5)')
+
+    rez = kt.add_suffix("_col", axis=1)
+    assert q('{x~y}', kx.q("{(0!x) `idx}", rez), kx.q("{(0!x) `idx}", kt_res))
+
+    rez = kt.add_suffix("_col")
+    assert q('{x~y}', rez, kt.pd().add_suffix("_col"))
+
+    with pytest.raises(ValueError):
+        t.add_suffix()
+
+    # Suffixing the index of an unkeyed table is not implemented.
+    with pytest.raises(ValueError):
+        t.add_suffix("_col", axis=1)
+
+    with pytest.raises(ValueError):
+        t.add_suffix("_col", axis=2)
+
+
 @pytest.mark.pandas_api
 @pytest.mark.xfail(reason='Flaky randomization')
 def test_df_sample(kx, q):