Skip to content

Commit

Permalink
Addition of kurt function
Browse files Browse the repository at this point in the history
  • Loading branch information
Miguel Gómez committed Jan 15, 2024
1 parent d28e93a commit 99bdb1b
Show file tree
Hide file tree
Showing 3 changed files with 349 additions and 0 deletions.
249 changes: 249 additions & 0 deletions docs/user-guide/advanced/Pandas_API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,255 @@
"tab.mean(axis=1)"
]
},
{
"cell_type": "markdown",
"id": "fe565b65-fbf2-47ba-a26e-791d09fd4f55",
"metadata": {},
"source": [
"### Table.kurt()\n",
"\n",
"```\n",
"Table.kurt(axis=0, skipna=True, numeric_only=False)\n",
"```\n",
"\n",
"Return unbiased kurtosis over requested axis. Kurtosis obtained using Fisher’s definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.\n",
"\n",
"\n",
"**Parameters:**\n",
"\n",
"| Name | Type | Description | Default |\n",
"| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
"| axis | int | Axis for the function to be applied on. 0 is columns, 1 is rows. | 0 |\n",
"| skipna | bool | Not yet implemented; the method does not currently accept this argument. | True |\n",
"| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n",
"\n",
"**Returns:**\n",
"\n",
"| Type | Description |\n",
"| :--------: | :--------------------------------------------------------------------------------------- |\n",
"| Dictionary | Map of column names (or row indices when `axis=1`) to their kurtosis values |"
]
},
{
"cell_type": "markdown",
"id": "e6069cac-d260-4f80-9688-3d1ec273cd22",
"metadata": {},
"source": [
"**Examples:**\n",
"\n",
"Calculate the kurtosis across the columns of a table"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4219c826-a84b-4722-9847-372d3837acdb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"pykx.Table(pykx.q('\n",
"a b c d \n",
"---------\n",
"1 1 7 7 \n",
"2 2 8 11\n",
"2 6 9 14\n",
"4 7 10 14\n",
"'))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tab = kx.Table(data=\n",
" {\n",
" 'a': [1, 2, 2, 4],\n",
" 'b': [1, 2, 6, 7],\n",
" 'c': [7, 8, 9, 10],\n",
" 'd': [7, 11, 14, 14]\n",
" }\n",
")\n",
"tab"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "437ab485-bf73-4209-b63e-aa0d1bfa5d58",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>2.227147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>-4.890533</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>-1.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>-0.04958678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"pykx.Dictionary(pykx.q('\n",
"a| 2.227147\n",
"b| -4.890533\n",
"c| -1.2\n",
"d| -0.04958678\n",
"'))"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tab.kurt()"
]
},
{
"cell_type": "markdown",
"id": "ea3e1cf6-2304-4061-a846-1cbc0572ea9d",
"metadata": {},
"source": [
"Calculate the kurtosis across the rows of a table"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "63312e8b-76f0-46eb-b4d7-b2213561c86e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-6f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-3.901235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.1014759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.6838056</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"pykx.Dictionary(pykx.q('\n",
"0| -6\n",
"1| -3.901235\n",
"2| -0.1014759\n",
"3| -0.6838056\n",
"'))"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tab.kurt(axis=1)"
]
},
{
"cell_type": "markdown",
"id": "7bf853c5",
Expand Down
27 changes: 27 additions & 0 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,33 @@ def mean(self, axis: int = 0, numeric_only: bool = False):
tab
)

@api_return
def kurt(self, axis: int = 0, numeric_only: bool = False):
    """Compute the bias-adjusted excess kurtosis of the table along an axis.

    The calculation is delegated to a q lambda that evaluates
    ``n(n+1)(n-1)*m4 / ((n-2)(n-3)*m2^2) - 3(n-1)^2 / ((n-2)(n-3))``,
    where ``m2`` and ``m4`` are the sums of the 2nd and 4th powers of the
    deviations from the mean and ``n`` is the number of values.

    Parameters:
        axis: ``0`` evaluates every column and keys the result by column
            name. Any other value evaluates every row, casting the row's
            values to float and keying the result by the row index
            rendered as a symbol.
        numeric_only: When true, the table is first passed through
            ``_get_numeric_only_subtable``.

    Returns:
        The value produced by the q query: one single-entry dictionary per
        key, joined together, after entries whose value is the generic
        null ``(::)`` have been dropped. Any further conversion done by
        the ``api_return`` decorator is not visible from this block.
    """
    # Keyed tables are passed through q's `value` before any processing.
    table = q.value(self) if 'Keyed' in str(type(self)) else self
    if numeric_only:
        table = _get_numeric_only_subtable(table)

    # Pick the q fragments that differ between column-wise and row-wise runs.
    if axis == 0:
        key_cast = ''
        val_cast = ''
        index_expr = 'cols tab'
    else:
        key_cast = '`$string '
        val_cast = '"f"$value '
        index_expr = 'til count tab'

    null_filter = ' where not (::)~/:r[;1]'
    kurt_fn = ('{res: x - avg x;'
               'n: count x;'
               'm2: sum res_sq: res xexp 2;'
               'm4: sum res_sq xexp 2;'
               'adj: 3 * xexp[n - 1;2] % (n - 2) * (n - 3);'
               'num: n * (n + 1) * (n - 1) * m4;'
               'den: (n - 2) * (n - 3) * m2 xexp 2;'
               '(num % den) - adj}')

    # r holds (key; kurtosis) pairs; the last statement turns each pair into
    # a one-entry dictionary and joins them.
    query = (
        '{[tab]'
        f'r:{{[tab; x] ({key_cast}x; {kurt_fn} {val_cast}tab[x])}}[tab;] each {index_expr};'
        f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{null_filter}}}'
    )
    return q(query, table)

@api_return
def median(self, axis: int = 0, numeric_only: bool = False):
tab = self
Expand Down
73 changes: 73 additions & 0 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1543,6 +1543,79 @@ def test_mean(kx, q):
q_m = tab.mean(axis=1)


def test_kurt(kx, q):
    """Check ``Table.kurt`` against ``pandas.DataFrame.kurt``.

    Covers unkeyed and keyed tables, both axes, ``numeric_only`` handling,
    and the error raised when non-numeric columns are included.

    Row-wise (``axis=1``) results are walked by row count (``len(df)``), not
    by column count, so the loops stay correct if a fixture stops being
    square. Floats are compared with ``pytest.approx`` because pandas and q
    compute kurtosis with independent implementations.
    """
    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': [7, 11, 14, 14]
        }
    )
    tab = kx.toq(df)
    p_m = df.kurt()
    q_m = tab.kurt()
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.kurt(axis=1)
    q_m = tab.kurt(axis=1)
    # axis=1 yields one value per row, keyed by the row index as a symbol.
    for c in range(len(df)):
        assert p_m[c] == pytest.approx(q_m[q('{`$string x}', c)].py())

    # Keyed table: the key column must not take part in the calculation.
    q['tab'] = kx.toq(df)
    tab = q('1!`idx xcols update idx: til count tab from tab')
    p_m = df.kurt()
    q_m = tab.kurt()
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.kurt(axis=1)
    q_m = tab.kurt(axis=1)
    for c in range(len(df)):
        assert p_m[c] == pytest.approx(q_m[q('{`$string x}', c)].py())

    # Only three numeric columns remain, so row-wise kurtosis is undefined
    # and both implementations are expected to return NaN.
    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': ['foo', 'bar', 'baz', 'qux']
        }
    )
    tab = kx.toq(df)
    p_m = df.kurt(numeric_only=True)
    q_m = tab.kurt(numeric_only=True)
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.kurt(axis=1, numeric_only=True)
    q_m = tab.kurt(axis=1, numeric_only=True)
    for c in range(len(df)):
        assert np.isnan(p_m[c]) and np.isnan(q_m[q('{`$string x}', c)].py())

    # Four numeric columns plus one string column.
    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': [11, 12, 13, 14],
            'e': ['foo', 'bar', 'baz', 'qux']
        }
    )
    tab = kx.toq(df)
    p_m = df.kurt(numeric_only=True)
    q_m = tab.kurt(numeric_only=True)
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.kurt(axis=1, numeric_only=True)
    q_m = tab.kurt(axis=1, numeric_only=True)
    for c in range(len(df)):
        assert p_m[c] == pytest.approx(q_m[q('{`$string x}', c)].py())

    # Without numeric_only the string column must make the q query fail.
    with pytest.raises(kx.QError):
        tab.kurt()
    with pytest.raises(kx.QError):
        tab.kurt(axis=1)


def test_median(kx, q):
df = pd.DataFrame(
{
Expand Down

0 comments on commit 99bdb1b

Please sign in to comment.