diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index 239c4c8..16babc3 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "2d0c8656",
+ "id": "12a1e6cd",
"metadata": {},
"source": [
"# Pandas API\n",
@@ -23,7 +23,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "17f28b87",
+ "id": "2f4437ca",
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
},
{
"cell_type": "markdown",
- "id": "774122a0",
+ "id": "13e03cb2",
"metadata": {},
"source": [
"## Constructing Tables"
@@ -46,7 +46,7 @@
},
{
"cell_type": "markdown",
- "id": "0fd8910c",
+ "id": "e76d1fc5",
"metadata": {},
"source": [
"### Table\n",
@@ -75,7 +75,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "9a748c27",
+ "id": "7b5f69f2",
"metadata": {},
"outputs": [],
"source": [
@@ -84,7 +84,7 @@
},
{
"cell_type": "markdown",
- "id": "231a5e28",
+ "id": "673d9e9a",
"metadata": {},
"source": [
"Create a Table from an array like object."
@@ -93,7 +93,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7e43d716",
+ "id": "c701c1b9",
"metadata": {},
"outputs": [],
"source": [
@@ -102,7 +102,7 @@
},
{
"cell_type": "markdown",
- "id": "1e426cda",
+ "id": "c8eb1622",
"metadata": {},
"source": [
"Create a Table from an array like object and provide names for the columns to use."
@@ -111,7 +111,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "2b3c2edf",
+ "id": "103f866d",
"metadata": {},
"outputs": [],
"source": [
@@ -120,7 +120,7 @@
},
{
"cell_type": "markdown",
- "id": "be094191",
+ "id": "bfd1da8e",
"metadata": {},
"source": [
"### Keyed Table\n",
@@ -150,7 +150,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d93e73d3",
+ "id": "765be7de",
"metadata": {},
"outputs": [],
"source": [
@@ -159,7 +159,7 @@
},
{
"cell_type": "markdown",
- "id": "119c2e1f",
+ "id": "f8082a5a",
"metadata": {},
"source": [
"Create a keyed table from a list of rows."
@@ -168,7 +168,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "959fcd3d",
+ "id": "ecd10819",
"metadata": {},
"outputs": [],
"source": [
@@ -177,7 +177,7 @@
},
{
"cell_type": "markdown",
- "id": "9d83854e",
+ "id": "7be93c23",
"metadata": {},
"source": [
"Create a keyed table from a list of rows and provide names for the resulting columns."
@@ -186,7 +186,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "4b2c6989",
+ "id": "51d94e4d",
"metadata": {},
"outputs": [],
"source": [
@@ -195,7 +195,7 @@
},
{
"cell_type": "markdown",
- "id": "356b29d8",
+ "id": "8157961c",
"metadata": {},
"source": [
"Create a keyed table with a specified index column."
@@ -204,7 +204,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "acbe339c",
+ "id": "2405a759",
"metadata": {},
"outputs": [],
"source": [
@@ -213,7 +213,7 @@
},
{
"cell_type": "markdown",
- "id": "95a04686",
+ "id": "e9ff8aa6",
"metadata": {},
"source": [
"## Metadata"
@@ -222,7 +222,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a52fdc82",
+ "id": "fed3a938",
"metadata": {},
"outputs": [],
"source": [
@@ -233,7 +233,7 @@
},
{
"cell_type": "markdown",
- "id": "280baf05",
+ "id": "3e5de382",
"metadata": {},
"source": [
"### Table.columns\n",
@@ -244,7 +244,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a2ee3fad",
+ "id": "a355d654",
"metadata": {},
"outputs": [],
"source": [
@@ -253,7 +253,7 @@
},
{
"cell_type": "markdown",
- "id": "40da029e",
+ "id": "9baab247",
"metadata": {},
"source": [
"### Table.dtypes\n",
@@ -264,7 +264,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "70bd32d2",
+ "id": "f72c7071",
"metadata": {},
"outputs": [],
"source": [
@@ -273,7 +273,7 @@
},
{
"cell_type": "markdown",
- "id": "00e49e84",
+ "id": "5393cbb5",
"metadata": {},
"source": [
"### Table.empty\n",
@@ -284,7 +284,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "9dc49e08",
+ "id": "d62ce629",
"metadata": {},
"outputs": [],
"source": [
@@ -293,7 +293,7 @@
},
{
"cell_type": "markdown",
- "id": "c00e46ef",
+ "id": "de0a60d6",
"metadata": {},
"source": [
"### Table.ndim\n",
@@ -304,7 +304,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "db113636",
+ "id": "27aa4a92",
"metadata": {},
"outputs": [],
"source": [
@@ -313,7 +313,7 @@
},
{
"cell_type": "markdown",
- "id": "5ea4b315",
+ "id": "d8b6533c",
"metadata": {},
"source": [
"### Table.shape\n",
@@ -324,7 +324,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "78125654",
+ "id": "3bd69cae",
"metadata": {},
"outputs": [],
"source": [
@@ -333,7 +333,7 @@
},
{
"cell_type": "markdown",
- "id": "1e3f85a5",
+ "id": "50f7c03c",
"metadata": {},
"source": [
"### Table.size\n",
@@ -344,7 +344,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "c77c5bc7",
+ "id": "072269ec",
"metadata": {
"scrolled": false
},
@@ -355,7 +355,7 @@
},
{
"cell_type": "markdown",
- "id": "2be2ece3",
+ "id": "394b9b9d",
"metadata": {},
"source": [
"### Table.mean()\n",
@@ -382,7 +382,7 @@
},
{
"cell_type": "markdown",
- "id": "cb8c5ef8",
+ "id": "7d0ae9ce",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -393,7 +393,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0c3e5d76",
+ "id": "0a3883ab",
"metadata": {},
"outputs": [],
"source": [
@@ -411,7 +411,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "9986a550",
+ "id": "65f9d7ed",
"metadata": {},
"outputs": [],
"source": [
@@ -420,7 +420,7 @@
},
{
"cell_type": "markdown",
- "id": "24ac0b99",
+ "id": "94f312a5",
"metadata": {},
"source": [
"Calculate the mean across the rows of a table"
@@ -429,7 +429,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "41f6f669",
+ "id": "3d96dc12",
"metadata": {},
"outputs": [],
"source": [
@@ -438,7 +438,7 @@
},
{
"cell_type": "markdown",
- "id": "7bf853c5",
+ "id": "d102ec1b",
"metadata": {},
"source": [
"### Table.median()\n",
@@ -465,7 +465,7 @@
},
{
"cell_type": "markdown",
- "id": "98da458a",
+ "id": "e2341a7c",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -476,7 +476,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "bff5ac07",
+ "id": "d9a17a3b",
"metadata": {},
"outputs": [],
"source": [
@@ -494,7 +494,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "579c8b33",
+ "id": "d93621d4",
"metadata": {},
"outputs": [],
"source": [
@@ -503,7 +503,7 @@
},
{
"cell_type": "markdown",
- "id": "f6698350",
+ "id": "b9822d60",
"metadata": {},
"source": [
"Calculate the median across the rows of a table"
@@ -512,7 +512,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "5664bd93",
+ "id": "ab0b5159",
"metadata": {
"scrolled": false
},
@@ -523,7 +523,7 @@
},
{
"cell_type": "markdown",
- "id": "33af56bb",
+ "id": "7041b59d",
"metadata": {},
"source": [
"### Table.mode()\n",
@@ -551,7 +551,7 @@
},
{
"cell_type": "markdown",
- "id": "4201c9af",
+ "id": "100d30fa",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -562,7 +562,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "b4bfe36c",
+ "id": "806786d9",
"metadata": {},
"outputs": [],
"source": [
@@ -580,7 +580,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "e1a7eeb1",
+ "id": "7ccc77a8",
"metadata": {
"scrolled": true
},
@@ -591,7 +591,7 @@
},
{
"cell_type": "markdown",
- "id": "6a47af49",
+ "id": "3bf74453",
"metadata": {},
"source": [
"Calculate the median across the rows of a table"
@@ -600,7 +600,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "130081ce",
+ "id": "d3c86b05",
"metadata": {
"scrolled": false
},
@@ -611,7 +611,7 @@
},
{
"cell_type": "markdown",
- "id": "29dffe0d",
+ "id": "ab19909d",
"metadata": {},
"source": [
"Calculate the mode across columns and keep null values."
@@ -620,7 +620,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "53a8251a",
+ "id": "cf30480e",
"metadata": {
"scrolled": true
},
@@ -639,7 +639,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f8558148",
+ "id": "f5e48708",
"metadata": {},
"outputs": [],
"source": [
@@ -648,7 +648,103 @@
},
{
"cell_type": "markdown",
- "id": "7e2813b4",
+ "metadata": {},
+ "source": [
+ "### Table.std()\n",
+ "\n",
+ "```\n",
+ "Table.std(axis=0, skipna=True, numeric_only=False, ddof=0)\n",
+ "```\n",
+ "\n",
+ "Return sample standard deviation over requested axis. Normalized by N-1 by default. This can be changed using the ddof argument.\n",
+ "\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
+ "| axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |\n",
+ "| skipna | bool | not yet implemented | True |\n",
+ "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n",
+ "| ddof | int | Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. | 1 |\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :----------------: | :------------------------------------------------------------------- |\n",
+ "| Dictionary | The std across each row / column with the key corresponding to the row number or column name. |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Examples:**\n",
+ "\n",
+ "Calculate the std across the columns of a table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab = kx.Table(data=\n",
+ " {\n",
+ " 'a': [1, 2, 2, 4],\n",
+ " 'b': [1, 2, 6, 7],\n",
+ " 'c': [7, 8, 9, 10],\n",
+ " 'd': [7, 11, 14, 14]\n",
+ " }\n",
+ ")\n",
+ "tab"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.std()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Calculate the std across the rows of a table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.std(axis=2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Calculate std accross columns with ddof=0:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.std(ddof=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24cf11d3",
"metadata": {},
"source": [
"## Indexing"
@@ -657,7 +753,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "77ab64ab",
+ "id": "6fb377dc",
"metadata": {},
"outputs": [],
"source": [
@@ -669,7 +765,7 @@
},
{
"cell_type": "markdown",
- "id": "69313988",
+ "id": "c1c04832",
"metadata": {},
"source": [
"### Table.head()\n",
@@ -695,7 +791,7 @@
},
{
"cell_type": "markdown",
- "id": "edf33458",
+ "id": "3a1376fd",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -706,7 +802,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "916fcf4d",
+ "id": "f0bf8f86",
"metadata": {
"scrolled": false
},
@@ -717,7 +813,7 @@
},
{
"cell_type": "markdown",
- "id": "cb58279a",
+ "id": "93e184ff",
"metadata": {},
"source": [
"Return the first 10 rows of the table."
@@ -726,7 +822,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "bf32db40",
+ "id": "9e3e5b67",
"metadata": {},
"outputs": [],
"source": [
@@ -735,7 +831,7 @@
},
{
"cell_type": "markdown",
- "id": "a5c4a5e9",
+ "id": "76e7a8fe",
"metadata": {},
"source": [
"### Table.tail()\n",
@@ -761,7 +857,7 @@
},
{
"cell_type": "markdown",
- "id": "4e3fee46",
+ "id": "bc99337e",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -772,7 +868,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a0d34e0b",
+ "id": "3b261b98",
"metadata": {},
"outputs": [],
"source": [
@@ -781,7 +877,7 @@
},
{
"cell_type": "markdown",
- "id": "e223e705",
+ "id": "9871118a",
"metadata": {},
"source": [
"Return the last 10 rows of the table."
@@ -790,7 +886,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "4edae0c3",
+ "id": "dd3970b3",
"metadata": {},
"outputs": [],
"source": [
@@ -799,7 +895,7 @@
},
{
"cell_type": "markdown",
- "id": "c87325f8",
+ "id": "507b8049",
"metadata": {},
"source": [
"### Table.get()\n",
@@ -826,7 +922,7 @@
},
{
"cell_type": "markdown",
- "id": "7c96cd34",
+ "id": "ec0f77c7",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -837,7 +933,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7f64d914",
+ "id": "5a5a6d38",
"metadata": {
"scrolled": true
},
@@ -848,7 +944,7 @@
},
{
"cell_type": "markdown",
- "id": "88ee5698",
+ "id": "528ef898",
"metadata": {},
"source": [
"Get the `y` and `z` columns from the table."
@@ -857,7 +953,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "daef6ce6",
+ "id": "50dc3d41",
"metadata": {
"scrolled": true
},
@@ -868,7 +964,7 @@
},
{
"cell_type": "markdown",
- "id": "26a53f6d",
+ "id": "5671306b",
"metadata": {},
"source": [
"Attempt to get the `q` column from the table and recieve none as that column does not exist."
@@ -877,7 +973,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "3856084d",
+ "id": "f4e793d7",
"metadata": {},
"outputs": [],
"source": [
@@ -886,7 +982,7 @@
},
{
"cell_type": "markdown",
- "id": "91932d32",
+ "id": "4ae7804e",
"metadata": {},
"source": [
"Attempt to get the `q` column from the table and recieve the default value `not found` as that column does not exist."
@@ -895,7 +991,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7d2a2bcf",
+ "id": "4fb7dafd",
"metadata": {},
"outputs": [],
"source": [
@@ -904,7 +1000,7 @@
},
{
"cell_type": "markdown",
- "id": "9e831e14",
+ "id": "b9bffb97",
"metadata": {},
"source": [
"### Table.at[]\n",
@@ -922,7 +1018,7 @@
},
{
"cell_type": "markdown",
- "id": "97519657",
+ "id": "631f538b",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -933,7 +1029,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "9cd275bf",
+ "id": "e88eb0b9",
"metadata": {},
"outputs": [],
"source": [
@@ -942,7 +1038,7 @@
},
{
"cell_type": "markdown",
- "id": "1fd39083",
+ "id": "3b2a6f91",
"metadata": {},
"source": [
"Reassign the value of the `z` column in the 997th row to `3.14159`."
@@ -951,7 +1047,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "814fa8e0",
+ "id": "514ceeb0",
"metadata": {},
"outputs": [],
"source": [
@@ -961,7 +1057,7 @@
},
{
"cell_type": "markdown",
- "id": "7815e8c3",
+ "id": "9d807946",
"metadata": {},
"source": [
"### Table.loc[]\n",
@@ -997,7 +1093,7 @@
},
{
"cell_type": "markdown",
- "id": "5ee06186",
+ "id": "fc696884",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1008,7 +1104,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "12fc6807",
+ "id": "076e08a4",
"metadata": {
"scrolled": true
},
@@ -1019,7 +1115,7 @@
},
{
"cell_type": "markdown",
- "id": "97206dd7",
+ "id": "dd6c4a2f",
"metadata": {},
"source": [
"Get all rows of the table where the value in the `z` column is greater than `250.0`"
@@ -1028,7 +1124,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a6c9add0",
+ "id": "eec5938e",
"metadata": {},
"outputs": [],
"source": [
@@ -1037,7 +1133,7 @@
},
{
"cell_type": "markdown",
- "id": "a32aca6b",
+ "id": "8ce7195e",
"metadata": {},
"source": [
"Replace all null values in the column `v` with the value `-100`."
@@ -1046,7 +1142,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "c1ad3a23",
+ "id": "4fec8625",
"metadata": {
"scrolled": true
},
@@ -1058,7 +1154,7 @@
},
{
"cell_type": "markdown",
- "id": "447b9fd2",
+ "id": "81343ea4",
"metadata": {},
"source": [
"Replace all locations in column `v` where the value is `-100` with a null."
@@ -1067,7 +1163,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "31ea02c9",
+ "id": "d49ba7ff",
"metadata": {},
"outputs": [],
"source": [
@@ -1077,7 +1173,7 @@
},
{
"cell_type": "markdown",
- "id": "ac4c5e4b",
+ "id": "dirty-deviation",
"metadata": {},
"source": [
"Usage of the `loc` functionality under the hood additionally allows users to set columns within a table for single or multiple columns. Data passed for this can be q/Python."
@@ -1086,7 +1182,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f378ba4a",
+ "id": "economic-administration",
"metadata": {},
"outputs": [],
"source": [
@@ -1096,7 +1192,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0f2936b9",
+ "id": "parliamentary-simon",
"metadata": {},
"outputs": [],
"source": [
@@ -1105,7 +1201,7 @@
},
{
"cell_type": "markdown",
- "id": "a3368987",
+ "id": "8aeb5b10",
"metadata": {},
"source": [
"### Table.iloc[]\n",
@@ -1135,7 +1231,7 @@
},
{
"cell_type": "markdown",
- "id": "0ef4d8cf",
+ "id": "a6e24ecf",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1146,7 +1242,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "683ab48b",
+ "id": "a3460c85",
"metadata": {
"scrolled": true
},
@@ -1157,7 +1253,7 @@
},
{
"cell_type": "markdown",
- "id": "e71bebdb",
+ "id": "2bdb5d71",
"metadata": {},
"source": [
"Get the first 5 rows from a table."
@@ -1166,7 +1262,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a13730fd",
+ "id": "4ef3767c",
"metadata": {
"scrolled": false
},
@@ -1177,7 +1273,7 @@
},
{
"cell_type": "markdown",
- "id": "60f892e0",
+ "id": "f869425e",
"metadata": {},
"source": [
"Get all rows of the table where the `y` column is equal to `AAPL`."
@@ -1186,7 +1282,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d7afdf65",
+ "id": "bd3d1613",
"metadata": {
"scrolled": true
},
@@ -1197,7 +1293,7 @@
},
{
"cell_type": "markdown",
- "id": "8b3b9279",
+ "id": "bcc638af",
"metadata": {},
"source": [
"Get all rows of the table where the `y` column is equal to `AAPL`, and only return the `y`, `z` and `w` columns."
@@ -1206,7 +1302,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a0d9f08d",
+ "id": "19491b1a",
"metadata": {},
"outputs": [],
"source": [
@@ -1215,7 +1311,7 @@
},
{
"cell_type": "markdown",
- "id": "045bc156",
+ "id": "7a7bcdd8",
"metadata": {},
"source": [
"Replace all null values in the column `v` with the value `-100`."
@@ -1224,7 +1320,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7e21c163",
+ "id": "8dbd832b",
"metadata": {},
"outputs": [],
"source": [
@@ -1234,7 +1330,7 @@
},
{
"cell_type": "markdown",
- "id": "76021266",
+ "id": "37ad1ee6",
"metadata": {},
"source": [
"### Table.pop()\n",
@@ -1260,7 +1356,7 @@
},
{
"cell_type": "markdown",
- "id": "e5fdfbd3",
+ "id": "a7e8dc98",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1271,7 +1367,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7a960191",
+ "id": "cc748ad8",
"metadata": {
"scrolled": true
},
@@ -1286,7 +1382,7 @@
},
{
"cell_type": "markdown",
- "id": "35062560",
+ "id": "231ebfbb",
"metadata": {},
"source": [
"Remove the `z` and `w` columns from the table and return them."
@@ -1295,7 +1391,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a46189b2",
+ "id": "2aea8b3e",
"metadata": {
"scrolled": false
},
@@ -1310,7 +1406,7 @@
},
{
"cell_type": "markdown",
- "id": "f71b6917",
+ "id": "56ce1b1d",
"metadata": {},
"source": [
"## Reindexing"
@@ -1319,7 +1415,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a2b1a198",
+ "id": "a4c1d74b",
"metadata": {
"scrolled": true
},
@@ -1333,7 +1429,7 @@
},
{
"cell_type": "markdown",
- "id": "f5a7ac0e",
+ "id": "e47a4340",
"metadata": {},
"source": [
"### Table.drop()\n",
@@ -1360,7 +1456,7 @@
},
{
"cell_type": "markdown",
- "id": "008a2e74",
+ "id": "a0417a0f",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1371,7 +1467,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0f74d3f2",
+ "id": "2932fb5f",
"metadata": {},
"outputs": [],
"source": [
@@ -1380,7 +1476,7 @@
},
{
"cell_type": "markdown",
- "id": "cb4e82aa",
+ "id": "5368f9f1",
"metadata": {},
"source": [
"Drop columns from a table."
@@ -1389,7 +1485,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "57ad6a64",
+ "id": "02c1221f",
"metadata": {},
"outputs": [],
"source": [
@@ -1398,7 +1494,7 @@
},
{
"cell_type": "markdown",
- "id": "90db87b0",
+ "id": "a88ea856",
"metadata": {},
"source": [
"### Table.drop_duplicates()\n",
@@ -1418,7 +1514,7 @@
},
{
"cell_type": "markdown",
- "id": "3af33f03",
+ "id": "90493dae",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1429,7 +1525,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "af182307",
+ "id": "baccc6bd",
"metadata": {},
"outputs": [],
"source": [
@@ -1439,7 +1535,7 @@
},
{
"cell_type": "markdown",
- "id": "48143d51",
+ "id": "cd94c2b6",
"metadata": {},
"source": [
"Drop all duplicate rows from the table."
@@ -1448,7 +1544,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "eeff16e7",
+ "id": "c6dfca99",
"metadata": {},
"outputs": [],
"source": [
@@ -1457,7 +1553,7 @@
},
{
"cell_type": "markdown",
- "id": "6d71c8c0",
+ "id": "ece21d55",
"metadata": {},
"source": [
"### Table.rename()\n",
@@ -1483,7 +1579,7 @@
},
{
"cell_type": "markdown",
- "id": "73260da1",
+ "id": "d49a17ce",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1494,7 +1590,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "3cc68fa6",
+ "id": "585ea2e9",
"metadata": {},
"outputs": [],
"source": [
@@ -1503,7 +1599,7 @@
},
{
"cell_type": "markdown",
- "id": "eef94948",
+ "id": "b88b46fd",
"metadata": {},
"source": [
"Rename column `y` to `symbol` and `z` to `price`."
@@ -1512,7 +1608,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d5e76248",
+ "id": "ed9c511f",
"metadata": {},
"outputs": [],
"source": [
@@ -1521,7 +1617,137 @@
},
{
"cell_type": "markdown",
- "id": "05124590",
+ "metadata": {},
+ "source": [
+ "### Table.add_prefix()\n",
+ "\n",
+ "```\n",
+ "Table.add_prefix(columns)\n",
+ "```\n",
+ "\n",
+ "Rename columns adding a prefix in a table and return the resulting Table object.\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n",
+ "| prefix | str | The string that will be concatenated with the name of the columns | _required_ |\n",
+ "| axis | int | Axis to add prefix on. | 0 |\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :---: | :----------------------------------------------------------------- |\n",
+ "| Table | A table with the given column(s) renamed adding a prefix. |"
+ ],
+ "id": "f8ea67e44e518022"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Examples:**\n",
+ "\n",
+ "he initial table to which a prefix will be added to its columns"
+ ],
+ "id": "96a58dc47e716cb7"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.head()"
+ ],
+ "id": "e0724f3baa9ea5b5"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add \"col_\" to table columns:"
+ ],
+ "id": "4041dabadcec3425"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.add_prefix(prefix=\"col_\").head()"
+ ],
+ "id": "185520cd5ecc7034"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table.add_suffix()\n",
+ "\n",
+ "```\n",
+ "Table.add_sufix(suffix, axis)\n",
+ "```\n",
+ "\n",
+ "Rename columns adding a suffix in a table and return the resulting Table object.\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n",
+ "| suffix | str | The string that will be concatenated with the name of the columns | _required_ |\n",
+ "| axis | int | Axis to add suffix on. | 0 |\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :---: | :----------------------------------------------------------------- |\n",
+ "| Table | A table with the given column(s) renamed adding a suffix. |"
+ ],
+ "id": "97c63cf5215a9e81"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Examples:**\n",
+ "\n",
+ "The initial table to which a suffix will be added to its columns"
+ ],
+ "id": "cbf132712b7cec72"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.head()"
+ ],
+ "id": "7b27e8f331b01a7"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add \"_col\" to table columns:"
+ ],
+ "id": "315996d38b7d91d3"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.add_suffix(prefix=\"_col\").head()"
+ ],
+ "id": "254a6d08b139a110"
+ },
+ {
+ "cell_type": "markdown",
+ "id": "10582eaa",
"metadata": {},
"source": [
"### Table.sample()\n",
@@ -1553,7 +1779,7 @@
},
{
"cell_type": "markdown",
- "id": "e8f78917",
+ "id": "0271484d",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1564,7 +1790,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d88ab348",
+ "id": "187059eb",
"metadata": {},
"outputs": [],
"source": [
@@ -1573,7 +1799,7 @@
},
{
"cell_type": "markdown",
- "id": "78e03554",
+ "id": "d5d52b8b",
"metadata": {},
"source": [
"Sample 10% of the rows."
@@ -1582,7 +1808,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "8585d62e",
+ "id": "f6aa2289",
"metadata": {},
"outputs": [],
"source": [
@@ -1591,7 +1817,7 @@
},
{
"cell_type": "markdown",
- "id": "c77712d3",
+ "id": "a9d80fe9",
"metadata": {},
"source": [
"Sample 10% of the rows and allow the same row to be sampled twice."
@@ -1600,7 +1826,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "b138f770",
+ "id": "17096534",
"metadata": {},
"outputs": [],
"source": [
@@ -1609,7 +1835,7 @@
},
{
"cell_type": "markdown",
- "id": "6f6f5672",
+ "id": "32794d29",
"metadata": {},
"source": [
"### Table.select_dtypes()\n",
@@ -1638,7 +1864,7 @@
},
{
"cell_type": "markdown",
- "id": "6a703c57",
+ "id": "a94cc6e5",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1649,7 +1875,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "5e9734f7",
+ "id": "8fcaa4fb",
"metadata": {},
"outputs": [],
"source": [
@@ -1658,7 +1884,7 @@
},
{
"cell_type": "markdown",
- "id": "42d9ffa6",
+ "id": "3dc6ef75",
"metadata": {},
"source": [
"Exclude columns contatining symbols"
@@ -1667,7 +1893,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "3d934cf0",
+ "id": "7d1a3e61",
"metadata": {},
"outputs": [],
"source": [
@@ -1676,7 +1902,7 @@
},
{
"cell_type": "markdown",
- "id": "e4302f7d",
+ "id": "7009cb76",
"metadata": {},
"source": [
"Include a list of column types"
@@ -1685,7 +1911,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f698f5f0",
+ "id": "b71e87fa",
"metadata": {},
"outputs": [],
"source": [
@@ -1694,7 +1920,7 @@
},
{
"cell_type": "markdown",
- "id": "5590d1ca",
+ "id": "54417754",
"metadata": {},
"source": [
"### Table.astype()\n",
@@ -1723,7 +1949,7 @@
},
{
"cell_type": "markdown",
- "id": "f9ca98d2",
+ "id": "20546f87",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1734,7 +1960,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "831836c8",
+ "id": "33e3cf56",
"metadata": {},
"outputs": [],
"source": [
@@ -1743,7 +1969,7 @@
},
{
"cell_type": "markdown",
- "id": "0bf0d78f",
+ "id": "16b2ee25",
"metadata": {},
"source": [
"Cast all columns to dtype LongVector"
@@ -1752,7 +1978,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "6833400a",
+ "id": "ac977798",
"metadata": {},
"outputs": [],
"source": [
@@ -1761,7 +1987,7 @@
},
{
"cell_type": "markdown",
- "id": "7a2bfcd3",
+ "id": "51850e87",
"metadata": {},
"source": [
"Casting as specified in the dcitionary supplied with given dtype per column"
@@ -1770,7 +1996,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "872db9aa",
+ "id": "d8c2f1f9",
"metadata": {},
"outputs": [],
"source": [
@@ -1779,7 +2005,7 @@
},
{
"cell_type": "markdown",
- "id": "ef3b4225",
+ "id": "ada1bfd4",
"metadata": {},
"source": [
"The next example will use this table"
@@ -1788,7 +2014,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "6a20abdd",
+ "id": "ab8261b8",
"metadata": {},
"outputs": [],
"source": [
@@ -1797,7 +2023,7 @@
},
{
"cell_type": "markdown",
- "id": "908fa4ea",
+ "id": "a2972dc0",
"metadata": {},
"source": [
"Casting char and string columns to symbol columns"
@@ -1806,7 +2032,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "5ea7fe9e",
+ "id": "c2c019bc",
"metadata": {},
"outputs": [],
"source": [
@@ -1815,7 +2041,7 @@
},
{
"cell_type": "markdown",
- "id": "718584f8",
+ "id": "e0b57863",
"metadata": {},
"source": [
"## Merging"
@@ -1823,7 +2049,7 @@
},
{
"cell_type": "markdown",
- "id": "ef401426",
+ "id": "8b1c0dc5",
"metadata": {},
"source": [
"### Table.merge()\n",
@@ -1875,7 +2101,7 @@
},
{
"cell_type": "markdown",
- "id": "9e613e3c",
+ "id": "9542857c",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -1886,7 +2112,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a3b0ec9f",
+ "id": "847941a9",
"metadata": {
"scrolled": true
},
@@ -1899,7 +2125,7 @@
},
{
"cell_type": "markdown",
- "id": "6e32596c",
+ "id": "4b6793c9",
"metadata": {},
"source": [
"Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, \\_x and \\_y, appended."
@@ -1908,7 +2134,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "8ea253c9",
+ "id": "023b555b",
"metadata": {},
"outputs": [],
"source": [
@@ -1917,7 +2143,7 @@
},
{
"cell_type": "markdown",
- "id": "2d9240b3",
+ "id": "9449f90b",
"metadata": {},
"source": [
"Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns."
@@ -1926,7 +2152,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "64425a1d",
+ "id": "87ad643d",
"metadata": {},
"outputs": [],
"source": [
@@ -1935,7 +2161,7 @@
},
{
"cell_type": "markdown",
- "id": "e749c7e0",
+ "id": "49deadfd",
"metadata": {},
"source": [
"Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns."
@@ -1944,7 +2170,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a938230d",
+ "id": "a3d45ec4",
"metadata": {
"scrolled": true
},
@@ -1959,7 +2185,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "b1d99a31",
+ "id": "ee3ef9a8",
"metadata": {},
"outputs": [],
"source": [
@@ -1969,7 +2195,7 @@
},
{
"cell_type": "markdown",
- "id": "385c0465",
+ "id": "b62f2e3b",
"metadata": {},
"source": [
"Merge tab1 and tab2 on the `a` column using an inner join."
@@ -1978,7 +2204,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7431a148",
+ "id": "8f17070f",
"metadata": {
"scrolled": true
},
@@ -1989,7 +2215,7 @@
},
{
"cell_type": "markdown",
- "id": "230a7666",
+ "id": "2aaf6b4b",
"metadata": {},
"source": [
"Merge tab1 and tab2 on the `a` column using a left join."
@@ -1998,7 +2224,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "04b96b08",
+ "id": "64cfb314",
"metadata": {},
"outputs": [],
"source": [
@@ -2007,7 +2233,7 @@
},
{
"cell_type": "markdown",
- "id": "d991656c",
+ "id": "374c2905",
"metadata": {},
"source": [
"Merge tab1 and tab2 using a cross join."
@@ -2016,7 +2242,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "09886503",
+ "id": "51f94109",
"metadata": {
"scrolled": true
},
@@ -2029,7 +2255,7 @@
},
{
"cell_type": "markdown",
- "id": "b2f4aff1",
+ "id": "d4293b88",
"metadata": {},
"source": [
"### Table.merge_asof()\n",
@@ -2086,7 +2312,7 @@
},
{
"cell_type": "markdown",
- "id": "fc696ccf",
+ "id": "bf6bc139",
"metadata": {},
"source": [
"**Examples:**\n",
@@ -2097,7 +2323,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "6cb634e0",
+ "id": "338f553c",
"metadata": {},
"outputs": [],
"source": [
@@ -2109,7 +2335,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "81b10932",
+ "id": "2d7b0d9b",
"metadata": {},
"outputs": [],
"source": [
@@ -2119,7 +2345,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "411d19d2",
+ "id": "5949e8b3",
"metadata": {},
"outputs": [],
"source": [
@@ -2128,7 +2354,7 @@
},
{
"cell_type": "markdown",
- "id": "324d24ec",
+ "id": "6976cfc0",
"metadata": {},
"source": [
"Perform a asof join on two tables but first merge them on the by column."
@@ -2137,7 +2363,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d805fa5c",
+ "id": "7e9c7ee0",
"metadata": {},
"outputs": [],
"source": [
@@ -2183,7 +2409,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "665d0e74",
+ "id": "444c426a",
"metadata": {},
"outputs": [],
"source": [
@@ -2193,7 +2419,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "9398ab6a",
+ "id": "d2e16d4a",
"metadata": {},
"outputs": [],
"source": [
@@ -2202,7 +2428,7 @@
},
{
"cell_type": "markdown",
- "id": "acca5289",
+ "id": "ca5c9a5f",
"metadata": {},
"source": [
"## Computations"
@@ -2211,7 +2437,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "852b5f34",
+ "id": "674cb468",
"metadata": {},
"outputs": [],
"source": [
@@ -2223,7 +2449,7 @@
},
{
"cell_type": "markdown",
- "id": "93a50ee2",
+ "id": "92136283",
"metadata": {},
"source": [
"### Table.abs()\n",
@@ -2250,7 +2476,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7da5d72b",
+ "id": "7b08d857",
"metadata": {
"scrolled": true
},
@@ -2261,7 +2487,72 @@
},
{
"cell_type": "markdown",
- "id": "cbcdf84e",
+ "id": "499cac52",
+ "metadata": {},
+ "source": [
+ "### Table.round()\n",
+ "\n",
+ "```\n",
+ "Table.round(self, decimals: Union[int, Dict[str, int]] = 0)\n",
+ "```\n",
+ "\n",
+ "Round a Table to a variable number of decimal places.\n",
+ "\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :--------------: | :-----------------: | :------------------------------------------------------------ | :-----: |\n",
+ "| decimals | int or Dict or list | Number of decimal places to round each column to. If an int is given, round each column to the same number of places. Otherwise dict and list round to variable numbers of places. Column names should be in the keys if decimals is a dict-like, or in the index if decimals is a list. Any columns not included in decimals will be left as is. Elements of decimals which are not columns of the input will be ignored.| 0 |\n",
+ "\n",
+ "**Note: functionality for list nyi**\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :--------: | :--------------------------------------------------------------------------------------- |\n",
+ "| Table | A Table with the affected columns rounded to the specified number of decimal places. |\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b629def",
+ "metadata": {},
+ "source": [
+ "If an integer is provided it rounds every float column to set decimals."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "08c182c9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.round(1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28853fc0",
+ "metadata": {},
+ "source": [
+ "If a dict whose keys are the column names and its values are the decimals to round set column is provided, it will round them accordingly.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7640df4c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.round({\"price\": 1, \"traded\": 0})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad57d9cf",
"metadata": {},
"source": [
"### Table.all()\n",
@@ -2290,7 +2581,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7d0b0cd3",
+ "id": "b37d2e15",
"metadata": {},
"outputs": [],
"source": [
@@ -2299,7 +2590,7 @@
},
{
"cell_type": "markdown",
- "id": "aa02cf1c",
+ "id": "fd14012f",
"metadata": {},
"source": [
"### Table.any()\n",
@@ -2328,7 +2619,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "a4806993",
+ "id": "581cf133",
"metadata": {},
"outputs": [],
"source": [
@@ -2337,7 +2628,7 @@
},
{
"cell_type": "markdown",
- "id": "a3c3fccd",
+ "id": "c42f7ec0",
"metadata": {},
"source": [
"### Table.max()\n",
@@ -2366,7 +2657,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "8e9abf02",
+ "id": "dd8a84e1",
"metadata": {},
"outputs": [],
"source": [
@@ -2375,7 +2666,7 @@
},
{
"cell_type": "markdown",
- "id": "301ab2c2",
+ "id": "fb28288f",
"metadata": {},
"source": [
"### Table.min()\n",
@@ -2404,7 +2695,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "c1255ac7",
+ "id": "0fe40c65",
"metadata": {},
"outputs": [],
"source": [
@@ -2413,7 +2704,7 @@
},
{
"cell_type": "markdown",
- "id": "a389f7aa",
+ "id": "af783468",
"metadata": {},
"source": [
"### Table.sum()\n",
@@ -2443,7 +2734,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "af638f53",
+ "id": "32519605",
"metadata": {},
"outputs": [],
"source": [
@@ -2452,7 +2743,44 @@
},
{
"cell_type": "markdown",
- "id": "9bf62b1a",
+ "metadata": {},
+ "source": [
+ "### Table.count()\n",
+ "\n",
+ "```\n",
+ "Table.count(axis=0, numeric_only=False)\n",
+ "```\n",
+ "\n",
+ "Returns the ount non-NA values across the given axis.\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
+ "| axis | int | The axis to calculate the product across 0 is columns, 1 is rows. | 0 |\n",
+ "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :----------------: | :------------------------------------------------------------------- |\n",
+ "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `count` on that column / row. |"
+ ],
+ "id": "c60a0676f33f2d7d"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.count()"
+ ],
+ "id": "77a06a8fad19e21c"
+ },
+ {
+ "cell_type": "markdown",
+ "id": "621766f6",
"metadata": {},
"source": [
"### Table.prod()\n",
@@ -2482,7 +2810,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0ddad367",
+ "id": "97c7c26b",
"metadata": {
"scrolled": true
},
@@ -2500,7 +2828,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "151411e2",
+ "id": "9222c8ba",
"metadata": {},
"outputs": [],
"source": [
@@ -2509,7 +2837,44 @@
},
{
"cell_type": "markdown",
- "id": "499025cb",
+ "metadata": {},
+ "source": [
+ "### Table.skew()\n",
+ "\n",
+ "```\n",
+ "Table.skew(axis=0, skipna=True, numeric_only=False)\n",
+ "```\n",
+ "\n",
+ "Returns the skewness of all values across the given axis.\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
+ "| axis | int | The axis to calculate the product across 0 is columns, 1 is rows. | 0 |\n",
+ "| skipna | bool | Ignore any null values along the axis. | True |\n",
+ "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n",
+ "\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :----------------: | :------------------------------------------------------------------- |\n",
+ "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `skew` on that column / row. |"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.skew(numeric_only=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "655c3ad2",
"metadata": {},
"source": [
"## Setting Indexes"
@@ -2517,7 +2882,7 @@
},
{
"cell_type": "markdown",
- "id": "4dc576e8",
+ "id": "6ad74ce0",
"metadata": {},
"source": [
"### Table.set_index()\n",
@@ -2558,7 +2923,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "42a288f7",
+ "id": "bdd21889",
"metadata": {},
"outputs": [],
"source": [
@@ -2569,7 +2934,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f744959e",
+ "id": "cf782f61",
"metadata": {},
"outputs": [],
"source": [
@@ -2580,7 +2945,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "00c31275",
+ "id": "0c54f760",
"metadata": {},
"outputs": [],
"source": [
@@ -2591,7 +2956,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "858bbeb2",
+ "id": "825afd87",
"metadata": {},
"outputs": [],
"source": [
@@ -2602,7 +2967,7 @@
},
{
"cell_type": "markdown",
- "id": "450c30ee",
+ "id": "72efa53c",
"metadata": {},
"source": [
"Appending:"
@@ -2611,7 +2976,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "b475c811",
+ "id": "d2b9b266",
"metadata": {},
"outputs": [],
"source": [
@@ -2622,7 +2987,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0fb2c59c",
+ "id": "af569818",
"metadata": {},
"outputs": [],
"source": [
@@ -2632,7 +2997,7 @@
},
{
"cell_type": "markdown",
- "id": "887ffb99",
+ "id": "3224889a",
"metadata": {},
"source": [
"Verify Integrity:"
@@ -2641,7 +3006,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "49367c46",
+ "id": "74347bef",
"metadata": {},
"outputs": [],
"source": [
@@ -2652,7 +3017,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7bb2aaf0",
+ "id": "edca3507",
"metadata": {},
"outputs": [],
"source": [
@@ -2665,7 +3030,7 @@
},
{
"cell_type": "markdown",
- "id": "7e415861",
+ "id": "e9d74bb5",
"metadata": {},
"source": [
"## Group By"
@@ -2673,7 +3038,7 @@
},
{
"cell_type": "markdown",
- "id": "8b2d72fb",
+ "id": "ae3ec2eb",
"metadata": {},
"source": [
"### Table.groupby()\n",
@@ -2724,7 +3089,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "0789d3f4",
+ "id": "189eabbe",
"metadata": {
"scrolled": true
},
@@ -2741,7 +3106,7 @@
},
{
"cell_type": "markdown",
- "id": "8baae3c9",
+ "id": "d805052d",
"metadata": {},
"source": [
"Group on the `Animal` column and calculate the mean of the resulting `Max Speed` and `Max Altitude` columns."
@@ -2750,7 +3115,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "734cb6ff",
+ "id": "00cc7660",
"metadata": {
"scrolled": true
},
@@ -2761,7 +3126,7 @@
},
{
"cell_type": "markdown",
- "id": "b3b759af",
+ "id": "c7ef160d",
"metadata": {},
"source": [
"Example table with multiple columns to group on."
@@ -2770,7 +3135,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "7966c28c",
+ "id": "3204bd59",
"metadata": {},
"outputs": [],
"source": [
@@ -2786,7 +3151,7 @@
},
{
"cell_type": "markdown",
- "id": "e3ab5b1f",
+ "id": "77008f71",
"metadata": {},
"source": [
"Group on multiple columns using thier indexes."
@@ -2795,7 +3160,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "c01d3cc9",
+ "id": "cfd22d01",
"metadata": {},
"outputs": [],
"source": [
@@ -2804,7 +3169,7 @@
},
{
"cell_type": "markdown",
- "id": "d46304f0",
+ "id": "58e77d29",
"metadata": {},
"source": [
"Example table with Nulls."
@@ -2813,7 +3178,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "dc222240",
+ "id": "96bb6e4d",
"metadata": {},
"outputs": [],
"source": [
@@ -2831,7 +3196,7 @@
},
{
"cell_type": "markdown",
- "id": "4c38e902",
+ "id": "a13c11f4",
"metadata": {},
"source": [
"Group on column `a` and keep null groups."
@@ -2840,7 +3205,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "833e4a92",
+ "id": "95d7734a",
"metadata": {
"scrolled": true
},
@@ -2851,7 +3216,7 @@
},
{
"cell_type": "markdown",
- "id": "c26a98ff",
+ "id": "1645ae2b",
"metadata": {},
"source": [
"Group on column `a` keeping null groups and not using the groups as an index column."
@@ -2860,7 +3225,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "bb5d1bac",
+ "id": "bf8dc14c",
"metadata": {},
"outputs": [],
"source": [
@@ -2869,7 +3234,7 @@
},
{
"cell_type": "markdown",
- "id": "af8fad39",
+ "id": "undefined-bruce",
"metadata": {},
"source": [
"## Apply\n",
@@ -2917,7 +3282,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "02f41281",
+ "id": "cooperative-construction",
"metadata": {},
"outputs": [],
"source": [
@@ -2928,7 +3293,7 @@
},
{
"cell_type": "markdown",
- "id": "cf555661",
+ "id": "micro-dodge",
"metadata": {},
"source": [
"Apply square root on each item within a column"
@@ -2937,7 +3302,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "173acc13",
+ "id": "handmade-bridal",
"metadata": {},
"outputs": [],
"source": [
@@ -2946,7 +3311,7 @@
},
{
"cell_type": "markdown",
- "id": "a00dda0c",
+ "id": "accepted-planning",
"metadata": {},
"source": [
"Apply a reducing function sum on either axis"
@@ -2955,7 +3320,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "4936ea30",
+ "id": "acquired-wholesale",
"metadata": {},
"outputs": [],
"source": [
@@ -2965,60 +3330,17 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "5df4a3ac",
+ "id": "informal-algebra",
"metadata": {},
"outputs": [],
"source": [
"tab.apply(lambda x: sum(x), axis=1)"
]
- },
- {
- "cell_type": "markdown",
- "id": "8da6da7c",
- "metadata": {},
- "source": [
- "## Aggregate\n",
- "\n",
- "### Table.agg()\n",
- "\n",
- "```\n",
- "Table.agg(\n",
- " func,\n",
- " axis=0,\n",
- " *args,\n",
- " **kwargs\n",
- ")\n",
- "```\n",
- "\n",
- "Aggregate data using one or more operations over a specified axis\n",
- "\n",
- "Objects passed to a function are passed as kx vector/list objects.\n",
- "\n",
- "**Parameters:**\n",
- "\n",
- "| Name | Type | Description | Default |\n",
- "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n",
- "| func | function, str, list or dict | Function to use for aggregating the data. If a function this must either work when passed a `Table` or when passed to `Table.apply`
Accepted combinations are:
- function
- string function name
- list of functions and/or function names, e.g. `[kx.q.sum, 'mean']`
- dict of axis labels -> functions or function names
| |\n",
- "| `*args` | any | Positional arguments to pass to `func` in addition to the kx list. | |\n",
- "| axis | int | The axis along which the function is applied, `0` applies function to each column, at present row based application is not supported. | 0 | \n",
- "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n",
- "\n",
- "\n",
- "**Returns:**\n",
- "\n",
- "| Type | Description |\n",
- "| :-----------------------: | :---------------------------------------------- |\n",
- "| List, Dictionary or Table | Result of applying `func` along the giveen axis of the `kx.Table`. |\n",
- "\n",
- "**Examples:**\n",
- "\n",
- "Example Table."
- ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -3032,7 +3354,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py
index 954896d..a310efd 100644
--- a/src/pykx/pandas_api/pandas_indexing.py
+++ b/src/pykx/pandas_api/pandas_indexing.py
@@ -328,6 +328,32 @@ def _rename_columns(tab, labels):
tab, labels) # noqa
else:
return q('{c:cols x; c:@[c;c?key y;y]; c xcol x}', tab, labels)
+
+
+def _pre_suf_fix_columns(tab, fix, suf= True):
+ if "Keyed" in str(type(tab)):
+ f = ("c: `$ (string c) ,\: string y;" if suf
+ else "c: `$(string y) ,/: string c;")
+ return q("{c:cols value x;"
+ + f
+ + "key[x]!c xcol value x}",
+ tab, fix) # noqa
+ else:
+ f = ("c: `$(string c) ,\: string y;" if suf
+ else "c: `$ (string y) ,/: string c;")
+ return q('{c:cols x;' + f + 'c xcol x}', tab, fix)
+
+
+def _pre_suf_fix_index(tab, fix, suf= True):
+ if "Keyed" in str(type(tab)):
+ f = ("idx: `$(string idx) ,\: string y;" if suf
+ else " idx: `$(string y) ,/: string idx;" )
+ return q("{idx:first flip key x;"
+ + f
+ + "([] idx)!value x}",
+ tab, fix) # noqa
+ else:
+ return ValueError('nyi')
class PandasIndexing:
@@ -453,6 +479,32 @@ def rename(self, labels=None, index=None, columns=None, axis=0,
t = _rename_columns(t, columns)
return t
+
+ def add_suffix(self, suffix=None, axis=0):
+ t = self
+ if suffix:
+ if axis == 0:
+ t = _pre_suf_fix_columns(t, suffix, suf=True)
+ elif axis == 1:
+ t = _pre_suf_fix_index(t, suffix, suf=True)
+ else:
+ raise ValueError(f'No axis named {axis}')
+ else:
+ raise ValueError("missing 1 required positional argument: 'suffix'")
+ return t
+
+ def add_prefix(self, prefix=None, axis=0):
+ t = self
+ if prefix:
+ if axis == 0:
+ t = _pre_suf_fix_columns(t, prefix, suf=False)
+ elif axis == 1:
+ t = _pre_suf_fix_index(t, prefix, suf=False)
+ else:
+ raise ValueError(f'No axis named {axis}')
+ else:
+ raise ValueError("missing 1 required positional argument: 'prefix'")
+ return t
def sample(self, n=None, frac=None, replace=False, weights=None,
random_state=None, axis=None, ignore_index=False):
@@ -566,7 +618,7 @@ def __init__(self, tab):
def __getitem__(self, loc):
if not isinstance(loc, tuple) or len(loc) != 2:
raise ValueError('Expected 2 values for call to Table.at[]')
- if q('{y in keys x}', self.tab, loc[1]):
+ if q('{y in keys x(string y)}', self.tab, loc[1]):
raise QError('Can\'t get the value of a key in a KeyedTable using at.')
return q('{x[y][z]}', self.tab, loc[0], loc[1])
diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 39668d5..819fc54 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -1,5 +1,6 @@
from . import api_return
from ..exceptions import QError
+from typing import Union, Dict
def _init(_q):
@@ -154,6 +155,29 @@ def mean(self, axis: int = 0, numeric_only: bool = False):
tab
)
+ @api_return
+ def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False):
+ tab = self
+ if 'Keyed' in str(type(tab)):
+ tab = q('{(keys x) _ 0!x}', tab)
+ if numeric_only:
+ tab = _get_numeric_only_subtable(tab)
+ key_str = '' if axis == 0 else '`$string '
+ val_str = '' if axis == 0 else '"f"$value '
+ query_str = 'cols[tab]' if axis == 0 else 'til[count[tab]]'
+ where_str = ' where not (::)~/:r[;1]'
+
+ res = q(f'{{[tab]{query_str}!count[{query_str}]#0n}}', tab)
+ if ddof != len(tab.pd()):
+ res = q(
+ '{[tab]'
+ f'r:{{[tab; x] ({key_str}x; {{avg sqrt (sum xexp[x-(avg x);2]) % count[x]-{ddof}}} {val_str}tab[x])}}[tab;] each {query_str};'
+ f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}',
+ tab
+ )
+ return res
+
+
@api_return
def median(self, axis: int = 0, numeric_only: bool = False):
tab = self
@@ -210,6 +234,27 @@ def abs(self, numeric_only=False):
tab = _get_numeric_only_subtable(self)
return q.abs(tab)
+ @api_return
+ def round(self, decimals: Union[int, Dict[str, int]] = 0):
+ tab = self
+ if 'Keyed' in str(type(tab)):
+ tab = q('{(keys x) _ 0!x}', tab)
+
+ return q("""{[t;d]
+ generate_ops:{[vdic]
+ tuples:{flip(2;count[x])#key[x],value[x]}[vdic];
+ key[vdic]!({(({"F"$.Q.f[y]x}[;x[1]])';x[0])}')tuples};
+ get_float_cols:{(key[ct]@where 9=value[ct:abs type each first x])};
+ fcols:get_float_cols[t];
+ ops:$[-7h=type d;
+ [$[0=d;
+ fcols!({(_:';x)}')fcols;
+ [vdic:fcols!count[fcols]#d;
+ generate_ops vdic]]];
+ [vdic:(key[d]i)!value[d]i:where key[d] in fcols;
+ generate_ops vdic]];
+ ![t;();0b;ops]}""", tab, decimals)
+
@convert_result
def all(self, axis=0, bool_only=False, skipna=True):
res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only)
@@ -245,6 +290,17 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0):
min_count
), cols)
+ @convert_result
+ def skew(self, axis=0, skipna=True, numeric_only=False):
+ res, cols = preparse_computations(self, axis, skipna, numeric_only)
+ return (q(
+ '{[row]'
+ 'm:{(sum(y-avg y)xexp x)%count y};'
+ 'u:{sqrt[n*n-1]%neg[2]+n:count x};'
+ '{[u;m;x](u[x]*m[3][x]%(m[2][x]xexp 3%2))}[u;m]each row}',
+ res
+ ), cols)
+
@convert_result
def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
res, cols = preparse_computations(self, axis, skipna, numeric_only)
@@ -311,3 +367,8 @@ def agg(self, func, axis=0, *args, **kwargs): # noqa: C901
return data
else:
return (q('{(flip enlist[`function]!enlist x)!y}', keyname, data))
+
+ @convert_result
+ def count(self, axis=0, numeric_only=False):
+ res, cols = preparse_computations(self, axis, True, numeric_only)
+ return (q('{[row] count each row}',res), cols)
\ No newline at end of file
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index acfe55f..a5fffbe 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -1,2031 +1,2187 @@
-"""Tests for the Pandas API."""
-
-import sys
-
-import numpy as np
-import pandas as pd
-import pytest
-
-
-def check_result_and_type(kx, tab, result):
- if ((isinstance(tab, kx.Table) or isinstance(tab, kx.KeyedTable))
- and tab.py() == result.py() if isinstance(result, kx.K) else result):
- return True
- return False
-
-
-def test_api_meta_error(kx):
- with pytest.raises(Exception):
- kx.PandasAPI()
-
-
-def test_df_columns(q):
- df = q('([] til 10; 10?10)')
- assert all(df.columns == df.pd().columns)
-
-
-def test_df_dtypes(q):
- df = q('([] til 10; 10?0Ng; 10?1f;0f,til 9;10?("abc";"def");10?1e)')
- assert all(df.dtypes.columns == ['columns', 'type'])
- assert q('{x~y}',
- q('("kx.LongAtom";"kx.GUIDAtom";"kx.FloatAtom";"kx.List";"kx.CharVector";"kx.RealAtom")'), # noqa: E501
- df.dtypes['type'])
-
-
-def test_df_empty(q):
- df = q('([] til 10; 10?10)')
- assert df.empty == df.pd().empty
- df = q('([] `long$(); `long$())')
- assert df.empty == df.pd().empty
-
-
-def test_df_ndim(q):
- df = q('([] til 10; 10?10)')
- assert(df.ndim == df.pd().ndim)
-
-
-def test_df_ndim_multicol(q):
- df = q('([] til 10; 10?10; 10?1f)')
- assert(df.ndim == df.pd().ndim)
-
-
-def test_df_shape(q):
- df = q('([] til 10; 10?10)')
- assert (df.shape == df.pd().shape)
-
-
-def test_df_size(q):
- df = q('([] til 10; 10?10)')
- assert (df.size == df.pd().size)
-
-
-def test_df_head(kx, q):
- df = q('([] til 10; 10 - til 10)')
- assert check_result_and_type(kx, df.head(), q('5 # ([] til 10; 10 - til 10)'))
- assert check_result_and_type(kx, df.head(2), q('2 # ([] til 10; 10 - til 10)'))
- df = q('([til 10] 10 - til 10)')
- assert check_result_and_type(kx, df.head(), q('5 # ([til 10] 10 - til 10)'))
- assert check_result_and_type(kx, df.head(2), q('2 # ([til 10] 10 - til 10)'))
-
-
-def test_df_tail(kx, q):
- df = q('([] til 10; 10 - til 10)')
- assert check_result_and_type(kx, df.tail(), q('5 _ ([] til 10; 10 - til 10)'))
- assert check_result_and_type(kx, df.tail(2), q('8 _ ([] til 10; 10 - til 10)'))
- df = q('([til 10] 10 - til 10)')
- assert check_result_and_type(kx, df.tail(), q('5 _ ([til 10] 10 - til 10)'))
- assert check_result_and_type(kx, df.tail(2), q('8 _ ([til 10] 10 - til 10)'))
-
-
-def test_df_pop(kx, q):
- df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
- assert check_result_and_type(kx, df.pop('x'), {'x': [x for x in range(10)]})
- assert check_result_and_type(kx, df.pop('y'), {'y': [10 - x for x in range(10)]})
- df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
- df.pop('z')
- assert check_result_and_type(
- kx,
- df.head(),
- {
- 'x': [x for x in range(5)],
- 'y': [10 - x for x in range(5)]
- }
- )
- df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
- df.pop(['y', 'z'])
- assert check_result_and_type(kx, df, {'x': [x for x in range(10)]})
-
-
-def test_df_get(kx, q):
- df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)')
- assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
- assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
- assert check_result_and_type(kx, df.get(['x', 'y']), {
- 'x': [x for x in range(10)],
- 'y': [10 - x for x in range(10)]
- })
- assert df.get(['y', 'z']).py() == df[['y', 'z']].py()
- assert df.get(['x', 'y']).py() == df[['x', 'y']].py()
- assert df.get('r') is None
- assert df.get('r', default=5) == 5
- assert df.get(['x', 'r']) is None
- assert df.get(['x', 'r'], default=5) == 5
-
-
-def test_df_get_keyed(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: 10?`a`b`c)')
- assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
- assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
- assert check_result_and_type(kx, df.get(['x', 'y']), {
- 'x': [x for x in range(10)],
- 'y': [10 - x for x in range(10)]
- })
- assert df.get(['y', 'z']).py() == q.value(df[['y', 'z']]).py()
- assert df.get('r') is None
- assert df.get('r', default=5) == 5
- assert df.get(['x', 'r']) is None
- assert df.get(['x', 'r'], default=5) == 5
-
-
-def test_df_at(q):
- df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)')
- for i in range(10):
- assert df.at[i, 'y'].py() == 10 - i
- df.at[i, 'y'] = 2
- assert df.at[i, 'y'].py() == 2
- assert not df.replace_self
- with pytest.raises(ValueError):
- df.at[0]
- with pytest.raises(ValueError):
- df.at[0] = 5
-
-
-def test_df_at_keyed(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: 10?`a`b`c)')
- for i in range(10):
- assert df.at[i, 'y'].py() == 10 - i
- df.at[i, 'y'] = 2
- assert df.at[i, 'y'].py() == 2
- assert not df.replace_self
- with pytest.raises(ValueError):
- df.at[0]
- with pytest.raises(ValueError):
- df.at[0] = 5
- with pytest.raises(kx.QError):
- df.at[0, 'x']
- with pytest.raises(kx.QError):
- df.at[0, 'x'] = 5
-
-
-def test_df_replace_self(q):
- df = q('([x: 0, til 10] y: 0, 10 - til 10; z: 11?`a`b`c)')
- df.replace_self = True
- df.tail(10)
- for i in range(10):
- assert df.at[i, 'y'].py() == 10 - i
- df.at[i, 'y'] = 2
- assert df.at[i, 'y'].py() == 2
- assert df.replace_self
-
-
-def test_df_loc(kx, q):
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
- assert check_result_and_type(kx, df.loc[0, :], {'y': [10, 9], 'z': ['a', 'a']})
-
-
-def test_df_loc_keyed(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
- assert check_result_and_type(kx, df.loc[df['y'] < 100], df.py())
-
-
-def test_df_loc_cols(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.loc[[0, 1], 'z':], {'z': ['a', 'a']})
- assert check_result_and_type(kx, df[[0, 1], :'y'], {'y': [10, 9]})
- assert check_result_and_type(kx, df[[0, 1], 'y':'y'], {'y': [10, 9]})
- assert check_result_and_type(kx, df[[0, 1], :2], {'y': [10, 9]})
-
-
-def test_df_getitem(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df[[1]], {'y': 9, 'z': 'a'})
- assert check_result_and_type(kx, df[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
- assert check_result_and_type(kx, df[:], df.py())
- assert check_result_and_type(kx, df[:, ['x', 'y']], q('([x: til 10] y: 10 - til 10)').py())
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(
- kx,
- df[df['z'] == 'a'],
- {
- 'x': [0, 1],
- 'y': [10, 9],
- 'z': ['a', 'a']
- }
- )
-
-
-def test_df_loc_set(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- df.loc[df.loc['z'] == 'a', 'y'] = 99
- assert check_result_and_type(
- kx,
- df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- df.loc[df['z'] == 'a', 'y'] = 99
- assert check_result_and_type(
- kx,
- df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- with pytest.raises(ValueError):
- df.loc[df['z'] == 'a'] = 99
- with pytest.raises(ValueError):
- df.loc[df['z'] == 'a', 3] = 99
- with pytest.raises(ValueError):
- df.loc[df['z'] == 'a', 'y', 'z'] = 99
-
-
-def test_df_set_cols(kx, q):
- qtab = q('([]til 10;10?1f;10?100)')
- df = qtab
- df['x3'] = 99
- assert check_result_and_type(
- kx,
- df,
- q('{update x3:99 from x}', qtab).py()
- )
- df = qtab
- df['x'] = q('reverse til 10')
- assert check_result_and_type(
- kx,
- df,
- q('{update x:reverse til 10 from x}', qtab).py()
- )
- df = qtab
- df['x'] = ['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd', 'e', 'e']
- assert check_result_and_type(
- kx,
- df,
- q('{update x:`a`a`b`b`c`c`d`d`e`e from x}', qtab).py()
- )
- df = qtab
- df[['x', 'x3']] = [q('reverse til 10'), 99]
- assert check_result_and_type(
- kx,
- df,
- q('{update x:reverse til 10, x3:99 from x}', qtab).py()
- )
- df = qtab
- df[['x', 'x3']] = [q('reverse til 10'), ['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd', 'e', 'e']]
- assert check_result_and_type(
- kx,
- df,
- q('{update x:reverse til 10, x3:`a`a`b`b`c`c`d`d`e`e from x}', qtab).py()
- )
-
-
-def test_df_iloc_set(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- df.iloc[df.loc['z'] == 'a', 'y'] = 99
- assert check_result_and_type(
- kx,
- df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- df.iloc[df['z'] == 'a', 'y'] = 99
- assert check_result_and_type(
- kx,
- df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- with pytest.raises(ValueError):
- df.iloc[df['z'] == 'a'] = 99
- with pytest.raises(ValueError):
- df.iloc[df['z'] == 'a', 3] = 99
- with pytest.raises(ValueError):
- df.iloc[df['z'] == 'a', 'y', 'z'] = 99
-
-
-def test_df_iloc(kx, q):
- df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.iloc[:], df.py())
- assert check_result_and_type(kx, df.iloc[:, :-1], q('([x: til 10] y: 10 - til 10)').py())
- assert check_result_and_type(kx, df.iloc[df['y'] < 100], df.py())
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.iloc[:-2], df.head(8).py())
- assert check_result_and_type(kx, df.iloc[0], {'x': 0, 'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.iloc[[0]], {'x': 0, 'y': 10, 'z': 'a'})
- assert check_result_and_type(
- kx,
- df.iloc[::-1],
- {
- 'x': [10 - x for x in range(10)],
- 'y': [x for x in range(10)],
- 'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a']
- }
- )
- assert check_result_and_type(
- kx,
- df.head(4).iloc[[True, False, True, False]],
- {
- 'x': [0, 2],
- 'y': [10, 8],
- 'z': ['a', 'b']
- })
- assert check_result_and_type(
- kx,
- df.iloc[lambda x: [x % 2 == 0 for x in range(len(x))]],
- {
- 'x': [0, 2, 4, 6, 8],
- 'y': [10, 8, 6, 4, 2],
- 'z': ['a', 'b', 'c', 'd', 'e']
- }
- )
- assert check_result_and_type(
- kx,
- df.iloc[df['y'] > 5],
- {
- 'x': [0, 1, 2, 3, 4],
- 'y': [10, 9, 8, 7, 6],
- 'z': ['a', 'a', 'b', 'b', 'c']
- }
- )
-
-
-def test_df_iloc_with_cols(kx, q):
- df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.iloc[0, 0], {'x': 0, 'z': 'a'})
- assert check_result_and_type(kx, df.iloc[[0], [2]], {'z': 'a'})
- assert check_result_and_type(
- kx,
- df.iloc[::-1, ::-1],
- {
- 'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a'],
- 'y': [x for x in range(10)],
- 'x': [10 - x for x in range(10)]
- }
- )
- assert check_result_and_type(
- kx,
- df.head(4).iloc[[True, False, True, False], [False, True, False]],
- {
- 'y': [10, 8]
- }
- )
- assert check_result_and_type(
- kx,
- df.iloc[lambda x: [x % 2 == 0 for x in range(len(x))], lambda x: [0, 2]],
- {
- 'x': [0, 2, 4, 6, 8],
- 'z': ['a', 'b', 'c', 'd', 'e']
- }
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, :],
- q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, 'y':],
- q('([] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, :'y'],
- q('([] x: til 10; y: 10 - til 10)').py()
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, 1:],
- q('([] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, :2],
- q('([] x: til 10; y: 10 - til 10)').py()
- )
- assert check_result_and_type(
- kx,
- df.iloc[:, :-2],
- q('([] x: til 10; y: 10 - til 10)').py()
- )
- assert check_result_and_type(kx, df.loc[df['z']=='a', ['x', 'y']], {'x': [0, 1], 'y': [10, 9]})
-
-
-def test_table_validate(kx):
- # Copy kwarg
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- with pytest.raises(ValueError):
- tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='1:1')
- with pytest.raises(ValueError):
- tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='m:1')
- with pytest.raises(ValueError):
- tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='1:m')
-
-
-def test_table_merge_copy(kx, q):
- # Copy kwarg
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- tab1.merge(tab2, left_on='lkey', right_on='rkey', copy=False)
- assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
-
- # Replace_self property
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab1.replace_self = True
- tab2 = kx.toq(df2)
- tab1.merge(tab2, left_on='lkey', right_on='rkey')
- assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
-
-
-def test_table_inner_merge(kx, q):
- # Merge on keys
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey'
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey'
- ).pd()
- )
-
- # Merge on keys KeyedTable
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = q('{1!x}', kx.toq(df1))
- tab2 = q('{1!x}', kx.toq(df2))
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey'
- ).equals(
- q('{0!x}', tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey'
- )).pd()
- )
-
- # Merge on differing keys
- df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
- df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(df2, on='a').equals(tab1.merge(tab2, on='a').pd())
-
- # Merge on same indexes
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_index=True,
- right_index=True
- ).equals(
- tab1.merge(
- tab2,
- left_index=True,
- right_index=True
- ).pd()
- )
-
- # Merge on different indexes
- df1 = pd.DataFrame(
- {
- 'lkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [1, 2, 3, 5]
- },
- index=[4, 3, 2, 1]
- )
- df2 = pd.DataFrame(
- {
- 'rkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [5, 6, 7, 8]
- },
- index=[0, 1, 2, 3]
- )
- tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
- tab1 = q('{1!x}', tab1)
- tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
- tab2 = q('{1!x}', tab2)
- res = tab1.merge(tab2, left_index=True, right_index=True)
- assert q(
- '{x~y}',
- tab1.merge(tab2, left_index=True, right_index=True, q_join=True),
- q(
- '{1!x}',
- q.ij(
- q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)),
- q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)
- )
- )
- )
- assert isinstance(res, kx.KeyedTable)
- df_res = df1.merge(df2, left_index=True, right_index=True)
- # assert our index does match properly before removing it
- assert q('0!', res)['idx'].py() == list(df_res.index)
- # We have idx as a column so we have to remove it to be equal as it won't convert
- # to the pandas index column automatically
- res = q('{(enlist `idx)_(0!x)}', res)
- df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
- df_res.pop('index')
- assert df_res.equals(res.pd())
-
-
-def test_table_left_merge(kx, q):
- if sys.version_info.minor > 7:
- # Merge on keys
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='left'
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='left'
- ).pd()
- )
-
- # Merge on keys KeyedTable
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = q('{1!x}', kx.toq(df1))
- tab2 = q('{1!x}', kx.toq(df2))
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='left'
- ).equals(
- q('{0!x}', tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='left'
- )).pd()
- )
-
- # Merge on differing keys
- df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
- df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- tab_res = tab1.merge(tab2, on='a', how='left').pd()
- assert str(tab_res.at[1, 'c']) == '--'
- tab_res.at[1, 'c'] = np.NaN
- assert df1.merge(df2, on='a', how='left').equals(tab_res)
-
- # Merge on same indexes
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_index=True,
- right_index=True,
- how='left'
- ).equals(
- tab1.merge(
- tab2,
- left_index=True,
- right_index=True,
- how='left'
- ).pd()
- )
-
- # Merge on different indexes
- df1 = pd.DataFrame(
- {
- 'lkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [1, 2, 3, 5]
- },
- index=[4, 3, 2, 1]
- )
- df2 = pd.DataFrame(
- {
- 'rkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [5, 6, 7, 8]
- },
- index=[0, 1, 2, 3]
- )
- tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
- tab1 = q('{1!x}', tab1)
- tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
- tab2 = q('{1!x}', tab2)
- res = tab1.merge(tab2, left_index=True, right_index=True, how='left')
- assert q(
- '{x~y}',
- tab1.merge(tab2, left_index=True, right_index=True, how='left', q_join=True),
- q(
- '{1!x}',
- q.lj(
- q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)),
- q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)
- )
- )
- )
- assert isinstance(res, kx.KeyedTable)
- df_res = df1.merge(df2, left_index=True, right_index=True, how='left')
- # assert our index does match properly before removing it
- assert q('0!', res)['idx'].py() == list(df_res.index)
- # We have idx as a column so we have to remove it to be equal as it won't convert
- # to the pandas index column automatically
- res = q('{(enlist `idx)_(0!x)}', res).pd()
- df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
- df_res.pop('index')
- res.at[0, 'rkey'] = np.NaN
- res.at[0, 'value_y'] = np.NaN
- assert df_res.equals(res)
-
- df1 = pd.DataFrame(
- {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
- )
- df2 = pd.DataFrame({'key': ['foo', 'bar', 'baz', 'foo', None], 'value': [5, 6, 7, 8, 99]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
-
- df_res = df1.merge(df2, on='key', how='left')
- res = tab1.merge(tab2, on='key', how='left').pd()
- assert str(res.at[6, 'value_y']) == '--'
- res.at[6, 'value_y'] = np.NaN
- assert res.equals(df_res)
-
-
-def test_table_right_merge(kx, q):
- if sys.version_info.minor > 7:
- # Merge on keys
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='right'
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='right'
- ).pd()
- )
-
- # Merge on keys KeyedTable
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = q('{1!x}', kx.toq(df1))
- tab2 = q('{1!x}', kx.toq(df2))
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='right'
- ).equals(
- q('{0!x}', tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='right'
- )).pd()
- )
-
- # Merge on differing keys
- df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
- df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- tab_res = tab1.merge(tab2, on='a', how='right').pd()
- assert str(tab_res.at[1, 'b']) == '--'
- tab_res.at[1, 'b'] = np.NaN
- assert df1.merge(df2, on='a', how='right').equals(tab_res)
-
- # Merge on same indexes
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_index=True,
- right_index=True,
- how='right'
- ).equals(
- tab1.merge(
- tab2,
- left_index=True,
- right_index=True,
- how='right'
- ).pd()
- )
-
- # Merge on different indexes
- df1 = pd.DataFrame(
- {
- 'lkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [1, 2, 3, 5]
- },
- index=[4, 3, 2, 1]
- )
- df2 = pd.DataFrame(
- {
- 'rkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [5, 6, 7, 8]
- },
- index=[0, 1, 2, 3]
- )
- tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
- tab1 = q('{1!x}', tab1)
- tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
- tab2 = q('{1!x}', tab2)
- res = tab1.merge(tab2, left_index=True, right_index=True, how='right')
- assert q(
- '{x~y}',
- tab1.merge(tab2, left_index=True, right_index=True, how='right', q_join=True),
- q(
- '{1!x}',
- q.lj(
- q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)),
- q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)
- )
- )
- )
- assert isinstance(res, kx.KeyedTable)
- df_res = df1.merge(df2, left_index=True, right_index=True, how='right')
- # assert our index does match properly before removing it
- assert q('0!', res)['idx'].py() == list(df_res.index)
- # We have idx as a column so we have to remove it to be equal as it won't convert
- # to the pandas index column automatically
- res = q('{(enlist `idx)_(0!x)}', res).pd()
- df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
- df_res.pop('index')
- res.at[0, 'lkey'] = np.NaN
- res.at[0, 'value_x'] = np.NaN
- assert df_res.equals(res)
-
- df1 = pd.DataFrame(
- {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
- )
- df2 = pd.DataFrame({'key': ['foo', 'bar', 'baz', 'foo', None], 'value': [5, 6, 7, 8, 99]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
-
- df_res = df1.merge(df2, on='key', how='right')
- res = tab1.merge(tab2, on='key', how='right').pd()
- assert str(res.at[6, 'key']) == ''
- res.at[6, 'key'] = None
- assert res.equals(df_res)
-
-
-def test_table_outer_merge(kx, q):
- if sys.version_info.minor > 7:
- # Merge on keys
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='outer'
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='outer'
- ).pd()
- )
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='outer',
- sort=True
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='outer',
- sort=True
- ).pd()
- )
-
- # Merge on keys KeyedTable
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = q('{1!x}', kx.toq(df1))
- tab2 = q('{1!x}', kx.toq(df2))
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='outer',
- sort=True
- ).equals(
- q('{0!x}', tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='outer',
- sort=True
- )).pd()
- )
-
- # Merge on differing keys
- df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
- df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- tab_res = tab1.merge(tab2, on='a', how='outer').pd()
- assert str(tab_res.at[1, 'c']) == '--'
- tab_res.at[1, 'c'] = np.NaN
- assert str(tab_res.at[2, 'b']) == '--'
- tab_res.at[2, 'b'] = np.NaN
- assert df1.merge(df2, on='a', how='outer').equals(tab_res)
-
- # Merge on same indexes
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_index=True,
- right_index=True,
- how='outer'
- ).equals(
- tab1.merge(
- tab2,
- left_index=True,
- right_index=True,
- how='outer'
- ).pd()
- )
- assert df1.merge(
- df2,
- left_index=True,
- right_index=True,
- how='outer',
- sort=True
- ).equals(
- tab1.merge(
- tab2,
- left_index=True,
- right_index=True,
- how='outer',
- sort=True
- ).pd()
- )
-
- # Merge on different indexes
- df1 = pd.DataFrame(
- {
- 'lkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [1, 2, 3, 5]
- },
- index=[4, 3, 2, 1]
- )
- df2 = pd.DataFrame(
- {
- 'rkey': ['foo', 'bar', 'baz', 'foo'],
- 'value': [5, 6, 7, 8]
- },
- index=[0, 1, 2, 3]
- )
- tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
- tab1 = q('{1!x}', tab1)
- tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
- tab2 = q('{1!x}', tab2)
- res = tab1.merge(tab2, left_index=True, right_index=True, how='outer')
- assert isinstance(res, kx.KeyedTable)
- df_res = df1.merge(df2, left_index=True, right_index=True, how='outer')
- # assert our index does match properly before removing it
- assert q('0!', res)['idx'].py() == list(df_res.index)
- # We have idx as a column so we have to remove it to be equal as it won't convert
- # to the pandas index column automatically
- res = q('{(enlist `idx)_(0!x)}', res).pd()
- df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
- df_res.pop('index')
- res.at[0, 'lkey'] = np.NaN
- res.at[0, 'value_x'] = np.NaN
- res.at[4, 'rkey'] = np.NaN
- res.at[4, 'value_y'] = np.NaN
- assert df_res.equals(res)
-
- df1 = pd.DataFrame(
- {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
- )
- df2 = pd.DataFrame(
- {
- 'key': ['foo', 'bar', 'baz', 'foo', None],
- 'value': [5.0, 6.0, 7.0, 8.0, 99.0]
- }
- )
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- df_res = df1.merge(df2, on='key', how='outer')
- res = tab1.merge(tab2, on='key', how='outer').pd()
- assert res.at[7, 'key'] == ''
- res.at[7, 'key'] = None
- assert df_res.equals(res)
-
-
-def test_cross_merge(kx, q):
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- assert df1.merge(df2, how='cross').equals(tab1.merge(tab2, how='cross').pd())
- tab1 = kx.q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
- tab1 = kx.q('{1!x}', tab1)
- tab2 = kx.q('{`idx xcols update idx: til count x from x}', tab2)
- tab2 = kx.q('{1!x}', tab2)
- df_res = df1.merge(df2, how='cross')
- res = tab1.merge(tab2, how='cross')
- assert q('0!', res)['idx'].py() == list(df_res.index)
- # We have idx as a column so we have to remove it to be equal as it won't convert
- # to the pandas index column automatically
- res = q('{(enlist `idx)_(0!x)}', res).pd()
- assert df_res.equals(res)
-
-
-def test_merge_errors(kx):
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- with pytest.raises(ValueError):
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='outer',
- suffixes=(False, False)
- )
-
-
-def test_cross_merge_errors(kx, q):
- df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
- df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
- tab1 = kx.toq(df1)
- tab2 = kx.toq(df2)
- with pytest.raises(ValueError) as e1:
- tab1.merge(tab2, how='cross', on='lkey')
- assert (
- 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
- in str(e1.value)
- )
- with pytest.raises(ValueError) as e2:
- tab1.merge(tab2, how='cross', left_on='lkey', right_on='rkey')
- assert (
- 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
- in str(e2.value)
- )
- with pytest.raises(ValueError) as e3:
- tab1.merge(tab2, how='cross', left_index=True, right_index=True)
- assert (
- 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
- in str(e3.value)
- )
-
-
-def test_api_vs_pandas(kx, q):
- tab = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- df = tab.pd()
- assert q(
- '{x ~ y}',
- tab[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')],
- q('{value x}', kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')]))
- )
- assert q(
- '{x ~ y}',
- tab[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')][tab['x'] > 5],
- q(
- '{value x}',
- kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')][df['x'] > 5])
- )
- )
- assert q(
- '{x ~ y}',
- tab.iloc[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')].iloc[tab['x'] > 5],
- q(
- '{value x}',
- kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')][df['x'] > 5])
- )
- )
-
-
-def test_df_astype_vanilla_checks(kx, q):
- df = q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
- assert check_result_and_type(
- kx,
- df.astype(kx.LongVector).py(),
- q('([] c1:1 2 3j; c2:1 2 3j; c3:1 2 3j; c4:1 2 3j)').py()
- )
- assert check_result_and_type(
- kx,
- df.astype({'c1': kx.LongVector, 'c2': 'kx.ShortVector'}).py(),
- q('([] c1:1 2 3j; c2:1 2 3h; c3:1 2 3h; c4:1 2 3i)').py()
- )
-
-
-def test_df_astype_string_to_sym(kx, q):
- df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
- assert check_result_and_type(
- kx,
- df.astype({'c4': kx.SymbolVector, 'c5': kx.SymbolVector}).py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:`abc`def`ghi;c5:`a`b`c;c6:(1 2 3;4 5 6;7 8 9))''').py()
- )
- assert check_result_and_type(
- kx,
- df.astype({'c4': kx.SymbolVector}).py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:`abc`def`ghi;c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
- )
-
-
-def test_df_astype_value_errors(kx, q):
- df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
- # Check errors parameter set to 'ignore'
- assert check_result_and_type(
- kx,
- df.astype({'c6': kx.CharVector}, errors='ignore').py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
- )
- with pytest.raises(ValueError,
- match=r"This method can only handle casting string complex columns to "
- "symbols. Other complex column data or"
- " casting to other data is not supported."):
- raise df.astype({'c4': kx.ShortVector})
- with pytest.raises(ValueError,
- match=r"This method can only handle casting string complex columns to "
- "symbols. Other complex column data or"
- " casting to other data is not supported."):
- raise df.astype({'c6': kx.CharVector})
- with pytest.raises(kx.QError,
- match=r"Not supported: "
- "Error casting LongVector to GUIDVector with q error: type"):
- raise df.astype({'c3': kx.GUIDVector})
- with pytest.raises(NotImplementedError,
- match=r"Currently only the default value of True is accepted for copy"):
- raise df.astype({'c3': kx.ShortVector}, copy='False')
- with pytest.raises(ValueError,
- match=r"Column name passed in dictionary not present in df table"):
- raise df.astype({'c100': kx.ShortVector})
- with pytest.raises(kx.QError,
- match=r'Value passed does not match PyKX wrapper type'):
- raise df.astype({'c1': 'nomatchvalue'})
- with pytest.raises(kx.QError,
- match=r'Value passed does not match PyKX wrapper type'):
- raise df.astype('nomatchvalue')
- df = q('''([] c1:("abc";"def";"ghi");c2:(1 2 3;4 5 6;7 8 9))''')
- with pytest.raises(ValueError,
- match=r"This method can only handle casting string complex"
- " columns to symbols. Other complex column data or"
- " casting to other data is not supported."):
- raise df.astype(kx.SymbolVector)
- df = q('''([] a:1 2 3 4 5 6;b:`b`n`h``v`;
- c:("ll";"ll";"ll";"ll";"ll";"ll");
- d:("ll";"ll";"ll";"ll";"ll";1 2 3))''')
- with pytest.raises(ValueError,
- match=r"This method can only handle casting string complex"
- " columns to symbols. Other complex column data or"
- " casting to other data is not supported."):
- raise df.astype({'d': kx.SymbolVector})
-
-
-def test_df_select_dtypes(kx, q):
- df = q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
- assert check_result_and_type(
- kx,
- df.select_dtypes(include=[kx.ShortVector, kx.LongVector]).py(),
- q('([] c2:1 2 3h; c3:1 2 3j)').py()
- )
- assert check_result_and_type(
- kx,
- df.select_dtypes(exclude='kx.LongVector').py(),
- q('([] c1:`a`b`c; c2:1 2 3h; c4:1 2 3i)').py()
- )
- assert check_result_and_type(
- kx,
- df.select_dtypes(include=['ShortVector', kx.LongVector],
- exclude=[kx.SymbolVector]).py(),
- q('([] c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)').py()
- )
-
-
-def test_df_select_dtypes_errors(kx, q):
- df = q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
- with pytest.raises(ValueError, match=r"Expecting either include or"
- " exclude param to be passed"):
- raise df.select_dtypes()
- with pytest.raises(ValueError, match=r"Include and Exclude lists"
- " have overlapping elements"):
- df.select_dtypes(include='kx.LongVector',
- exclude='kx.LongVector')
-
-
-def test_df_drop(kx, q):
- t = q('([] til 10; 10?10; 10?1f; (10 10)#100?" ")')
-
- # Test dropping rows from table
-
- rez = t.drop(5)
- assert(len(rez)==9)
- assert(5 not in rez['x'])
-
- rez = t.drop([3, 5, 7])
- assert(len(rez)==7)
- assert(all([x not in rez['x'] for x in [3, 5, 7]]))
-
- rez = t.drop(index=5)
- assert(len(rez)==9)
- assert(5 not in rez['x'])
-
- rez = t.drop(index=[3, 5, 7])
- assert(len(rez)==7)
- assert(all([x not in rez['x'] for x in [3, 5, 7]]))
-
- rez = t.drop(-1, errors='ignore')
- assert(q('{x~y}', t, rez).py())
-
- rez = t.drop([-1, 10], errors='ignore')
- assert(q('{x~y}', t, rez).py())
-
- # Test dropping columns from table
-
- rez = t.drop('x1', axis=1)
- assert(len(rez.columns) == 3)
- assert('x1' not in rez.columns)
-
- rez = t.drop(['x1', 'x3'], axis=1)
-
- assert(len(rez.columns) == 2)
- assert(all([x not in rez.columns for x in ['x1', 'x3']]))
-
- rez = t.drop(columns='x1')
- assert(len(rez.columns) == 3)
- assert('x1' not in rez.columns)
-
- rez = t.drop(columns=['x1', 'x3'])
- assert(len(rez.columns) == 2)
- assert(all([x not in rez.columns for x in ['x1', 'x3']]))
-
- rez = t.drop('x72', axis=1, errors='ignore')
- assert(q('{x~y}', t, rez).py())
-
- rez = t.drop(['x42', 'x72'], axis=1, errors='ignore')
- assert(q('{x~y}', t, rez).py())
-
- # Test dropping rows from keyed table
-
- q('sym:`aaa`bbb`ccc')
- kt = q('([sym,7?sym; til 10] 10?10; 10?1f; (10 10)#100?" ")')
-
- key = q('{value exec from key[x] where sym=`aaa}', kt).py()
- rez = kt.drop([key])
- assert(len(rez)==q('{[a;b;c] count delete from key[a] where (sym=b) and x=c}',
- kt, key[0], key[1]).py())
- assert(key not in q('key', rez))
-
- keys = q('{(2 2)#raze flip value flip 2#select from key[x] where sym=`aaa}', kt).py()
- rez = kt.drop(keys)
- rez2 = q('{c:{(count[x];2)#raze flip value flip key[x]}[x] in y; count delete from x where c}',
- kt, keys).py()
- assert(len(rez)==rez2)
- assert(not any(q('{{(count[x];2)#raze flip value flip key[x]}[x] in y}', rez, keys).py()))
-
- rez = kt.drop(index=[key])
- assert(len(rez)==q('{[a;b;c] count delete from key[a] where (sym=b) and x=c}',
- kt, key[0], key[1]).py())
- assert(key not in q('key', rez))
-
- rez = kt.drop(index=keys)
- rez2 = q('{c:{(count[x];2)#raze flip value flip key[x]}[x] in y; count delete from x where c}',
- kt, keys).py()
- assert(len(rez)==rez2)
- assert(not any(q('{{(count[x];2)#raze flip value flip key[x]}[x] in y}', rez, keys).py()))
-
- key = 'aaa'
- rez = kt.drop(key, level=0)
- rez2 = q('{c:y=key[x]`sym; delete from x where c}', kt, key)
- assert(len(rez)==len(rez2))
- assert(not q('{y in key[x]`sym}', rez, key).py())
-
- keys = ['aaa', 'bbb']
- rez = kt.drop(keys, level=0)
- rez2 = q('{c:(key[x]`sym) in y; delete from x where c}', kt, keys)
- assert(len(rez)==len(rez2))
- assert(not any(q('{(key[x]`sym) in y}', rez, keys).py()))
-
- keys = [0, 1, 2, 3, 4]
- rez = kt.drop(keys, level=1)
- rez2 = q('{c:(key[x]`x) in y; delete from x where c}', kt, keys)
- assert(len(rez)==len(rez2))
- assert(not any(q('{(key[x]`x) in y}', rez, keys).py()))
-
- rez = kt.drop([('a', -1), ('zzz', 99)], errors='ignore')
- assert(q('{x~y}', kt, rez).py())
-
- rez = kt.drop('zzz', level=0, errors='ignore')
- assert(q('{x~y}', kt, rez).py())
-
- rez = kt.drop(['a', 'zzz'], level=0, errors='ignore')
- assert(q('{x~y}', kt, rez).py())
-
- # Test dropping columns from keyed table
-
- rez = kt.drop('x1', axis=1)
- assert(len(rez.columns) == 2)
- assert('x1' not in rez.columns)
-
- rez = kt.drop(['x', 'x2'], axis=1)
- assert(len(rez.columns) == 1)
- assert(all([x not in rez.columns for x in ['x', 'x2']]))
-
- rez = kt.drop(columns='x1')
- assert(len(rez.columns) == 2)
- assert('x1' not in rez.columns)
-
- rez = kt.drop(columns=['x', 'x2'])
- assert(len(rez.columns) == 1)
- assert(all([x not in rez.columns for x in ['x', 'x2']]))
-
- rez = kt.drop('x72', axis=1, errors='ignore')
- assert(q('{x~y}', kt, rez).py())
-
- rez = kt.drop(['x42', 'x72'], axis=1, errors='ignore')
- assert(q('{x~y}', kt, rez).py())
-
- # Test error cases
-
- with pytest.raises(ValueError):
- t.drop()
-
- with pytest.raises(ValueError):
- t.drop(4, index=5, columns='x1')
-
- with pytest.raises(kx.QError):
- t.drop('x1')
-
- with pytest.raises(kx.QError):
- t.drop(2, axis=1)
-
- with pytest.raises(ValueError):
- t.drop(0, axis=1, level=0)
-
- with pytest.raises(kx.QError) as e:
- t.drop(-1)
- assert(str(e.value) == '-1 not found.')
-
- with pytest.raises(kx.QError) as e:
- t.drop([-1, 10])
- assert(str(e.value) == '-1, 10 not found.')
-
- with pytest.raises(kx.QError) as e:
- kt.drop([('a', -1), ('zzz', 99)])
- assert(str(e.value) == '(a, -1), (zzz, 99) not found.')
-
- with pytest.raises(kx.QError) as e:
- kt.drop('zzz', level=0)
- assert(str(e.value) == 'zzz not found.')
-
- with pytest.raises(kx.QError) as e:
- kt.drop(['a', 'zzz'], level=0)
- assert(str(e.value) == 'a, zzz not found.')
-
- with pytest.raises(kx.QError) as e:
- t.drop('x42', axis=1)
- assert(str(e.value) == 'x42 not found.')
-
- with pytest.raises(kx.QError) as e:
- t.drop(['x42', 'x72'], axis=1)
- assert(str(e.value) == 'x42, x72 not found.')
-
- with pytest.raises(kx.QError) as e:
- kt.drop('x42', axis=1)
- assert(str(e.value) == 'x42 not found.')
-
- with pytest.raises(kx.QError) as e:
- kt.drop(['x42', 'x72'], axis=1)
- assert(str(e.value) == 'x42, x72 not found.')
-
-
-def test_df_drop_duplicates(kx, q):
- N = 100
- q['N'] = N
- q('sym:`aaa`bbb`ccc')
- t = q('([] N?sym; N?3)')
-
- rez = t.drop_duplicates()
- rez2 = t.pd().drop_duplicates().reset_index(drop=True)
- assert(q('{x~y}', rez, rez2))
-
- with pytest.raises(ValueError):
- t.drop_duplicates(subset=['x', 'x1'])
-
- with pytest.raises(ValueError):
- t.drop_duplicates(keep='last')
-
- with pytest.raises(ValueError):
- t.drop_duplicates(inplace=True)
-
- with pytest.raises(ValueError):
- t.drop_duplicates(ignore_index=True)
-
-
-def test_df_rename(kx, q):
- q('sym:`aaa`bbb`ccc')
- t = q('([] 10?sym; til 10; 10?10; 10?1f)')
-
- cols = {'sym': 'Symbol', 'x2': 'xsquare'}
- rez = t.rename(cols, axis=1)
- assert(q('{x~y}', rez, t.pd().rename(cols, axis=1)))
-
- cols = {'sym': 'Symbol', 'x2': 'xsquare'}
- rez = t.rename(columns=cols)
- assert(q('{x~y}', rez, t.pd().rename(columns=cols)))
-
- kt = kx.q('([idx:til 10] til 10; 10?10; 10?1f; (10;10)#100?" ")')
-
- idx = {0: 'foo', 5: 'bar'}
- rez = kt.rename(idx)
- # assert(q('{x~y}', rez, kt.pd().rename(idx))) # {x~y}=1b because of some q attribute
- assert(all(rez.pd().eq(kt.pd().rename(idx))))
-
- idx = {0: 'foo', 5: 'bar'}
- rez = kt.rename(index=idx)
- # assert(q('{x~y}', rez, kt.pd().rename(index=idx))) # {x~y}=1b because of some q attribute
- assert(all(rez.pd().eq(kt.pd().rename(index=idx))))
-
- with pytest.raises(ValueError):
- t.rename()
-
- with pytest.raises(ValueError):
- t.rename(index={5: 'foo'}, axis=1)
-
- with pytest.raises(ValueError):
- t.rename(columns={'x': 'xXx'}, level=0)
-
- with pytest.raises(ValueError):
- t.rename(columns={'x': 'xXx'}, copy=False)
-
- with pytest.raises(ValueError):
- t.rename(columns={'x': 'xXx'}, inplace=True)
-
- with pytest.raises(ValueError):
- t.rename({5: 'foo'}, level=0)
-
- with pytest.raises(ValueError):
- t.rename(columns={'x': 'xXx'}, errors='raise')
-
-
-@pytest.mark.pandas_api
-@pytest.mark.xfail(reason='Flaky randomization')
-def test_df_sample(kx, q):
- q('sym:`aaa`bbb`ccc')
- t = q('([] 10?sym; til 10; 10?10; 10?1f)')
- df = t.pd()
- kt = q('([idx:til 10] til 10; 10?10; 10?1f; (10;10)#100?" ")')
- df2 = kt.pd()
-
- rez = t.sample()
- assert(type(rez) is kx.Table)
- assert(len(rez) == 1)
- check = df.iloc[rez['x'].py()].reset_index(drop=True)
- assert(q('{x~y}', rez, check))
-
- rez = t.sample(5)
- assert(type(rez) is kx.Table)
- assert(len(rez) == 5)
- check = df.iloc[rez['x'].py()].reset_index(drop=True)
- assert(q('{x~y}', rez, check))
-
- rez = t.sample(10)
- assert(type(rez) is kx.Table)
- assert(len(rez) == 10)
- check = df.iloc[rez['x'].py()].reset_index(drop=True)
- assert(q('{x~y}', rez, check))
- assert(q('{x~y}', rez['x'].pd().unique(), check['x'].unique()))
-
- rez = t.sample(100, replace=True)
- assert(type(rez) is kx.Table)
- assert(len(rez) == 100)
- check = df.iloc[rez['x'].py()].reset_index(drop=True)
- assert(q('{x~y}', rez, check))
-
- rez = t.sample(frac=0.5, replace=True)
- assert(type(rez) is kx.Table)
- assert(len(rez) == 5)
- check = df.iloc[rez['x'].py()].reset_index(drop=True)
- assert(q('{x~y}', rez, check))
-
- rez = kt.sample()
- assert(type(rez) is kx.KeyedTable)
- assert(len(rez) == 1)
- check = df2.iloc[rez['x'].py()]
- assert(q('{x~y}', rez, check))
-
- rez = kt.sample(5)
- assert(type(rez) is kx.KeyedTable)
- assert(len(rez) == 5)
- check = df2.iloc[rez['x'].py()]
- assert(q('{x~y}', rez, check))
-
- rez = kt.sample(10)
- assert(type(rez) is kx.KeyedTable)
- assert(len(rez) == 10)
- check = df2.iloc[rez['x'].py()]
- assert(q('{x~y}', rez, check))
- assert(q('{x~y}', rez['x'].pd().unique(), check['x'].unique()))
-
- rez = kt.sample(100, replace=True)
- assert(type(rez) is kx.KeyedTable)
- assert(len(rez) == 100)
- check = df2.iloc[rez['x'].py()]
- assert(q('{x~y}', rez, check))
-
- rez = kt.sample(frac=0.5, replace=True)
- assert(type(rez) is kx.KeyedTable)
- assert(len(rez) == 5)
- check = df2.iloc[rez['x'].py()]
- assert(q('{x~y}', rez, check))
-
- with pytest.raises(ValueError):
- t.sample(100)
-
- with pytest.raises(ValueError):
- t.sample(weights=np.ones(10))
-
- with pytest.raises(ValueError):
- t.sample(random_state=42)
-
- with pytest.raises(ValueError):
- t.sample(axis=1)
-
- with pytest.raises(ValueError):
- t.sample(ignore_index=True)
-
-
-def test_mean(kx, q):
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': [7, 11, 14, 14]
- }
- )
- tab = kx.toq(df)
- p_m = df.mean()
- q_m = tab.mean()
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.mean(axis=1)
- q_m = tab.mean(axis=1)
- for c in range(len(q.cols(tab))):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- q['tab'] = kx.toq(df)
- tab = q('1!`idx xcols update idx: til count tab from tab')
- p_m = df.mean()
- q_m = tab.mean()
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.mean(axis=1)
- q_m = tab.mean(axis=1)
- for c in range(len(q.cols(tab)) - 1):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': ['foo', 'bar', 'baz', 'qux']
- }
- )
- tab = kx.toq(df)
- p_m = df.mean(numeric_only=True)
- q_m = tab.mean(numeric_only=True)
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.mean(axis=1, numeric_only=True)
- q_m = tab.mean(axis=1, numeric_only=True)
- for c in range(len(q.cols(tab))):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- with pytest.raises(kx.QError):
- q_m = tab.mean()
- with pytest.raises(kx.QError):
- q_m = tab.mean(axis=1)
-
-
-def test_median(kx, q):
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': [7, 11, 14, 14]
- }
- )
- tab = kx.toq(df)
- p_m = df.median()
- q_m = tab.median()
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.median(axis=1)
- q_m = tab.median(axis=1)
- for c in range(len(q.cols(tab))):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- q['tab'] = kx.toq(df)
- tab = q('1!`idx xcols update idx: til count tab from tab')
- p_m = df.median()
- q_m = tab.median()
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.median(axis=1)
- q_m = tab.median(axis=1)
- for c in range(len(q.cols(tab)) - 1):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': ['foo', 'bar', 'baz', 'qux']
- }
- )
- tab = kx.toq(df)
- p_m = df.median(numeric_only=True)
- q_m = tab.median(numeric_only=True)
- for c in q.key(q_m).py():
- assert p_m[c] == q_m[c].py()
- p_m = df.median(axis=1, numeric_only=True)
- q_m = tab.median(axis=1, numeric_only=True)
- for c in range(len(q.cols(tab))):
- assert p_m[c] == q_m[q('{`$string x}', c)].py()
-
- with pytest.raises(kx.QError):
- q_m = tab.median()
- with pytest.raises(kx.QError):
- q_m = tab.median(axis=1)
-
-
-def test_mode(kx, q): # noqa
- if sys.version_info.minor > 7:
- def compare_q_to_pd(tab, df):
- if 'idx' in q.cols(tab):
- tab.pop('idx')
- tab = tab.pd()
- for i in range(len(tab)):
- for c in tab.columns:
- df_c = c
- try:
- df_c = int(c)
- except BaseException:
- pass
- if str(tab.at[i, c]) == '--':
- tab.at[i, c] = np.NaN
- if str(tab.at[i, c]) == '':
- tab.at[i, c] = 'nan'
- if str(tab.at[i, c]) == 'nan' and str(df.at[i, df_c]) == 'nan':
- continue
- if tab.at[i, c] != df.at[i, df_c]:
- return False
- return True
-
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': [7, 11, 14, 14]
- }
- )
- tab = kx.toq(df)
- p_m = df.mode()
- q_m = tab.mode()
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1)
- q_m = tab.mode(axis=1)
- assert compare_q_to_pd(q_m, p_m)
-
- q['tab'] = kx.toq(df)
- tab = q('1!`idx xcols update idx: til count tab from tab')
-
- p_m = df.mode()
- q_m = tab.mode()
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1)
- q_m = tab.mode(axis=1)
- assert compare_q_to_pd(q_m, p_m)
-
- df = pd.DataFrame(
- {
- 'a': [1, 2, 2, 4],
- 'b': [1, 2, 6, 7],
- 'c': [7, 8, 9, 10],
- 'd': ['foo', 'bar', 'baz', 'foo']
- }
- )
- tab = kx.toq(df)
- p_m = df.mode()
- q_m = tab.mode()
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1, numeric_only=True)
- q_m = tab.mode(axis=1, numeric_only=True)
- assert compare_q_to_pd(q_m, p_m)
-
- df = pd.DataFrame({
- 'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],
- 'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],
- 'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']
- })
- tab = kx.toq(df)
-
- p_m = df.mode()
- q_m = tab.mode()
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1, numeric_only=True)
- q_m = tab.mode(axis=1, numeric_only=True)
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(numeric_only=True)
- q_m = tab.mode(numeric_only=True)
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1, numeric_only=True)
- q_m = tab.mode(axis=1, numeric_only=True)
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(dropna=False)
- q_m = tab.mode(dropna=False)
- assert compare_q_to_pd(q_m, p_m)
-
- p_m = df.mode(axis=1, dropna=False, numeric_only=True)
- q_m = tab.mode(axis=1, dropna=False, numeric_only=True)
- assert compare_q_to_pd(q_m, p_m)
-
-
-def test_table_merge_asof(kx, q):
- left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
- right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
- qleft = kx.toq(left)
- qright = kx.toq(right)
-
- assert (pd.merge_asof(left, right, on='a')
- == kx.merge_asof(qleft, qright, on='a').pd()).all().all()
- assert (pd.merge_asof(left, right, on='a')
- == qleft.merge_asof(qright, on='a').pd()).all().all()
- assert (pd.merge_asof(left, right, on='a')
- == q('0!', q('1!', qleft).merge_asof(qright, on='a')).pd()).all().all()
- assert (pd.merge_asof(left, right, on='a')
- == qleft.merge_asof(q('1!', qright), on='a').pd()).all().all()
- assert (pd.merge_asof(left, right, on='a')
- == q('0!', q('1!', qleft).merge_asof(q('1!', qright), on='a')).pd()).all().all()
- left = pd.DataFrame({
- "time": [
- pd.Timestamp("2016-05-25 13:30:00.023"),
- pd.Timestamp("2016-05-25 13:30:00.023"),
- pd.Timestamp("2016-05-25 13:30:00.030"),
- pd.Timestamp("2016-05-25 13:30:00.041"),
- pd.Timestamp("2016-05-25 13:30:00.048"),
- pd.Timestamp("2016-05-25 13:30:00.049"),
- pd.Timestamp("2016-05-25 13:30:00.072"),
- pd.Timestamp("2016-05-25 13:30:00.075")
- ],
- "ticker": [
- "GOOG",
- "MSFT",
- "MSFT",
- "MSFT",
- "GOOG",
- "AAPL",
- "GOOG",
- "MSFT"
- ],
- "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
- "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
- })
- right = pd.DataFrame({
- "time": [
- pd.Timestamp("2016-05-25 13:30:00.023"),
- pd.Timestamp("2016-05-25 13:30:00.038"),
- pd.Timestamp("2016-05-25 13:30:00.048"),
- pd.Timestamp("2016-05-25 13:30:00.048"),
- pd.Timestamp("2016-05-25 13:30:00.048")
- ],
- "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
- "price": [51.95, 51.95, 720.77, 720.92, 98.0],
- "quantity": [75, 155, 100, 100, 100]
- })
-
- qleft = kx.toq(left)
- qright = kx.toq(right)
-
- assert (pd.merge_asof(left, right, on='time')
- == kx.merge_asof(qleft, qright, on='time').pd()).all().all()
- assert (pd.merge_asof(left, right, on='time')
- == qleft.merge_asof(qright, on='time').pd()).all().all()
- assert (pd.merge_asof(left, right, on='time')
- == q('0!', q('1!', qleft).merge_asof(qright, on='time')).pd()).all().all()
- assert (pd.merge_asof(left, right, on='time')
- == qleft.merge_asof(q('1!', qright), on='time').pd()).all().all()
- assert (pd.merge_asof(left, right, on='time')
- == q('0!', q('1!', qleft).merge_asof(q('1!', qright), on='time')).pd()).all().all()
-
-
-def test_pandas_abs(kx, q):
- tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
- ntab = tab[['price', 'ints']]
-
- assert ntab.abs().py() == tab.abs(numeric_only=True).py()
-
- with pytest.raises(kx.QError):
- tab.abs()
-
-
-def test_pandas_min(q):
- tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
- df = tab.pd()
-
- qmin = tab.min().py()
- pmin = df.min()
-
- assert str(pmin['sym']) == qmin['sym']
- assert float(pmin['price']) == qmin['price']
- assert float(pmin['ints']) == qmin['ints']
-
- qmin = tab.min(axis=1, numeric_only=True, skipna=True).py()
- pmin = df.min(axis=1, numeric_only=True, skipna=True)
-
- for i in range(100):
- assert float(qmin[i]) == float(pmin[i])
-
-
-def test_pandas_max(q):
- tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
- df = tab.pd()
-
- qmax = tab.max().py()
- pmax = df.max()
-
- assert str(pmax['sym']) == qmax['sym']
- assert float(pmax['price']) == qmax['price']
- assert float(pmax['ints']) == qmax['ints']
-
- qmax = tab.max(axis=1, numeric_only=True, skipna=True).py()
- pmax = df.max(axis=1, numeric_only=True, skipna=True)
-
- for i in range(100):
- assert float(qmax[i]) == float(pmax[i])
-
-
-def test_pandas_all(q):
- tab = q(
- '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;'
- ' bools: 100?0b)'
- )
- df = tab.pd()
-
- qall = tab.all().py()
- pall = df.all()
- assert qall['sym'] == pall['sym']
- assert qall['ints'] == pall['ints']
- assert qall['price'] == pall['price']
- assert qall['bools'] == pall['bools']
-
- qall = tab.all(bool_only=True).py()
- pall = df.all(bool_only=True)
- assert qall['bools'] == pall['bools']
-
- qall = tab.all(axis=1).py()
- pall = df.all(axis=1)
- for i in range(100):
- assert qall[i] == pall[i]
-
-
-def test_pandas_any(q):
- tab = q(
- '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;'
- ' bools: 100?0b)'
- )
- df = tab.pd()
-
- qany = tab.any().py()
- pany = df.any()
- assert qany['sym'] == pany['sym']
- assert qany['ints'] == pany['ints']
- assert qany['price'] == pany['price']
- assert qany['bools'] == pany['bools']
-
- qany = tab.any(bool_only=True).py()
- pany = df.any(bool_only=True)
- assert qany['bools'] == pany['bools']
-
- qany = tab.any(axis=1).py()
- pany = df.any(axis=1)
- for i in range(100):
- assert qany[i] == pany[i]
-
-
-def test_pandas_prod(q):
- tab = q('([] sym: 10?`a`b`c; price: 12.25f - 10?25.0f; ints: 10 - 10?20)')
- df = tab.pd()
-
- qprod = tab.prod(numeric_only=True).py()
- pprod = df.prod(numeric_only=True)
- assert float(qprod['price']) == float(pprod['price'])
- assert float(qprod['ints']) == float(pprod['ints'])
-
- qprod = tab.prod(numeric_only=True, skipna=True, axis=1).py()
- pprod = df.prod(numeric_only=True, skipna=True, axis=1)
- for i in range(10):
- assert float(qprod[i]) == float(pprod[i])
-
- qprod = tab.prod(numeric_only=True, skipna=True, axis=1, min_count=5).py()
- pprod = df.prod(numeric_only=True, skipna=True, axis=1, min_count=5)
- for i in range(10):
- assert qprod[i] == q('0N')
- assert str(pprod[i]) == 'nan'
-
-
-def test_pandas_sum(q):
- tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
- df = tab.pd()
-
- qsum = tab.sum().py()
- psum = df.sum()
- assert float(qsum['price']) == float(psum['price'])
- assert float(qsum['ints']) == float(psum['ints'])
- assert str(qsum['sym']) == str(psum['sym'])
-
- qsum = tab.sum(numeric_only=True, skipna=True, axis=1).py()
- psum = df.sum(numeric_only=True, skipna=True, axis=1)
- for i in range(10):
- assert float(qsum[i]) == float(psum[i])
-
- qsum = tab.sum(numeric_only=True, skipna=True, axis=1, min_count=5).py()
- psum = df.sum(numeric_only=True, skipna=True, axis=1, min_count=5)
- for i in range(10):
- assert qsum[i] == q('0N')
- assert str(psum[i]) == 'nan'
-
-
-def test_pandas_groupby_errors(kx, q):
- tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
-
- with pytest.raises(RuntimeError):
- tab.groupby(by='sym', level=[1])
-
- with pytest.raises(NotImplementedError):
- tab.groupby(by=lambda x: x)
- with pytest.raises(NotImplementedError):
- tab.groupby(by='sym', observed=True)
- with pytest.raises(NotImplementedError):
- tab.groupby(by='sym', group_keys=False)
- with pytest.raises(NotImplementedError):
- tab.groupby(by='sym', axis=1)
-
- arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],
- ['Captive', 'Wild', 'Captive', 'Wild', 'Wild']]
- index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
- df = pd.DataFrame({'Max Speed': [390., 350., 30., 20., 25.]},
- index=index)
- tab = kx.toq(df)
-
- with pytest.raises(KeyError):
- tab.groupby(level=[0, 4])
-
-
-def test_pandas_groupby(kx, q):
- df = pd.DataFrame(
- {
- 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- 'Max Speed': [380., 370., 24., 26.],
- 'Max Altitude': [570., 555., 275., 300.]
- }
- )
-
- tab = kx.toq(df)
-
- assert all(
- df.groupby(['Animal']).mean() == tab.groupby(kx.SymbolVector(['Animal'])).mean().pd()
- )
- assert df.groupby(['Animal']).ndim == tab.groupby(kx.SymbolVector(['Animal'])).ndim
- assert all(
- df.groupby(['Animal'], as_index=False).mean()
- == tab.groupby(kx.SymbolVector(['Animal']), as_index=False).mean().pd()
- )
- assert all(
- df.groupby(['Animal']).tail(1).reset_index(drop=True)
- == tab.groupby(kx.SymbolVector(['Animal'])).tail(1).pd()
- )
- assert all(
- df.groupby(['Animal']).tail(2)
- == tab.groupby(kx.SymbolVector(['Animal'])).tail(2).pd()
- )
-
- df = pd.DataFrame(
- [
- ["a", 12, 12],
- [None, 12.3, 33.],
- ["b", 12.3, 123],
- ["a", 1, 1]
- ],
- columns=["a", "b", "c"]
- )
- tab = kx.toq(df)
-
- # NaN in column is filled when converted to q this unfills it and re-sorts it
- assert q(
- '{[x; y] x:update a:` from x where i=2; x: `a xasc x; x~y}',
- df.groupby('a', dropna=False).sum(),
- tab.groupby('a', dropna=False).sum()
- )
- assert q(
- '{[x; y] x:update a:` from x where i=1; x~y}',
- df.groupby('a', dropna=False, sort=False).sum(),
- tab.groupby('a', dropna=False, sort=False).sum()
- )
- assert all(
- df.groupby('a', dropna=False, as_index=False).sum()
- == tab.groupby('a', dropna=False, as_index=False).sum().pd()
- )
-
- arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],
- ['Captive', 'Wild', 'Captive', 'Wild', 'Wild']]
- index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
- df = pd.DataFrame({'Max Speed': [390., 350., 30., 20., 25.]},
- index=index)
- tab = kx.toq(df)
-
- assert all(
- df.groupby(['Animal']).mean()
- == tab.groupby(['Animal']).mean().pd()
- )
- assert all(
- df.groupby(['Animal'], as_index=False).mean()
- == tab.groupby(['Animal'], as_index=False).mean().pd()
- )
-
- assert all(
- df.groupby(level=[1]).mean()
- == tab.groupby(level=[1]).mean().pd()
- )
- assert all(
- df.groupby(level=1, as_index=False).mean()
- == tab.groupby(level=1, as_index=False).mean().pd()
- )
-
- assert all(
- df.groupby(level=[0, 1]).mean()
- == tab.groupby(level=[0, 1]).mean().pd()
- )
- assert all(
- df.groupby(level=[0, 1], as_index=False).mean()
- == tab.groupby(level=[0, 1], as_index=False).mean().pd()
- )
-
-
-def test_keyed_loc_fixes(q):
- mkt = q('([k1:`a`b`a;k2:100+til 3] x:til 3; y:`multi`keyed`table)')
- assert q.keys(mkt['x']).py() == ['k1', 'k2']
- assert q.value(mkt['x']).py() == {'x': [0, 1, 2]}
- assert mkt[['x', 'y']].pd().equals(mkt.pd()[['x', 'y']])
- assert mkt['a', 100].py() == {'x': [0], 'y': ['multi']}
-
- with pytest.raises(KeyError):
- mkt[['k1', 'y']]
- with pytest.raises(KeyError):
- mkt['k1']
+"""Tests for the Pandas API."""
+
+import sys
+
+import numpy as np
+import pandas as pd
+import pytest
+
+
+def check_result_and_type(kx, tab, result):
+ if ((isinstance(tab, kx.Table) or isinstance(tab, kx.KeyedTable))
+ and tab.py() == result.py() if isinstance(result, kx.K) else result):
+ return True
+ return False
+
+
+def test_api_meta_error(kx):
+ with pytest.raises(Exception):
+ kx.PandasAPI()
+
+
+def test_df_columns(q):
+ df = q('([] til 10; 10?10)')
+ assert all(df.columns == df.pd().columns)
+
+
+def test_df_dtypes(q):
+ df = q('([] til 10; 10?0Ng; 10?1f;0f,til 9;10?("abc";"def");10?1e)')
+ assert all(df.dtypes.columns == ['columns', 'type'])
+ assert q('{x~y}',
+ q('("kx.LongAtom";"kx.GUIDAtom";"kx.FloatAtom";"kx.List";"kx.CharVector";"kx.RealAtom")'), # noqa: E501
+ df.dtypes['type'])
+
+
+def test_df_empty(q):
+ df = q('([] til 10; 10?10)')
+ assert df.empty == df.pd().empty
+ df = q('([] `long$(); `long$())')
+ assert df.empty == df.pd().empty
+
+
+def test_df_ndim(q):
+ df = q('([] til 10; 10?10)')
+ assert(df.ndim == df.pd().ndim)
+
+
+def test_df_ndim_multicol(q):
+ df = q('([] til 10; 10?10; 10?1f)')
+ assert(df.ndim == df.pd().ndim)
+
+
+def test_df_shape(q):
+ df = q('([] til 10; 10?10)')
+ assert (df.shape == df.pd().shape)
+
+
+def test_df_size(q):
+ df = q('([] til 10; 10?10)')
+ assert (df.size == df.pd().size)
+
+
+def test_df_head(kx, q):
+ df = q('([] til 10; 10 - til 10)')
+ assert check_result_and_type(kx, df.head(), q('5 # ([] til 10; 10 - til 10)'))
+ assert check_result_and_type(kx, df.head(2), q('2 # ([] til 10; 10 - til 10)'))
+ df = q('([til 10] 10 - til 10)')
+ assert check_result_and_type(kx, df.head(), q('5 # ([til 10] 10 - til 10)'))
+ assert check_result_and_type(kx, df.head(2), q('2 # ([til 10] 10 - til 10)'))
+
+
+def test_df_tail(kx, q):
+ df = q('([] til 10; 10 - til 10)')
+ assert check_result_and_type(kx, df.tail(), q('5 _ ([] til 10; 10 - til 10)'))
+ assert check_result_and_type(kx, df.tail(2), q('8 _ ([] til 10; 10 - til 10)'))
+ df = q('([til 10] 10 - til 10)')
+ assert check_result_and_type(kx, df.tail(), q('5 _ ([til 10] 10 - til 10)'))
+ assert check_result_and_type(kx, df.tail(2), q('8 _ ([til 10] 10 - til 10)'))
+
+
+def test_df_pop(kx, q):
+ df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
+ assert check_result_and_type(kx, df.pop('x'), {'x': [x for x in range(10)]})
+ assert check_result_and_type(kx, df.pop('y'), {'y': [10 - x for x in range(10)]})
+ df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
+ df.pop('z')
+ assert check_result_and_type(
+ kx,
+ df.head(),
+ {
+ 'x': [x for x in range(5)],
+ 'y': [10 - x for x in range(5)]
+ }
+ )
+ df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c`d)')
+ df.pop(['y', 'z'])
+ assert check_result_and_type(kx, df, {'x': [x for x in range(10)]})
+
+
+def test_df_get(kx, q):
+ df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)')
+ assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
+ assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
+ assert check_result_and_type(kx, df.get(['x', 'y']), {
+ 'x': [x for x in range(10)],
+ 'y': [10 - x for x in range(10)]
+ })
+ assert df.get(['y', 'z']).py() == df[['y', 'z']].py()
+ assert df.get(['x', 'y']).py() == df[['x', 'y']].py()
+ assert df.get('r') is None
+ assert df.get('r', default=5) == 5
+ assert df.get(['x', 'r']) is None
+ assert df.get(['x', 'r'], default=5) == 5
+
+
+def test_df_get_keyed(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: 10?`a`b`c)')
+ assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
+ assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
+ assert check_result_and_type(kx, df.get(['x', 'y']), {
+ 'x': [x for x in range(10)],
+ 'y': [10 - x for x in range(10)]
+ })
+ assert df.get(['y', 'z']).py() == q.value(df[['y', 'z']]).py()
+ assert df.get('r') is None
+ assert df.get('r', default=5) == 5
+ assert df.get(['x', 'r']) is None
+ assert df.get(['x', 'r'], default=5) == 5
+
+
+def test_df_at(q):
+ df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)')
+ for i in range(10):
+ assert df.at[i, 'y'].py() == 10 - i
+ df.at[i, 'y'] = 2
+ assert df.at[i, 'y'].py() == 2
+ assert not df.replace_self
+ with pytest.raises(ValueError):
+ df.at[0]
+ with pytest.raises(ValueError):
+ df.at[0] = 5
+
+
+def test_df_at_keyed(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: 10?`a`b`c)')
+ for i in range(10):
+ assert df.at[i, 'y'].py() == 10 - i
+ df.at[i, 'y'] = 2
+ assert df.at[i, 'y'].py() == 2
+ assert not df.replace_self
+ with pytest.raises(ValueError):
+ df.at[0]
+ with pytest.raises(ValueError):
+ df.at[0] = 5
+ with pytest.raises(kx.QError):
+ df.at[0, 'x']
+ with pytest.raises(kx.QError):
+ df.at[0, 'x'] = 5
+
+
+def test_df_replace_self(q):
+ df = q('([x: 0, til 10] y: 0, 10 - til 10; z: 11?`a`b`c)')
+ df.replace_self = True
+ df.tail(10)
+ for i in range(10):
+ assert df.at[i, 'y'].py() == 10 - i
+ df.at[i, 'y'] = 2
+ assert df.at[i, 'y'].py() == 2
+ assert df.replace_self
+
+
+def test_df_loc(kx, q):
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
+ assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
+ assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
+ assert check_result_and_type(kx, df.loc[0, :], {'y': [10, 9], 'z': ['a', 'a']})
+
+
+def test_df_loc_keyed(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
+ assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
+ assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
+ assert check_result_and_type(kx, df.loc[df['y'] < 100], df.py())
+
+
+def test_df_loc_cols(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.loc[[0, 1], 'z':], {'z': ['a', 'a']})
+ assert check_result_and_type(kx, df[[0, 1], :'y'], {'y': [10, 9]})
+ assert check_result_and_type(kx, df[[0, 1], 'y':'y'], {'y': [10, 9]})
+ assert check_result_and_type(kx, df[[0, 1], :2], {'y': [10, 9]})
+
+
+def test_df_getitem(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df[0], {'y': 10, 'z': 'a'})
+ assert check_result_and_type(kx, df[[1]], {'y': 9, 'z': 'a'})
+ assert check_result_and_type(kx, df[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
+ assert check_result_and_type(kx, df[:], df.py())
+ assert check_result_and_type(kx, df[:, ['x', 'y']], q('([x: til 10] y: 10 - til 10)').py())
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(
+ kx,
+ df[df['z'] == 'a'],
+ {
+ 'x': [0, 1],
+ 'y': [10, 9],
+ 'z': ['a', 'a']
+ }
+ )
+
+
+def test_df_loc_set(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ df.loc[df.loc['z'] == 'a', 'y'] = 99
+ assert check_result_and_type(
+ kx,
+ df,
+ q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ df.loc[df['z'] == 'a', 'y'] = 99
+ assert check_result_and_type(
+ kx,
+ df,
+ q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ with pytest.raises(ValueError):
+ df.loc[df['z'] == 'a'] = 99
+ with pytest.raises(ValueError):
+ df.loc[df['z'] == 'a', 3] = 99
+ with pytest.raises(ValueError):
+ df.loc[df['z'] == 'a', 'y', 'z'] = 99
+
+
+def test_df_set_cols(kx, q):
+ qtab = q('([]til 10;10?1f;10?100)')
+ df = qtab
+ df['x3'] = 99
+ assert check_result_and_type(
+ kx,
+ df,
+ q('{update x3:99 from x}', qtab).py()
+ )
+ df = qtab
+ df['x'] = q('reverse til 10')
+ assert check_result_and_type(
+ kx,
+ df,
+ q('{update x:reverse til 10 from x}', qtab).py()
+ )
+ df = qtab
+ df['x'] = ['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd', 'e', 'e']
+ assert check_result_and_type(
+ kx,
+ df,
+ q('{update x:`a`a`b`b`c`c`d`d`e`e from x}', qtab).py()
+ )
+ df = qtab
+ df[['x', 'x3']] = [q('reverse til 10'), 99]
+ assert check_result_and_type(
+ kx,
+ df,
+ q('{update x:reverse til 10, x3:99 from x}', qtab).py()
+ )
+ df = qtab
+ df[['x', 'x3']] = [q('reverse til 10'), ['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd', 'e', 'e']]
+ assert check_result_and_type(
+ kx,
+ df,
+ q('{update x:reverse til 10, x3:`a`a`b`b`c`c`d`d`e`e from x}', qtab).py()
+ )
+
+
+def test_df_iloc_set(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ df.iloc[df.loc['z'] == 'a', 'y'] = 99
+ assert check_result_and_type(
+ kx,
+ df,
+ q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ df.iloc[df['z'] == 'a', 'y'] = 99
+ assert check_result_and_type(
+ kx,
+ df,
+ q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ with pytest.raises(ValueError):
+ df.iloc[df['z'] == 'a'] = 99
+ with pytest.raises(ValueError):
+ df.iloc[df['z'] == 'a', 3] = 99
+ with pytest.raises(ValueError):
+ df.iloc[df['z'] == 'a', 'y', 'z'] = 99
+
+
+def test_df_iloc(kx, q):
+ df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.iloc[:], df.py())
+ assert check_result_and_type(kx, df.iloc[:, :-1], q('([x: til 10] y: 10 - til 10)').py())
+ assert check_result_and_type(kx, df.iloc[df['y'] < 100], df.py())
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.iloc[:-2], df.head(8).py())
+ assert check_result_and_type(kx, df.iloc[0], {'x': 0, 'y': 10, 'z': 'a'})
+ assert check_result_and_type(kx, df.iloc[[0]], {'x': 0, 'y': 10, 'z': 'a'})
+ assert check_result_and_type(
+ kx,
+ df.iloc[::-1],
+ {
+ 'x': [10 - x for x in range(10)],
+ 'y': [x for x in range(10)],
+ 'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a']
+ }
+ )
+ assert check_result_and_type(
+ kx,
+ df.head(4).iloc[[True, False, True, False]],
+ {
+ 'x': [0, 2],
+ 'y': [10, 8],
+ 'z': ['a', 'b']
+ })
+ assert check_result_and_type(
+ kx,
+ df.iloc[lambda x: [x % 2 == 0 for x in range(len(x))]],
+ {
+ 'x': [0, 2, 4, 6, 8],
+ 'y': [10, 8, 6, 4, 2],
+ 'z': ['a', 'b', 'c', 'd', 'e']
+ }
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[df['y'] > 5],
+ {
+ 'x': [0, 1, 2, 3, 4],
+ 'y': [10, 9, 8, 7, 6],
+ 'z': ['a', 'a', 'b', 'b', 'c']
+ }
+ )
+
+
+def test_df_iloc_with_cols(kx, q):
+ df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ assert check_result_and_type(kx, df.iloc[0, 0], {'x': 0, 'z': 'a'})
+ assert check_result_and_type(kx, df.iloc[[0], [2]], {'z': 'a'})
+ assert check_result_and_type(
+ kx,
+ df.iloc[::-1, ::-1],
+ {
+ 'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a'],
+ 'y': [x for x in range(10)],
+ 'x': [10 - x for x in range(10)]
+ }
+ )
+ assert check_result_and_type(
+ kx,
+ df.head(4).iloc[[True, False, True, False], [False, True, False]],
+ {
+ 'y': [10, 8]
+ }
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[lambda x: [x % 2 == 0 for x in range(len(x))], lambda x: [0, 2]],
+ {
+ 'x': [0, 2, 4, 6, 8],
+ 'z': ['a', 'b', 'c', 'd', 'e']
+ }
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, :],
+ q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, 'y':],
+ q('([] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, :'y'],
+ q('([] x: til 10; y: 10 - til 10)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, 1:],
+ q('([] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, :2],
+ q('([] x: til 10; y: 10 - til 10)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.iloc[:, :-2],
+ q('([] x: til 10; y: 10 - til 10)').py()
+ )
+ assert check_result_and_type(kx, df.loc[df['z']=='a', ['x', 'y']], {'x': [0, 1], 'y': [10, 9]})
+
+
+def test_table_validate(kx):
+ # Copy kwarg
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ with pytest.raises(ValueError):
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='1:1')
+ with pytest.raises(ValueError):
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='m:1')
+ with pytest.raises(ValueError):
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', validate='1:m')
+
+
+def test_table_merge_copy(kx, q):
+ # Copy kwarg
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', copy=False)
+ assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
+
+ # Replace_self property
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab1.replace_self = True
+ tab2 = kx.toq(df2)
+ tab1.merge(tab2, left_on='lkey', right_on='rkey')
+ assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
+
+
+def test_table_inner_merge(kx, q):
+ # Merge on keys
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey'
+ ).pd()
+ )
+
+ # Merge on keys KeyedTable
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = q('{1!x}', kx.toq(df1))
+ tab2 = q('{1!x}', kx.toq(df2))
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey'
+ ).equals(
+ q('{0!x}', tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey'
+ )).pd()
+ )
+
+ # Merge on differing keys
+ df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
+ df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(df2, on='a').equals(tab1.merge(tab2, on='a').pd())
+
+ # Merge on same indexes
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_index=True,
+ right_index=True
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_index=True,
+ right_index=True
+ ).pd()
+ )
+
+ # Merge on different indexes
+ df1 = pd.DataFrame(
+ {
+ 'lkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [1, 2, 3, 5]
+ },
+ index=[4, 3, 2, 1]
+ )
+ df2 = pd.DataFrame(
+ {
+ 'rkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [5, 6, 7, 8]
+ },
+ index=[0, 1, 2, 3]
+ )
+ tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
+ tab1 = q('{1!x}', tab1)
+ tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
+ tab2 = q('{1!x}', tab2)
+ res = tab1.merge(tab2, left_index=True, right_index=True)
+ assert q(
+ '{x~y}',
+ tab1.merge(tab2, left_index=True, right_index=True, q_join=True),
+ q(
+ '{1!x}',
+ q.ij(
+ q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)),
+ q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)
+ )
+ )
+ )
+ assert isinstance(res, kx.KeyedTable)
+ df_res = df1.merge(df2, left_index=True, right_index=True)
+ # assert our index does match properly before removing it
+ assert q('0!', res)['idx'].py() == list(df_res.index)
+ # We have idx as a column so we have to remove it to be equal as it won't convert
+ # to the pandas index column automatically
+ res = q('{(enlist `idx)_(0!x)}', res)
+ df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
+ df_res.pop('index')
+ assert df_res.equals(res.pd())
+
+
+def test_table_left_merge(kx, q):
+ if sys.version_info.minor > 7:
+ # Merge on keys
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='left'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='left'
+ ).pd()
+ )
+
+ # Merge on keys KeyedTable
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = q('{1!x}', kx.toq(df1))
+ tab2 = q('{1!x}', kx.toq(df2))
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='left'
+ ).equals(
+ q('{0!x}', tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='left'
+ )).pd()
+ )
+
+ # Merge on differing keys
+ df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
+ df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ tab_res = tab1.merge(tab2, on='a', how='left').pd()
+ assert str(tab_res.at[1, 'c']) == '--'
+ tab_res.at[1, 'c'] = np.NaN
+ assert df1.merge(df2, on='a', how='left').equals(tab_res)
+
+ # Merge on same indexes
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_index=True,
+ right_index=True,
+ how='left'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_index=True,
+ right_index=True,
+ how='left'
+ ).pd()
+ )
+
+ # Merge on different indexes
+ df1 = pd.DataFrame(
+ {
+ 'lkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [1, 2, 3, 5]
+ },
+ index=[4, 3, 2, 1]
+ )
+ df2 = pd.DataFrame(
+ {
+ 'rkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [5, 6, 7, 8]
+ },
+ index=[0, 1, 2, 3]
+ )
+ tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
+ tab1 = q('{1!x}', tab1)
+ tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
+ tab2 = q('{1!x}', tab2)
+ res = tab1.merge(tab2, left_index=True, right_index=True, how='left')
+ assert q(
+ '{x~y}',
+ tab1.merge(tab2, left_index=True, right_index=True, how='left', q_join=True),
+ q(
+ '{1!x}',
+ q.lj(
+ q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)),
+ q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)
+ )
+ )
+ )
+ assert isinstance(res, kx.KeyedTable)
+ df_res = df1.merge(df2, left_index=True, right_index=True, how='left')
+ # assert our index does match properly before removing it
+ assert q('0!', res)['idx'].py() == list(df_res.index)
+ # We have idx as a column so we have to remove it to be equal as it won't convert
+ # to the pandas index column automatically
+ res = q('{(enlist `idx)_(0!x)}', res).pd()
+ df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
+ df_res.pop('index')
+ res.at[0, 'rkey'] = np.NaN
+ res.at[0, 'value_y'] = np.NaN
+ assert df_res.equals(res)
+
+ df1 = pd.DataFrame(
+ {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
+ )
+ df2 = pd.DataFrame({'key': ['foo', 'bar', 'baz', 'foo', None], 'value': [5, 6, 7, 8, 99]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+
+ df_res = df1.merge(df2, on='key', how='left')
+ res = tab1.merge(tab2, on='key', how='left').pd()
+ assert str(res.at[6, 'value_y']) == '--'
+ res.at[6, 'value_y'] = np.NaN
+ assert res.equals(df_res)
+
+
+def test_table_right_merge(kx, q):
+ if sys.version_info.minor > 7:
+ # Merge on keys
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='right'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='right'
+ ).pd()
+ )
+
+ # Merge on keys KeyedTable
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = q('{1!x}', kx.toq(df1))
+ tab2 = q('{1!x}', kx.toq(df2))
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='right'
+ ).equals(
+ q('{0!x}', tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='right'
+ )).pd()
+ )
+
+ # Merge on differing keys
+ df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
+ df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ tab_res = tab1.merge(tab2, on='a', how='right').pd()
+ assert str(tab_res.at[1, 'b']) == '--'
+ tab_res.at[1, 'b'] = np.NaN
+ assert df1.merge(df2, on='a', how='right').equals(tab_res)
+
+ # Merge on same indexes
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_index=True,
+ right_index=True,
+ how='right'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_index=True,
+ right_index=True,
+ how='right'
+ ).pd()
+ )
+
+ # Merge on different indexes
+ df1 = pd.DataFrame(
+ {
+ 'lkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [1, 2, 3, 5]
+ },
+ index=[4, 3, 2, 1]
+ )
+ df2 = pd.DataFrame(
+ {
+ 'rkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [5, 6, 7, 8]
+ },
+ index=[0, 1, 2, 3]
+ )
+ tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
+ tab1 = q('{1!x}', tab1)
+ tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
+ tab2 = q('{1!x}', tab2)
+ res = tab1.merge(tab2, left_index=True, right_index=True, how='right')
+ assert q(
+ '{x~y}',
+ tab1.merge(tab2, left_index=True, right_index=True, how='right', q_join=True),
+ q(
+ '{1!x}',
+ q.lj(
+ q('{0!x}', q.xcol(kx.SymbolVector(['idx', 'rkey', 'value_y']), tab2)),
+ q.xcol(kx.SymbolVector(['idx', 'lkey', 'value_x']), tab1)
+ )
+ )
+ )
+ assert isinstance(res, kx.KeyedTable)
+ df_res = df1.merge(df2, left_index=True, right_index=True, how='right')
+ # assert our index does match properly before removing it
+ assert q('0!', res)['idx'].py() == list(df_res.index)
+ # We have idx as a column so we have to remove it to be equal as it won't convert
+ # to the pandas index column automatically
+ res = q('{(enlist `idx)_(0!x)}', res).pd()
+ df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
+ df_res.pop('index')
+ res.at[0, 'lkey'] = np.NaN
+ res.at[0, 'value_x'] = np.NaN
+ assert df_res.equals(res)
+
+ df1 = pd.DataFrame(
+ {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
+ )
+ df2 = pd.DataFrame({'key': ['foo', 'bar', 'baz', 'foo', None], 'value': [5, 6, 7, 8, 99]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+
+ df_res = df1.merge(df2, on='key', how='right')
+ res = tab1.merge(tab2, on='key', how='right').pd()
+ assert str(res.at[6, 'key']) == ''
+ res.at[6, 'key'] = None
+ assert res.equals(df_res)
+
+
+def test_table_outer_merge(kx, q):
+ if sys.version_info.minor > 7:
+ # Merge on keys
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer'
+ ).pd()
+ )
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer',
+ sort=True
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer',
+ sort=True
+ ).pd()
+ )
+
+ # Merge on keys KeyedTable
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = q('{1!x}', kx.toq(df1))
+ tab2 = q('{1!x}', kx.toq(df2))
+ assert df1.merge(
+ df2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer',
+ sort=True
+ ).equals(
+ q('{0!x}', tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer',
+ sort=True
+ )).pd()
+ )
+
+ # Merge on differing keys
+ df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
+ df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ tab_res = tab1.merge(tab2, on='a', how='outer').pd()
+ assert str(tab_res.at[1, 'c']) == '--'
+ tab_res.at[1, 'c'] = np.NaN
+ assert str(tab_res.at[2, 'b']) == '--'
+ tab_res.at[2, 'b'] = np.NaN
+ assert df1.merge(df2, on='a', how='outer').equals(tab_res)
+
+ # Merge on same indexes
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(
+ df2,
+ left_index=True,
+ right_index=True,
+ how='outer'
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_index=True,
+ right_index=True,
+ how='outer'
+ ).pd()
+ )
+ assert df1.merge(
+ df2,
+ left_index=True,
+ right_index=True,
+ how='outer',
+ sort=True
+ ).equals(
+ tab1.merge(
+ tab2,
+ left_index=True,
+ right_index=True,
+ how='outer',
+ sort=True
+ ).pd()
+ )
+
+ # Merge on different indexes
+ df1 = pd.DataFrame(
+ {
+ 'lkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [1, 2, 3, 5]
+ },
+ index=[4, 3, 2, 1]
+ )
+ df2 = pd.DataFrame(
+ {
+ 'rkey': ['foo', 'bar', 'baz', 'foo'],
+ 'value': [5, 6, 7, 8]
+ },
+ index=[0, 1, 2, 3]
+ )
+ tab1 = q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
+ tab1 = q('{1!x}', tab1)
+ tab2 = q('{`idx xcols update idx: til count x from x}', tab2)
+ tab2 = q('{1!x}', tab2)
+ res = tab1.merge(tab2, left_index=True, right_index=True, how='outer')
+ assert isinstance(res, kx.KeyedTable)
+ df_res = df1.merge(df2, left_index=True, right_index=True, how='outer')
+ # assert our index does match properly before removing it
+ assert q('0!', res)['idx'].py() == list(df_res.index)
+ # We have idx as a column so we have to remove it to be equal as it won't convert
+ # to the pandas index column automatically
+ res = q('{(enlist `idx)_(0!x)}', res).pd()
+ df_res = df_res.reset_index() # Reset pandas index to default, we already checked it
+ df_res.pop('index')
+ res.at[0, 'lkey'] = np.NaN
+ res.at[0, 'value_x'] = np.NaN
+ res.at[4, 'rkey'] = np.NaN
+ res.at[4, 'value_y'] = np.NaN
+ assert df_res.equals(res)
+
+ df1 = pd.DataFrame(
+ {'key': ['foo', 'bar', 'baz', 'foo', 'quz'], 'value': [1, 2, 3, 5, None]}
+ )
+ df2 = pd.DataFrame(
+ {
+ 'key': ['foo', 'bar', 'baz', 'foo', None],
+ 'value': [5.0, 6.0, 7.0, 8.0, 99.0]
+ }
+ )
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ df_res = df1.merge(df2, on='key', how='outer')
+ res = tab1.merge(tab2, on='key', how='outer').pd()
+ assert res.at[7, 'key'] == ''
+ res.at[7, 'key'] = None
+ assert df_res.equals(res)
+
+
+def test_cross_merge(kx, q):
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ assert df1.merge(df2, how='cross').equals(tab1.merge(tab2, how='cross').pd())
+ tab1 = kx.q('{`idx xcols update idx: reverse 1 + til count x from x}', tab1)
+ tab1 = kx.q('{1!x}', tab1)
+ tab2 = kx.q('{`idx xcols update idx: til count x from x}', tab2)
+ tab2 = kx.q('{1!x}', tab2)
+ df_res = df1.merge(df2, how='cross')
+ res = tab1.merge(tab2, how='cross')
+ assert q('0!', res)['idx'].py() == list(df_res.index)
+ # We have idx as a column so we have to remove it to be equal as it won't convert
+ # to the pandas index column automatically
+ res = q('{(enlist `idx)_(0!x)}', res).pd()
+ assert df_res.equals(res)
+
+
+def test_merge_errors(kx):
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ with pytest.raises(ValueError):
+ tab1.merge(
+ tab2,
+ left_on='lkey',
+ right_on='rkey',
+ how='outer',
+ suffixes=(False, False)
+ )
+
+
+def test_cross_merge_errors(kx, q):
+ df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
+ df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
+ tab1 = kx.toq(df1)
+ tab2 = kx.toq(df2)
+ with pytest.raises(ValueError) as e1:
+ tab1.merge(tab2, how='cross', on='lkey')
+ assert (
+ 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
+ in str(e1.value)
+ )
+ with pytest.raises(ValueError) as e2:
+ tab1.merge(tab2, how='cross', left_on='lkey', right_on='rkey')
+ assert (
+ 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
+ in str(e2.value)
+ )
+ with pytest.raises(ValueError) as e3:
+ tab1.merge(tab2, how='cross', left_index=True, right_index=True)
+ assert (
+ 'Can not pass on, right_on, left_on or set right_index=True or left_index=True'
+ in str(e3.value)
+ )
+
+
+def test_api_vs_pandas(kx, q):
+ tab = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
+ df = tab.pd()
+ assert q(
+ '{x ~ y}',
+ tab[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')],
+ q('{value x}', kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')]))
+ )
+ assert q(
+ '{x ~ y}',
+ tab[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')][tab['x'] > 5],
+ q(
+ '{value x}',
+ kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')][df['x'] > 5])
+ )
+ )
+ assert q(
+ '{x ~ y}',
+ tab.iloc[(tab['z'] == 'b') | (tab['z'] == 'c') | (tab['z'] == 'd')].iloc[tab['x'] > 5],
+ q(
+ '{value x}',
+ kx.toq(df[(df['z'] == 'b') | (df['z'] == 'c') | (df['z'] == 'd')][df['x'] > 5])
+ )
+ )
+
+
+def test_df_astype_vanilla_checks(kx, q):
+ df = q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
+ assert check_result_and_type(
+ kx,
+ df.astype(kx.LongVector).py(),
+ q('([] c1:1 2 3j; c2:1 2 3j; c3:1 2 3j; c4:1 2 3j)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.astype({'c1': kx.LongVector, 'c2': 'kx.ShortVector'}).py(),
+ q('([] c1:1 2 3j; c2:1 2 3h; c3:1 2 3h; c4:1 2 3i)').py()
+ )
+
+
+def test_df_astype_string_to_sym(kx, q):
+ df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
+ assert check_result_and_type(
+ kx,
+ df.astype({'c4': kx.SymbolVector, 'c5': kx.SymbolVector}).py(),
+ q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:`abc`def`ghi;c5:`a`b`c;c6:(1 2 3;4 5 6;7 8 9))''').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.astype({'c4': kx.SymbolVector}).py(),
+ q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:`abc`def`ghi;c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
+ )
+
+
+def test_df_astype_value_errors(kx, q):
+ df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
+ # Check errors parameter set to 'ignore'
+ assert check_result_and_type(
+ kx,
+ df.astype({'c6': kx.CharVector}, errors='ignore').py(),
+ q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
+ )
+ with pytest.raises(ValueError,
+ match=r"This method can only handle casting string complex columns to "
+ "symbols. Other complex column data or"
+ " casting to other data is not supported."):
+ raise df.astype({'c4': kx.ShortVector})
+ with pytest.raises(ValueError,
+ match=r"This method can only handle casting string complex columns to "
+ "symbols. Other complex column data or"
+ " casting to other data is not supported."):
+ raise df.astype({'c6': kx.CharVector})
+ with pytest.raises(kx.QError,
+ match=r"Not supported: "
+ "Error casting LongVector to GUIDVector with q error: type"):
+ raise df.astype({'c3': kx.GUIDVector})
+ with pytest.raises(NotImplementedError,
+ match=r"Currently only the default value of True is accepted for copy"):
+ raise df.astype({'c3': kx.ShortVector}, copy='False')
+ with pytest.raises(ValueError,
+ match=r"Column name passed in dictionary not present in df table"):
+ raise df.astype({'c100': kx.ShortVector})
+ with pytest.raises(kx.QError,
+ match=r'Value passed does not match PyKX wrapper type'):
+ raise df.astype({'c1': 'nomatchvalue'})
+ with pytest.raises(kx.QError,
+ match=r'Value passed does not match PyKX wrapper type'):
+ raise df.astype('nomatchvalue')
+ df = q('''([] c1:("abc";"def";"ghi");c2:(1 2 3;4 5 6;7 8 9))''')
+ with pytest.raises(ValueError,
+ match=r"This method can only handle casting string complex"
+ " columns to symbols. Other complex column data or"
+ " casting to other data is not supported."):
+ raise df.astype(kx.SymbolVector)
+ df = q('''([] a:1 2 3 4 5 6;b:`b`n`h``v`;
+ c:("ll";"ll";"ll";"ll";"ll";"ll");
+ d:("ll";"ll";"ll";"ll";"ll";1 2 3))''')
+ with pytest.raises(ValueError,
+ match=r"This method can only handle casting string complex"
+ " columns to symbols. Other complex column data or"
+ " casting to other data is not supported."):
+ raise df.astype({'d': kx.SymbolVector})
+
+
+def test_df_select_dtypes(kx, q):
+ df = q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
+ assert check_result_and_type(
+ kx,
+ df.select_dtypes(include=[kx.ShortVector, kx.LongVector]).py(),
+ q('([] c2:1 2 3h; c3:1 2 3j)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.select_dtypes(exclude='kx.LongVector').py(),
+ q('([] c1:`a`b`c; c2:1 2 3h; c4:1 2 3i)').py()
+ )
+ assert check_result_and_type(
+ kx,
+ df.select_dtypes(include=['ShortVector', kx.LongVector],
+ exclude=[kx.SymbolVector]).py(),
+ q('([] c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)').py()
+ )
+
+
+def test_df_select_dtypes_errors(kx, q):
+ df = q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
+ with pytest.raises(ValueError, match=r"Expecting either include or"
+ " exclude param to be passed"):
+ raise df.select_dtypes()
+ with pytest.raises(ValueError, match=r"Include and Exclude lists"
+ " have overlapping elements"):
+ df.select_dtypes(include='kx.LongVector',
+ exclude='kx.LongVector')
+
+
+def test_df_drop(kx, q):
+ t = q('([] til 10; 10?10; 10?1f; (10 10)#100?" ")')
+
+ # Test dropping rows from table
+
+ rez = t.drop(5)
+ assert(len(rez)==9)
+ assert(5 not in rez['x'])
+
+ rez = t.drop([3, 5, 7])
+ assert(len(rez)==7)
+ assert(all([x not in rez['x'] for x in [3, 5, 7]]))
+
+ rez = t.drop(index=5)
+ assert(len(rez)==9)
+ assert(5 not in rez['x'])
+
+ rez = t.drop(index=[3, 5, 7])
+ assert(len(rez)==7)
+ assert(all([x not in rez['x'] for x in [3, 5, 7]]))
+
+ rez = t.drop(-1, errors='ignore')
+ assert(q('{x~y}', t, rez).py())
+
+ rez = t.drop([-1, 10], errors='ignore')
+ assert(q('{x~y}', t, rez).py())
+
+ # Test dropping columns from table
+
+ rez = t.drop('x1', axis=1)
+ assert(len(rez.columns) == 3)
+ assert('x1' not in rez.columns)
+
+ rez = t.drop(['x1', 'x3'], axis=1)
+
+ assert(len(rez.columns) == 2)
+ assert(all([x not in rez.columns for x in ['x1', 'x3']]))
+
+ rez = t.drop(columns='x1')
+ assert(len(rez.columns) == 3)
+ assert('x1' not in rez.columns)
+
+ rez = t.drop(columns=['x1', 'x3'])
+ assert(len(rez.columns) == 2)
+ assert(all([x not in rez.columns for x in ['x1', 'x3']]))
+
+ rez = t.drop('x72', axis=1, errors='ignore')
+ assert(q('{x~y}', t, rez).py())
+
+ rez = t.drop(['x42', 'x72'], axis=1, errors='ignore')
+ assert(q('{x~y}', t, rez).py())
+
+ # Test dropping rows from keyed table
+
+ q('sym:`aaa`bbb`ccc')
+ kt = q('([sym,7?sym; til 10] 10?10; 10?1f; (10 10)#100?" ")')
+
+ key = q('{value exec from key[x] where sym=`aaa}', kt).py()
+ rez = kt.drop([key])
+ assert(len(rez)==q('{[a;b;c] count delete from key[a] where (sym=b) and x=c}',
+ kt, key[0], key[1]).py())
+ assert(key not in q('key', rez))
+
+ keys = q('{(2 2)#raze flip value flip 2#select from key[x] where sym=`aaa}', kt).py()
+ rez = kt.drop(keys)
+ rez2 = q('{c:{(count[x];2)#raze flip value flip key[x]}[x] in y; count delete from x where c}',
+ kt, keys).py()
+ assert(len(rez)==rez2)
+ assert(not any(q('{{(count[x];2)#raze flip value flip key[x]}[x] in y}', rez, keys).py()))
+
+ rez = kt.drop(index=[key])
+ assert(len(rez)==q('{[a;b;c] count delete from key[a] where (sym=b) and x=c}',
+ kt, key[0], key[1]).py())
+ assert(key not in q('key', rez))
+
+ rez = kt.drop(index=keys)
+ rez2 = q('{c:{(count[x];2)#raze flip value flip key[x]}[x] in y; count delete from x where c}',
+ kt, keys).py()
+ assert(len(rez)==rez2)
+ assert(not any(q('{{(count[x];2)#raze flip value flip key[x]}[x] in y}', rez, keys).py()))
+
+ key = 'aaa'
+ rez = kt.drop(key, level=0)
+ rez2 = q('{c:y=key[x]`sym; delete from x where c}', kt, key)
+ assert(len(rez)==len(rez2))
+ assert(not q('{y in key[x]`sym}', rez, key).py())
+
+ keys = ['aaa', 'bbb']
+ rez = kt.drop(keys, level=0)
+ rez2 = q('{c:(key[x]`sym) in y; delete from x where c}', kt, keys)
+ assert(len(rez)==len(rez2))
+ assert(not any(q('{(key[x]`sym) in y}', rez, keys).py()))
+
+ keys = [0, 1, 2, 3, 4]
+ rez = kt.drop(keys, level=1)
+ rez2 = q('{c:(key[x]`x) in y; delete from x where c}', kt, keys)
+ assert(len(rez)==len(rez2))
+ assert(not any(q('{(key[x]`x) in y}', rez, keys).py()))
+
+ rez = kt.drop([('a', -1), ('zzz', 99)], errors='ignore')
+ assert(q('{x~y}', kt, rez).py())
+
+ rez = kt.drop('zzz', level=0, errors='ignore')
+ assert(q('{x~y}', kt, rez).py())
+
+ rez = kt.drop(['a', 'zzz'], level=0, errors='ignore')
+ assert(q('{x~y}', kt, rez).py())
+
+ # Test dropping columns from keyed table
+
+ rez = kt.drop('x1', axis=1)
+ assert(len(rez.columns) == 2)
+ assert('x1' not in rez.columns)
+
+ rez = kt.drop(['x', 'x2'], axis=1)
+ assert(len(rez.columns) == 1)
+ assert(all([x not in rez.columns for x in ['x', 'x2']]))
+
+ rez = kt.drop(columns='x1')
+ assert(len(rez.columns) == 2)
+ assert('x1' not in rez.columns)
+
+ rez = kt.drop(columns=['x', 'x2'])
+ assert(len(rez.columns) == 1)
+ assert(all([x not in rez.columns for x in ['x', 'x2']]))
+
+ rez = kt.drop('x72', axis=1, errors='ignore')
+ assert(q('{x~y}', kt, rez).py())
+
+ rez = kt.drop(['x42', 'x72'], axis=1, errors='ignore')
+ assert(q('{x~y}', kt, rez).py())
+
+ # Test error cases
+
+ with pytest.raises(ValueError):
+ t.drop()
+
+ with pytest.raises(ValueError):
+ t.drop(4, index=5, columns='x1')
+
+ with pytest.raises(kx.QError):
+ t.drop('x1')
+
+ with pytest.raises(kx.QError):
+ t.drop(2, axis=1)
+
+ with pytest.raises(ValueError):
+ t.drop(0, axis=1, level=0)
+
+ with pytest.raises(kx.QError) as e:
+ t.drop(-1)
+ assert(str(e.value) == '-1 not found.')
+
+ with pytest.raises(kx.QError) as e:
+ t.drop([-1, 10])
+ assert(str(e.value) == '-1, 10 not found.')
+
+ with pytest.raises(kx.QError) as e:
+ kt.drop([('a', -1), ('zzz', 99)])
+ assert(str(e.value) == '(a, -1), (zzz, 99) not found.')
+
+ with pytest.raises(kx.QError) as e:
+ kt.drop('zzz', level=0)
+ assert(str(e.value) == 'zzz not found.')
+
+ with pytest.raises(kx.QError) as e:
+ kt.drop(['a', 'zzz'], level=0)
+ assert(str(e.value) == 'a, zzz not found.')
+
+ with pytest.raises(kx.QError) as e:
+ t.drop('x42', axis=1)
+ assert(str(e.value) == 'x42 not found.')
+
+ with pytest.raises(kx.QError) as e:
+ t.drop(['x42', 'x72'], axis=1)
+ assert(str(e.value) == 'x42, x72 not found.')
+
+ with pytest.raises(kx.QError) as e:
+ kt.drop('x42', axis=1)
+ assert(str(e.value) == 'x42 not found.')
+
+ with pytest.raises(kx.QError) as e:
+ kt.drop(['x42', 'x72'], axis=1)
+ assert(str(e.value) == 'x42, x72 not found.')
+
+
+def test_df_drop_duplicates(kx, q):
+ N = 100
+ q['N'] = N
+ q('sym:`aaa`bbb`ccc')
+ t = q('([] N?sym; N?3)')
+
+ rez = t.drop_duplicates()
+ rez2 = t.pd().drop_duplicates().reset_index(drop=True)
+ assert(q('{x~y}', rez, rez2))
+
+ with pytest.raises(ValueError):
+ t.drop_duplicates(subset=['x', 'x1'])
+
+ with pytest.raises(ValueError):
+ t.drop_duplicates(keep='last')
+
+ with pytest.raises(ValueError):
+ t.drop_duplicates(inplace=True)
+
+ with pytest.raises(ValueError):
+ t.drop_duplicates(ignore_index=True)
+
+
+def test_df_rename(kx, q):
+ q('sym:`aaa`bbb`ccc')
+ t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+
+ cols = {'sym': 'Symbol', 'x2': 'xsquare'}
+ rez = t.rename(cols, axis=1)
+ assert(q('{x~y}', rez, t.pd().rename(cols, axis=1)))
+
+ cols = {'sym': 'Symbol', 'x2': 'xsquare'}
+ rez = t.rename(columns=cols)
+ assert(q('{x~y}', rez, t.pd().rename(columns=cols)))
+
+ kt = kx.q('([idx:til 10] til 10; 10?10; 10?1f; (10;10)#100?" ")')
+
+ idx = {0: 'foo', 5: 'bar'}
+ rez = kt.rename(idx)
+ # assert(q('{x~y}', rez, kt.pd().rename(idx))) # {x~y}=1b because of some q attribute
+ assert(all(rez.pd().eq(kt.pd().rename(idx))))
+
+ idx = {0: 'foo', 5: 'bar'}
+ rez = kt.rename(index=idx)
+ # assert(q('{x~y}', rez, kt.pd().rename(index=idx))) # {x~y}=1b because of some q attribute
+ assert(all(rez.pd().eq(kt.pd().rename(index=idx))))
+
+ with pytest.raises(ValueError):
+ t.rename()
+
+ with pytest.raises(ValueError):
+ t.rename(index={5: 'foo'}, axis=1)
+
+ with pytest.raises(ValueError):
+ t.rename(columns={'x': 'xXx'}, level=0)
+
+ with pytest.raises(ValueError):
+ t.rename(columns={'x': 'xXx'}, copy=False)
+
+ with pytest.raises(ValueError):
+ t.rename(columns={'x': 'xXx'}, inplace=True)
+
+ with pytest.raises(ValueError):
+ t.rename({5: 'foo'}, level=0)
+
+ with pytest.raises(ValueError):
+ t.rename(columns={'x': 'xXx'}, errors='raise')
+
+
+def test_df_add_prefix(kx, q):
+ q('sym:`aaa`bbb`ccc')
+ t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+
+ rez = t.add_prefix("col_")
+
+ assert(q('{x~y}', rez, t.pd().add_prefix("col_")))
+
+ kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")')
+ kt_res = kx.q('([idx: `col_0`col_1`col_2`col_3`col_4] til 5)')
+
+ rez = kt.add_prefix("col_")
+ assert(q('{x~y}', rez, kt.pd().add_prefix("col_")))
+
+ rez = kt.add_prefix("col_", axis=1)
+ assert(q('{x~y}', kx.q("{(0!x) `idx}",rez), kx.q("{(0!x) `idx}",kt_res)))
+
+ with pytest.raises(ValueError):
+ t.add_prefix()
+
+def test_df_add_suffix(kx, q):
+ q('sym:`aaa`bbb`ccc')
+ t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+
+ rez = t.add_suffix("_col")
+
+ assert(q('{x~y}', rez, t.pd().add_suffix("_col")))
+
+ kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")')
+ kt_res = kx.q('([idx: `0_col`1_col`2_col`3_col`4_col] til 5)')
+
+ rez = kt.add_suffix("_col", axis=1)
+ assert(q('{x~y}', kx.q("{(0!x) `idx}",rez), kx.q("{(0!x) `idx}",kt_res)))
+
+
+ rez = kt.add_suffix("_col")
+ assert(q('{x~y}', rez, kt.pd().add_suffix("_col")))
+
+ with pytest.raises(ValueError):
+ t.add_suffix()
+
+@pytest.mark.pandas_api
+@pytest.mark.xfail(reason='Flaky randomization')
+def test_df_sample(kx, q):
+ q('sym:`aaa`bbb`ccc')
+ t = q('([] 10?sym; til 10; 10?10; 10?1f)')
+ df = t.pd()
+ kt = q('([idx:til 10] til 10; 10?10; 10?1f; (10;10)#100?" ")')
+ df2 = kt.pd()
+
+ rez = t.sample()
+ assert(type(rez) is kx.Table)
+ assert(len(rez) == 1)
+ check = df.iloc[rez['x'].py()].reset_index(drop=True)
+ assert(q('{x~y}', rez, check))
+
+ rez = t.sample(5)
+ assert(type(rez) is kx.Table)
+ assert(len(rez) == 5)
+ check = df.iloc[rez['x'].py()].reset_index(drop=True)
+ assert(q('{x~y}', rez, check))
+
+ rez = t.sample(10)
+ assert(type(rez) is kx.Table)
+ assert(len(rez) == 10)
+ check = df.iloc[rez['x'].py()].reset_index(drop=True)
+ assert(q('{x~y}', rez, check))
+ assert(q('{x~y}', rez['x'].pd().unique(), check['x'].unique()))
+
+ rez = t.sample(100, replace=True)
+ assert(type(rez) is kx.Table)
+ assert(len(rez) == 100)
+ check = df.iloc[rez['x'].py()].reset_index(drop=True)
+ assert(q('{x~y}', rez, check))
+
+ rez = t.sample(frac=0.5, replace=True)
+ assert(type(rez) is kx.Table)
+ assert(len(rez) == 5)
+ check = df.iloc[rez['x'].py()].reset_index(drop=True)
+ assert(q('{x~y}', rez, check))
+
+ rez = kt.sample()
+ assert(type(rez) is kx.KeyedTable)
+ assert(len(rez) == 1)
+ check = df2.iloc[rez['x'].py()]
+ assert(q('{x~y}', rez, check))
+
+ rez = kt.sample(5)
+ assert(type(rez) is kx.KeyedTable)
+ assert(len(rez) == 5)
+ check = df2.iloc[rez['x'].py()]
+ assert(q('{x~y}', rez, check))
+
+ rez = kt.sample(10)
+ assert(type(rez) is kx.KeyedTable)
+ assert(len(rez) == 10)
+ check = df2.iloc[rez['x'].py()]
+ assert(q('{x~y}', rez, check))
+ assert(q('{x~y}', rez['x'].pd().unique(), check['x'].unique()))
+
+ rez = kt.sample(100, replace=True)
+ assert(type(rez) is kx.KeyedTable)
+ assert(len(rez) == 100)
+ check = df2.iloc[rez['x'].py()]
+ assert(q('{x~y}', rez, check))
+
+ rez = kt.sample(frac=0.5, replace=True)
+ assert(type(rez) is kx.KeyedTable)
+ assert(len(rez) == 5)
+ check = df2.iloc[rez['x'].py()]
+ assert(q('{x~y}', rez, check))
+
+ with pytest.raises(ValueError):
+ t.sample(100)
+
+ with pytest.raises(ValueError):
+ t.sample(weights=np.ones(10))
+
+ with pytest.raises(ValueError):
+ t.sample(random_state=42)
+
+ with pytest.raises(ValueError):
+ t.sample(axis=1)
+
+ with pytest.raises(ValueError):
+ t.sample(ignore_index=True)
+
+
+def test_mean(kx, q):
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': [7, 11, 14, 14]
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.mean()
+ q_m = tab.mean()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.mean(axis=1)
+ q_m = tab.mean(axis=1)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ q['tab'] = kx.toq(df)
+ tab = q('1!`idx xcols update idx: til count tab from tab')
+ p_m = df.mean()
+ q_m = tab.mean()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.mean(axis=1)
+ q_m = tab.mean(axis=1)
+ for c in range(len(q.cols(tab)) - 1):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': ['foo', 'bar', 'baz', 'qux']
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.mean(numeric_only=True)
+ q_m = tab.mean(numeric_only=True)
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.mean(axis=1, numeric_only=True)
+ q_m = tab.mean(axis=1, numeric_only=True)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ with pytest.raises(kx.QError):
+ q_m = tab.mean()
+ with pytest.raises(kx.QError):
+ q_m = tab.mean(axis=1)
+
+
+def test_std(kx, q):
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': [7, 11, 14, 14]
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.std()
+ q_m = tab.std()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.std(axis=1)
+ q_m = tab.std(axis=1)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+ p_m = df.std(ddof=0)
+ q_m = tab.std(ddof=0)
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+
+ p_m = df.std(ddof=4)
+ q_m = tab.std(ddof=4)
+ for c in q.key(q_m).py():
+ assert np.isnan(p_m[c]) == np.isnan(q_m[c].py())
+
+ q['tab'] = kx.toq(df)
+ tab = q('1!`idx xcols update idx: til count tab from tab')
+ p_m = df.std()
+ q_m = tab.std()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.std(axis=1)
+ q_m = tab.std(axis=1)
+ for c in range(len(q.cols(tab)) - 1):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': ['foo', 'bar', 'baz', 'qux']
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.std(numeric_only=True)
+ q_m = tab.std(numeric_only=True)
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.std(axis=1, numeric_only=True)
+ q_m = tab.std(axis=1, numeric_only=True)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ with pytest.raises(kx.QError):
+ q_m = tab.std()
+ with pytest.raises(kx.QError):
+ q_m = tab.std(axis=1)
+
+
+def test_median(kx, q):
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': [7, 11, 14, 14]
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.median()
+ q_m = tab.median()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.median(axis=1)
+ q_m = tab.median(axis=1)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ q['tab'] = kx.toq(df)
+ tab = q('1!`idx xcols update idx: til count tab from tab')
+ p_m = df.median()
+ q_m = tab.median()
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.median(axis=1)
+ q_m = tab.median(axis=1)
+ for c in range(len(q.cols(tab)) - 1):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': ['foo', 'bar', 'baz', 'qux']
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.median(numeric_only=True)
+ q_m = tab.median(numeric_only=True)
+ for c in q.key(q_m).py():
+ assert p_m[c] == q_m[c].py()
+ p_m = df.median(axis=1, numeric_only=True)
+ q_m = tab.median(axis=1, numeric_only=True)
+ for c in range(len(q.cols(tab))):
+ assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+ with pytest.raises(kx.QError):
+ q_m = tab.median()
+ with pytest.raises(kx.QError):
+ q_m = tab.median(axis=1)
+
+
+def test_mode(kx, q): # noqa
+ if sys.version_info.minor > 7:
+ def compare_q_to_pd(tab, df):
+ if 'idx' in q.cols(tab):
+ tab.pop('idx')
+ tab = tab.pd()
+ for i in range(len(tab)):
+ for c in tab.columns:
+ df_c = c
+ try:
+ df_c = int(c)
+ except BaseException:
+ pass
+ if str(tab.at[i, c]) == '--':
+ tab.at[i, c] = np.NaN
+ if str(tab.at[i, c]) == '':
+ tab.at[i, c] = 'nan'
+ if str(tab.at[i, c]) == 'nan' and str(df.at[i, df_c]) == 'nan':
+ continue
+ if tab.at[i, c] != df.at[i, df_c]:
+ return False
+ return True
+
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': [7, 11, 14, 14]
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.mode()
+ q_m = tab.mode()
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1)
+ q_m = tab.mode(axis=1)
+ assert compare_q_to_pd(q_m, p_m)
+
+ q['tab'] = kx.toq(df)
+ tab = q('1!`idx xcols update idx: til count tab from tab')
+
+ p_m = df.mode()
+ q_m = tab.mode()
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1)
+ q_m = tab.mode(axis=1)
+ assert compare_q_to_pd(q_m, p_m)
+
+ df = pd.DataFrame(
+ {
+ 'a': [1, 2, 2, 4],
+ 'b': [1, 2, 6, 7],
+ 'c': [7, 8, 9, 10],
+ 'd': ['foo', 'bar', 'baz', 'foo']
+ }
+ )
+ tab = kx.toq(df)
+ p_m = df.mode()
+ q_m = tab.mode()
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1, numeric_only=True)
+ q_m = tab.mode(axis=1, numeric_only=True)
+ assert compare_q_to_pd(q_m, p_m)
+
+ df = pd.DataFrame({
+ 'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],
+ 'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],
+ 'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']
+ })
+ tab = kx.toq(df)
+
+ p_m = df.mode()
+ q_m = tab.mode()
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1, numeric_only=True)
+ q_m = tab.mode(axis=1, numeric_only=True)
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(numeric_only=True)
+ q_m = tab.mode(numeric_only=True)
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1, numeric_only=True)
+ q_m = tab.mode(axis=1, numeric_only=True)
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(dropna=False)
+ q_m = tab.mode(dropna=False)
+ assert compare_q_to_pd(q_m, p_m)
+
+ p_m = df.mode(axis=1, dropna=False, numeric_only=True)
+ q_m = tab.mode(axis=1, dropna=False, numeric_only=True)
+ assert compare_q_to_pd(q_m, p_m)
+
+
+def test_table_merge_asof(kx, q):
+ left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
+ right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
+ qleft = kx.toq(left)
+ qright = kx.toq(right)
+
+ assert (pd.merge_asof(left, right, on='a')
+ == kx.merge_asof(qleft, qright, on='a').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='a')
+ == qleft.merge_asof(qright, on='a').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='a')
+ == q('0!', q('1!', qleft).merge_asof(qright, on='a')).pd()).all().all()
+ assert (pd.merge_asof(left, right, on='a')
+ == qleft.merge_asof(q('1!', qright), on='a').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='a')
+ == q('0!', q('1!', qleft).merge_asof(q('1!', qright), on='a')).pd()).all().all()
+ left = pd.DataFrame({
+ "time": [
+ pd.Timestamp("2016-05-25 13:30:00.023"),
+ pd.Timestamp("2016-05-25 13:30:00.023"),
+ pd.Timestamp("2016-05-25 13:30:00.030"),
+ pd.Timestamp("2016-05-25 13:30:00.041"),
+ pd.Timestamp("2016-05-25 13:30:00.048"),
+ pd.Timestamp("2016-05-25 13:30:00.049"),
+ pd.Timestamp("2016-05-25 13:30:00.072"),
+ pd.Timestamp("2016-05-25 13:30:00.075")
+ ],
+ "ticker": [
+ "GOOG",
+ "MSFT",
+ "MSFT",
+ "MSFT",
+ "GOOG",
+ "AAPL",
+ "GOOG",
+ "MSFT"
+ ],
+ "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+ "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
+ })
+ right = pd.DataFrame({
+ "time": [
+ pd.Timestamp("2016-05-25 13:30:00.023"),
+ pd.Timestamp("2016-05-25 13:30:00.038"),
+ pd.Timestamp("2016-05-25 13:30:00.048"),
+ pd.Timestamp("2016-05-25 13:30:00.048"),
+ pd.Timestamp("2016-05-25 13:30:00.048")
+ ],
+ "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+ "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+ "quantity": [75, 155, 100, 100, 100]
+ })
+
+ qleft = kx.toq(left)
+ qright = kx.toq(right)
+
+ assert (pd.merge_asof(left, right, on='time')
+ == kx.merge_asof(qleft, qright, on='time').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='time')
+ == qleft.merge_asof(qright, on='time').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='time')
+ == q('0!', q('1!', qleft).merge_asof(qright, on='time')).pd()).all().all()
+ assert (pd.merge_asof(left, right, on='time')
+ == qleft.merge_asof(q('1!', qright), on='time').pd()).all().all()
+ assert (pd.merge_asof(left, right, on='time')
+ == q('0!', q('1!', qleft).merge_asof(q('1!', qright), on='time')).pd()).all().all()
+
+
+def test_pandas_abs(kx, q):
+ tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+ ntab = tab[['price', 'ints']]
+
+ assert ntab.abs().py() == tab.abs(numeric_only=True).py()
+
+ with pytest.raises(kx.QError):
+ tab.abs()
+
+def test_pandas_round(kx, q):
+ q_tab = q('([]c1:1.0 .1 .01 .001 .0001 .00001;'
+ 'c2:1.0 1.2 1.02 1.002 1.0002 1.00002;'
+ 'c3:til 6;'
+ 'c4:`a`b`c`d`e`f)')
+ pd_tab = q_tab.pd()
+ round_dict = {'c1': 0, 'c2': 2}
+
+ assert all(pd_tab.round() == q_tab.round().pd())
+ assert all(q_tab.round(0).pd() == q_tab.round().pd())
+ assert all(pd_tab.round(2) == q_tab.round(2).pd())
+ assert all(pd_tab.round(round_dict) == q_tab.round(round_dict).pd())
+
+ round_dict_non_numerical = {'c1': 0, 'c3': 2, 'c4': 2}
+ assert all(pd_tab.round(round_dict_non_numerical) == q_tab.round(round_dict_non_numerical).pd())
+
+
+def test_pandas_min(q):
+ tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+ df = tab.pd()
+
+ qmin = tab.min().py()
+ pmin = df.min()
+
+ assert str(pmin['sym']) == qmin['sym']
+ assert float(pmin['price']) == qmin['price']
+ assert float(pmin['ints']) == qmin['ints']
+
+ qmin = tab.min(axis=1, numeric_only=True, skipna=True).py()
+ pmin = df.min(axis=1, numeric_only=True, skipna=True)
+
+ for i in range(100):
+ assert float(qmin[i]) == float(pmin[i])
+
+
+def test_pandas_max(q):
+ tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+ df = tab.pd()
+
+ qmax = tab.max().py()
+ pmax = df.max()
+
+ assert str(pmax['sym']) == qmax['sym']
+ assert float(pmax['price']) == qmax['price']
+ assert float(pmax['ints']) == qmax['ints']
+
+ qmax = tab.max(axis=1, numeric_only=True, skipna=True).py()
+ pmax = df.max(axis=1, numeric_only=True, skipna=True)
+
+ for i in range(100):
+ assert float(qmax[i]) == float(pmax[i])
+
+
+def test_pandas_all(q):
+ tab = q(
+ '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;'
+ ' bools: 100?0b)'
+ )
+ df = tab.pd()
+
+ qall = tab.all().py()
+ pall = df.all()
+ assert qall['sym'] == pall['sym']
+ assert qall['ints'] == pall['ints']
+ assert qall['price'] == pall['price']
+ assert qall['bools'] == pall['bools']
+
+ qall = tab.all(bool_only=True).py()
+ pall = df.all(bool_only=True)
+ assert qall['bools'] == pall['bools']
+
+ qall = tab.all(axis=1).py()
+ pall = df.all(axis=1)
+ for i in range(100):
+ assert qall[i] == pall[i]
+
+
+def test_pandas_any(q):
+ tab = q(
+ '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;'
+ ' bools: 100?0b)'
+ )
+ df = tab.pd()
+
+ qany = tab.any().py()
+ pany = df.any()
+ assert qany['sym'] == pany['sym']
+ assert qany['ints'] == pany['ints']
+ assert qany['price'] == pany['price']
+ assert qany['bools'] == pany['bools']
+
+ qany = tab.any(bool_only=True).py()
+ pany = df.any(bool_only=True)
+ assert qany['bools'] == pany['bools']
+
+ qany = tab.any(axis=1).py()
+ pany = df.any(axis=1)
+ for i in range(100):
+ assert qany[i] == pany[i]
+
+
+def test_pandas_prod(q):
+ tab = q('([] sym: 10?`a`b`c; price: 12.25f - 10?25.0f; ints: 10 - 10?20)')
+ df = tab.pd()
+
+ qprod = tab.prod(numeric_only=True).py()
+ pprod = df.prod(numeric_only=True)
+ assert float(qprod['price']) == float(pprod['price'])
+ assert float(qprod['ints']) == float(pprod['ints'])
+
+ qprod = tab.prod(numeric_only=True, skipna=True, axis=1).py()
+ pprod = df.prod(numeric_only=True, skipna=True, axis=1)
+ for i in range(10):
+ assert float(qprod[i]) == float(pprod[i])
+
+ qprod = tab.prod(numeric_only=True, skipna=True, axis=1, min_count=5).py()
+ pprod = df.prod(numeric_only=True, skipna=True, axis=1, min_count=5)
+ for i in range(10):
+ assert qprod[i] == q('0N')
+ assert str(pprod[i]) == 'nan'
+
+
+def test_pandas_sum(q):
+ tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+ df = tab.pd()
+
+ qsum = tab.sum().py()
+ psum = df.sum()
+ assert float(qsum['price']) == float(psum['price'])
+ assert float(qsum['ints']) == float(psum['ints'])
+ assert str(qsum['sym']) == str(psum['sym'])
+
+ qsum = tab.sum(numeric_only=True, skipna=True, axis=1).py()
+ psum = df.sum(numeric_only=True, skipna=True, axis=1)
+ for i in range(10):
+ assert float(qsum[i]) == float(psum[i])
+
+ qsum = tab.sum(numeric_only=True, skipna=True, axis=1, min_count=5).py()
+ psum = df.sum(numeric_only=True, skipna=True, axis=1, min_count=5)
+ for i in range(10):
+ assert qsum[i] == q('0N')
+ assert str(psum[i]) == 'nan'
+
+
+def test_pandas_skew(q):
+ tab = q('([] price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+ df = tab.pd()
+
+ qskew = tab.skew().py()
+ pskew = df.skew()
+ assert round(float(qskew['price']), 6) == round(float(pskew['price']), 6)
+ assert round(float(qskew['ints']), 6) == round(float(pskew['ints']), 6)
+
+
+def test_pandas_groupby_errors(kx, q):
+ tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
+
+ with pytest.raises(RuntimeError):
+ tab.groupby(by='sym', level=[1])
+
+ with pytest.raises(NotImplementedError):
+ tab.groupby(by=lambda x: x)
+ with pytest.raises(NotImplementedError):
+ tab.groupby(by='sym', observed=True)
+ with pytest.raises(NotImplementedError):
+ tab.groupby(by='sym', group_keys=False)
+ with pytest.raises(NotImplementedError):
+ tab.groupby(by='sym', axis=1)
+
+ arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],
+ ['Captive', 'Wild', 'Captive', 'Wild', 'Wild']]
+ index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+ df = pd.DataFrame({'Max Speed': [390., 350., 30., 20., 25.]},
+ index=index)
+ tab = kx.toq(df)
+
+ with pytest.raises(KeyError):
+ tab.groupby(level=[0, 4])
+
+
+def test_pandas_groupby(kx, q):
+ df = pd.DataFrame(
+ {
+ 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+ 'Max Speed': [380., 370., 24., 26.],
+ 'Max Altitude': [570., 555., 275., 300.]
+ }
+ )
+
+ tab = kx.toq(df)
+
+ assert all(
+ df.groupby(['Animal']).mean() == tab.groupby(kx.SymbolVector(['Animal'])).mean().pd()
+ )
+ assert df.groupby(['Animal']).ndim == tab.groupby(kx.SymbolVector(['Animal'])).ndim
+ assert all(
+ df.groupby(['Animal'], as_index=False).mean()
+ == tab.groupby(kx.SymbolVector(['Animal']), as_index=False).mean().pd()
+ )
+ assert all(
+ df.groupby(['Animal']).tail(1).reset_index(drop=True)
+ == tab.groupby(kx.SymbolVector(['Animal'])).tail(1).pd()
+ )
+ assert all(
+ df.groupby(['Animal']).tail(2)
+ == tab.groupby(kx.SymbolVector(['Animal'])).tail(2).pd()
+ )
+
+ df = pd.DataFrame(
+ [
+ ["a", 12, 12],
+ [None, 12.3, 33.],
+ ["b", 12.3, 123],
+ ["a", 1, 1]
+ ],
+ columns=["a", "b", "c"]
+ )
+ tab = kx.toq(df)
+
+ # NaN in column is filled when converted to q this unfills it and re-sorts it
+ assert q(
+ '{[x; y] x:update a:` from x where i=2; x: `a xasc x; x~y}',
+ df.groupby('a', dropna=False).sum(),
+ tab.groupby('a', dropna=False).sum()
+ )
+ assert q(
+ '{[x; y] x:update a:` from x where i=1; x~y}',
+ df.groupby('a', dropna=False, sort=False).sum(),
+ tab.groupby('a', dropna=False, sort=False).sum()
+ )
+ assert all(
+ df.groupby('a', dropna=False, as_index=False).sum()
+ == tab.groupby('a', dropna=False, as_index=False).sum().pd()
+ )
+
+ arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],
+ ['Captive', 'Wild', 'Captive', 'Wild', 'Wild']]
+ index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+ df = pd.DataFrame({'Max Speed': [390., 350., 30., 20., 25.]},
+ index=index)
+ tab = kx.toq(df)
+
+ assert all(
+ df.groupby(['Animal']).mean()
+ == tab.groupby(['Animal']).mean().pd()
+ )
+ assert all(
+ df.groupby(['Animal'], as_index=False).mean()
+ == tab.groupby(['Animal'], as_index=False).mean().pd()
+ )
+
+ assert all(
+ df.groupby(level=[1]).mean()
+ == tab.groupby(level=[1]).mean().pd()
+ )
+ assert all(
+ df.groupby(level=1, as_index=False).mean()
+ == tab.groupby(level=1, as_index=False).mean().pd()
+ )
+
+ assert all(
+ df.groupby(level=[0, 1]).mean()
+ == tab.groupby(level=[0, 1]).mean().pd()
+ )
+ assert all(
+ df.groupby(level=[0, 1], as_index=False).mean()
+ == tab.groupby(level=[0, 1], as_index=False).mean().pd()
+ )
+
+
+def test_keyed_loc_fixes(q):
+ mkt = q('([k1:`a`b`a;k2:100+til 3] x:til 3; y:`multi`keyed`table)')
+ assert q.keys(mkt['x']).py() == ['k1', 'k2']
+ assert q.value(mkt['x']).py() == {'x': [0, 1, 2]}
+ assert mkt[['x', 'y']].pd().equals(mkt.pd()[['x', 'y']])
+ assert mkt['a', 100].py() == {'x': [0], 'y': ['multi']}
+
+ with pytest.raises(KeyError):
+ mkt[['k1', 'y']]
+ with pytest.raises(KeyError):
+ mkt['k1']
+
+def test_pandas_count(q):
+ tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))')
+ df = tab.pd()
+
+ # Assert axis = 1
+ qcount = tab.count(axis=1).py()
+ pcount = df.count(axis=1)
+
+ print(pcount)
+ assert int(qcount[0]) == int(pcount[0])
+ assert int(qcount[1]) == 1
+
+ # Assert axis = 0
+ qcount = tab.count().py()
+ pcount = df.count()
+
+ assert int(qcount["k1"]) == int(pcount["k1"])
+ assert int(qcount["k2"]) == 3
+
+ # Assert only numeric
+ qcount = tab.count(numeric_only = True).py()
+ pcount = df.count(numeric_only = True)
+
+ assert int(qcount["k1"]) == int(pcount["k1"])
+