diff --git a/README.md b/README.md index 29610de..4e3283d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# PyKX +# PyKX ## Introduction diff --git a/docs/examples/subscriber/archive.zip b/docs/examples/subscriber/archive.zip index 655bb7d..0e8323e 100644 Binary files a/docs/examples/subscriber/archive.zip and b/docs/examples/subscriber/archive.zip differ diff --git a/docs/examples/subscriber/readme.md b/docs/examples/subscriber/readme.md index 77e3178..f6cc736 100644 --- a/docs/examples/subscriber/readme.md +++ b/docs/examples/subscriber/readme.md @@ -45,7 +45,7 @@ q process is started. ```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initial Table ===== +===== Initital Table ===== a b --- 4 8 @@ -58,7 +58,7 @@ a b 2 1 1 8 8 5 -===== Initial Table ===== +===== Initital Table ===== ``` diff --git a/docs/examples/subscriber/subscriber.py b/docs/examples/subscriber/subscriber.py index e187d34..5ab019a 100644 --- a/docs/examples/subscriber/subscriber.py +++ b/docs/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initial Table =====') + print('===== Initital Table =====') print(table) - print('===== Initial Table =====') + print('===== Initital Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/subscriber/subscriber_async.py b/docs/examples/subscriber/subscriber_async.py index 30b628d..4db8388 100644 --- a/docs/examples/subscriber/subscriber_async.py +++ b/docs/examples/subscriber/subscriber_async.py @@ -25,9 +25,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) as q: - print('===== Initial Table =====') + 
print('===== Initital Table =====') print(table) - print('===== Initial Table =====') + print('===== Initital Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/threaded_execution/archive.zip b/docs/examples/threaded_execution/archive.zip index 6e58739..07efa2b 100644 Binary files a/docs/examples/threaded_execution/archive.zip and b/docs/examples/threaded_execution/archive.zip differ diff --git a/docs/examples/threaded_execution/asyncio_threading.py b/docs/examples/threaded_execution/asyncio_threading.py index 9d96f06..b1931ba 100644 --- a/docs/examples/threaded_execution/asyncio_threading.py +++ b/docs/examples/threaded_execution/asyncio_threading.py @@ -31,9 +31,9 @@ async def main(): calls = 1000 conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa main_q_con = kx.SyncQConnection(port=5001) - print('===== Initial Table =====') + print('===== Initital Table =====') print(kx.q('table')) - print('===== Initial Table =====') + print('===== Initital Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] diff --git a/docs/examples/threaded_execution/threading.md b/docs/examples/threaded_execution/threading.md index c3dbef4..031706c 100644 --- a/docs/examples/threaded_execution/threading.md +++ b/docs/examples/threaded_execution/threading.md @@ -31,7 +31,7 @@ upserted all of the rows they have recieved to the table the final table will be ``` $ python asyncio_threading.py -===== Initial Table ===== +===== Initital Table ===== a b --- 4 8 @@ -44,7 +44,7 @@ a b 2 1 1 8 8 5 -===== Initial Table ===== +===== 
Initital Table ===== a b ----- 4 8 diff --git a/docs/examples/threaded_execution/threads.py b/docs/examples/threaded_execution/threads.py index f93689e..374382e 100644 --- a/docs/examples/threaded_execution/threads.py +++ b/docs/examples/threaded_execution/threads.py @@ -33,9 +33,9 @@ async def main(): calls = 1000 conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa main_q_con = kx.SyncQConnection(port=5001) - print('===== Initial Table =====') + print('===== Initital Table =====') print(kx.q('table')) - print('===== Initial Table =====') + print('===== Initital Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index 59d61e6..85b044b 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -8,6 +8,16 @@ Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type. +## PyKX 2.3.2 + +#### Release Date + +2024-02-12 + +### Fixes and Improvements + +- Update of PyKX 4.0 linux shared object to version 2024.02.09, this update is to facilitate deployments on more secure linux/linux-arm environments. 
+ ## PyKX 2.3.1 #### Release Date diff --git a/examples/notebooks/interface_overview.ipynb b/examples/notebooks/interface_overview.ipynb index 493e0ad..1623b7e 100644 --- a/examples/notebooks/interface_overview.ipynb +++ b/examples/notebooks/interface_overview.ipynb @@ -4,73 +4,54 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Interface Overview\n", - "The purpose of this notebook is to provide a demonstration of the capabilities of PyKX for users who are familiar with q.\n", + "# PyKX Introduction Notebook\n", "\n", - "To follow along please download this notebook using the following 'link.'\n", + "The purpose of this notebook is to provide an introduction to the capabilities and functionality made available to you with PyKX.\n", "\n", - "This demonstration will outline the following\n", + "To follow along please download this notebook using the following 'link.'\n", "\n", - "1. [Initializing the library](#initializing-the-library)\n", - "2. [Generating q objects](#creating-q-objects-from-python-objects)\n", - "3. [Converting q to Python](#converting-q-to-python)\n", - "4. [Interacting with q objects](#k-object-properties-and-methods)\n", - "5. [Context Interface](#context-interface)\n", - "6. [Querying Interface](#querying-interface)\n", - "7. [IPC communication](#ipc-communication)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initializing the library" + "This Notebook is broken into the following sections\n", + "\n", + "1. [How to import PyKX](#How-to-import-Pykx)\n", + "1. [The basic data structures of PyKX](#The-basic-data-structures-of-PyKX)\n", + "1. [Accessing and creating PyKX objects](#Accessing-and-creating-PyKX-objects)\n", + "1. 
[Running analytics on objects in PyKX](#Running-analytics-on-objects-in-PyKX)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Non-PyKX Requirements\n", + "## Welcome to PyKX!\n", "\n", - "For the purpose of this demonstration the following Python libraries/modules are required" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "import sys\n", - "from tempfile import mkdtemp\n", + "PyKX is a Python library built and maintained for interfacing seamlessly with the world's fastest time-series database technology kdb+ and its underlying vector programming language q.\n", "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import pyarrow as pa" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialization\n", + "Its aim is to provide you and all Python data-engineers and data-scientists with an interface to efficiently apply analytics on large volumes of on-disk and in-memory data, in a fraction of the time of competitor libraries.\n", + "\n", + "## How to import PyKX\n", "\n", - "Once installed via pip, PyKX can be started by importing the module. This will initialize embedded q within the Python process if a valid q license is found (e.g. in `$QHOME` or `$QLIC`), or fall back to the unlicensed version if no such license is found. This notebook will use the licensed version of PyKX. To force the usage of the unlicensed version (and silence the warning that is raised when the fallback to the unlicensed version is employed) you can add `--unlicensed` to the environment variable `$QARGS`. `$QARGS` can be set to a string of arguments which will be used to initialize the embedded q instance, as if you had used those arguments to start q from the command line."
+ "To access PyKX and it's functions import it in your Python code as follows" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": ["hide_code"] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + ] + }, + { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + }, "outputs": [], "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore') # Do not copy, as we are skipping symlinking pyKX to QHOME the core insights libraries will not be copied over and will raise warnings\n", - "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME \n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n", "import pykx as kx\n", "kx.q.system.console_size = [10, 80]" ] @@ -79,7 +60,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Evaluating q code using embedded q" + "The shortening of the import name to `kx` is done for readability of code that uses PyKX and is the intended standard for the library. As such we recommend that you always use `import pykx as kx` when using the library.\n", + "\n", + "Below we load additional libraries used through this notebook." 
] }, { @@ -88,16 +71,23 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('1+1')" + "import numpy as np\n", + "import pandas as pd" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "kx.q('1 2 3 4f')" + "## The basic data structures of PyKX\n", + "\n", + "Central to your interaction with PyKX are the various data types that are supported by the library. Fundamentally, PyKX is built atop a fully featured functional programming language `q` which provides small footprint data structures that can be used in analytic calculations and the creation of highly performant databases. The types we show below are generated from equivalent Python types, but as you will see throughout this notebook there are several other ways to create them.\n", + "\n", + "In this section we will describe the basic elements which you will come in contact with as you traverse the library and explain why/how they are different.\n", + "\n", + "### PyKX Atomic Types\n", + "\n", + "In PyKX an atom denotes a single irreducible value of a specific type. For example, you may come across `pykx.FloatAtom` or `pykx.DateAtom` objects, which may have been generated as follows from an equivalent Pythonic representation.
" ] }, { @@ -106,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('([]2?1f;2?0Ng;2?0b)')" + "kx.FloatAtom(1.0)" ] }, { @@ -115,51 +105,23 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('`a`b`c!(til 10;`a`b`c;5?\"abc\")')" + "from datetime import date\n", + "kx.DateAtom(date(2020, 1, 1))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Creating q objects from Python objects\n", - "\n", - "One of the strengths of the PyKX interface is the flexibility in the representations of objects that can be converted from a native Python representation to a q equivalent.\n", + "### PyKX Vector Types\n", "\n", - "By default data formatted in Python using the following libraries can be converted to a q equivalent representation.\n", + "Similar to atoms, vectors are a data structure composed of a collection of multiple elements of a single specified type. These objects in PyKX along with lists described below form the basis for the majority of the other important data structures that you will encounter including dictionaries and tables.\n", "\n", - "* python native types\n", - "* numpy\n", - "* pandas\n", - "* pyarrow\n", + "Typed vector objects provide significant benefits when it comes to the applications of analytics over Python lists for example. 
Similar to Numpy, PyKX gains from the underlying speed of its analytic engine when operating on these strictly typed objects.\n", "\n", - "These are all facilitated through use of the `K` method of the base `q` class shown before as follows\n", - "\n", - "#### Atomic Structures" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pyAtomic = 1.5\n", - "npAtomic = np.float64(1.5)\n", - "pdAtomic = pd.Series([1.5])\n", - "paAtomic = pa.array([1.5])\n", + "Vector type objects are always 1-D and as such are/can be indexed along a single axis.\n", "\n", - "print(kx.K(pyAtomic))\n", - "# print(kx.K(npAtomic))\n", - "# print(kx.K(pdAtomic))\n", - "# print(kx.K(paAtomic))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Array/Series Structures" + "In the following example we are creating PyKX vectors from common Python equivalent `numpy` and `pandas` objects." ] }, { @@ -168,23 +130,7 @@ "metadata": {}, "outputs": [], "source": [ - "pyArray = [1, 2.5, \"abc\", b'defg']\n", - "npArray = np.array([1, 2.5, \"abc\", b'defg'], dtype = object)\n", - "pdSeries = pd.Series([pyArray])\n", - "paArray = pa.array([1, 2, 3])\n", - "\n", - "print(kx.K(pyArray))\n", - "# print(kx.K(npArray))\n", - "# print(kx.K(pdSeries))\n", - "# print(kx.K(paArray))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Tabular data\n", - "Round trip support for tabular data is presently supported for Pandas Dataframes and PyArrow tables" + "kx.IntVector(np.array([1, 2, 3, 4], dtype=np.int32))" ] }, { @@ -193,40 +139,18 @@ "metadata": {}, "outputs": [], "source": [ - "pdtable = pd.DataFrame({'col1': [1, 2],\n", - " 'col2': [2., 3.],\n", - " 'col3': ['Hello', 'World']})\n", - "patable = pa.Table.from_pandas(pdtable)\n", - "\n", - "display(kx.K(pdtable))\n", - "# display(kx.K(patable))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" +
"kx.toq(pd.Series([1, 2, 3, 4]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Converting q to Python\n", - "All K objects support one or more of the following methods: `py()`, `np()`, `pd()` or `pa()`\n", + "### PyKX Lists\n", "\n", - "These methods provide an interface to the K object such that they can be converted to an analogous Python, Numpy, Pandas or PyArrow object respectively. \n", + "A `List` in PyKX can loosely be described as an untyped vector object. Unlike vectors which are optimised for the performance of analytics, lists are more commonly used for storing reference information or matrix data.\n", "\n", - "Whether the view is a copy or not varies:\n", - "\n", - "1. The 'py' property always provides a copy.\n", - "2. The 'np' property does not copy unless the data cannot be interpreted by Numpy properly without changing it. For example, all temporal types in Numpy take 64 bits per item, so the 32 bit q temporal types must be copied to be represented as Numpy 'datetime64'/'timedelta64' elements. In cases where copying is unacceptable, the raw keyword argument can be set to true as demonstrated below.\n", - "3. The 'pd' property leverages the 'np' property to create Pandas objects, as such the same restrictions apply to it.\n", - "4. The 'pa' property leverages the 'pd' property to create PyArrow objects, as such the same restrictions apply to it.\n", - "\n", - "### Atomic Conversions\n", - "Define q items for conversion" + "Unlike vector objects which are by definition 1-D in shape, lists can be ragged N-Dimensional objects. This makes them useful for the storage of some complex data structures but limits their performance when dealing with data-access/data modification tasks." 
] }, { @@ -235,21 +159,16 @@ "metadata": {}, "outputs": [], "source": [ - "qbool = kx.q('0b')\n", - "qguid = kx.q('\"G\"$\"00000000-0000-0000-0000-000000000001\"')\n", - "qreal = kx.q('1.5e')\n", - "qlong = kx.q('1234')\n", - "qsymb = kx.q('`test')\n", - "qchar = kx.q('\"x\"')\n", - "qtime = kx.q('00:00:01')\n", - "qtstamp = kx.q('rand 0p')" + "kx.List([[1, 2, 3], [1.0, 1.1, 1.2], ['a', 'b', 'c']])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Convert the above items to a variety of the Python types. Change the method used to experiment as necessary" + "### PyKX Dictionaries\n", + "\n", + "A dictionary in PyKX is defined as a direct key-value mapping; the list of keys and the list of values with which they are associated must have the same count. While it can be considered as a key-value pair, it is physically stored as a pair of lists." ] }, { @@ -258,22 +177,27 @@ "metadata": {}, "outputs": [], "source": [ - "print(qbool.py())\n", - "print(qguid.pd())\n", - "print(qreal.np())\n", - "print(qlong.pa())\n", - "print(qsymb.py())\n", - "print(qchar.np())\n", - "print(qtime.pd())\n", - "print(qtstamp.np())" + "print(kx.Dictionary({'x': [1, 2, 3], 'x1': np.array([1, 2, 3])}))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Vector Conversions\n", - "Define q items for conversion" + "### PyKX Tables\n", + "\n", + "Tables in PyKX are a first-class typed entity which live in memory. They can be fundamentally described as a collection of named columns implemented as a dictionary.
This mapping construct means that tables in PyKX are column-oriented which makes analytic operations on specified columns much faster than would be the case for a relational database equivalent.\n", + "\n", + "Tables in PyKX come in many forms but the key table types are as follows\n", + "\n", + "- `pykx.Table` \n", + "- `pykx.KeyedTable`\n", + "- `pykx.SplayedTable`\n", + "- `pykx.PartitionedTable`\n", + "\n", + "In this section we will deal only with the first two of these which constitute specifically the in-memory data table types. As will be discussed in later sections `Splayed` and `Partitioned` tables are memory-mapped on-disk data structures, these are derivations of the `pykx.Table` and `pykx.KeyedTable` type objects.\n", + "\n", + "#### `pykx.Table`" ] }, { @@ -282,21 +206,7 @@ "metadata": {}, "outputs": [], "source": [ - "qbool = kx.q('2?0b')\n", - "qguid = kx.q('2?0Ng')\n", - "qreal = kx.q('2?5e')\n", - "qlong = kx.q('2?100')\n", - "qsymb = kx.q('2?`4')\n", - "qchar = kx.q('\"testing\"')\n", - "qtime = kx.q('2?0t')\n", - "qtstamp = kx.q('2?0p')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert the above items to a variety of the Python types. 
Change the method used to experiment as necessary" + "print(kx.Table([[1, 2, 'a'], [2, 3, 'b'], [3, 4, 'c']], columns = ['col1', 'col2', 'col3']))" ] }, { @@ -305,22 +215,14 @@ "metadata": {}, "outputs": [], "source": [ - "print(qbool.py())\n", - "print(qguid.pd())\n", - "print(qreal.np())\n", - "print(qlong.pa())\n", - "print(qsymb.py())\n", - "print(qchar.np())\n", - "print(qtime.pd())\n", - "print(qtstamp.np())" + "print(kx.Table(data = {'col1': [1, 2, 3], 'col2': [2 , 3, 4], 'col3': ['a', 'b', 'c']}))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Dictionary conversions\n", - "Conversions between q dictionaries and Python are only supported for the `py()` method, numpy, pandas and pyarrow do not have appropriate equivalent representations and as such are not supported." + "#### `pykx.KeyedTable`" ] }, { @@ -329,20 +231,20 @@ "metadata": {}, "outputs": [], "source": [ - "qdict=kx.q('`x`y`z!(10?10e;10?0Ng;4?`2)')\n", - "qdict.py()" + "kx.Table(data = {'x': [1, 2, 3], 'x1': [2, 3, 4], 'x2': ['a', 'b', 'c']}).set_index(['x'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Table conversions\n", - "Conversions between q keyed and unkeyed tables to an appropriate Python representation are supported for the `py()`, `np()`, `pd()` and `pa()` methods.\n", + "### Other Data Types\n", "\n", - "Round trip conversions `q -> Python -> q` are however only supported for Pandas and PyArrow. 
Conversions from Numpy records are still to be completed and the most natural representation for a table in native python is a dictionary as such the conversion from python to q returns a q dictionary rather than a table\n", + "The above types outline the majority of the important type structures in PyKX but there are many others which you will encounter as you use the library, below we have outlined some of the important ones that you will run into through the rest of this notebook.\n", "\n", - "Define a q table containing all q data types for conversion" + "#### `pykx.Lambda`\n", + "\n", + "A `pykx.Lambda` is the most basic kind of function within PyKX. They take between 0 and 8 parameters and are the building blocks for most analytics written by users when interacting with data from PyKX." ] }, { @@ -351,19 +253,8 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('N:5')\n", - "kx.q('gen_data:{@[;0;string]x#/:prd[x]?/:(`6;`6;0Ng;.Q.a),(\"xpdmnuvtbhijef\"$\\:0)}') # noqa\n", - "kx.q('dset_1D:gen_data[enlist N]')\n", - "kx.q('gen_names:{\"dset_\",/:x,/:string til count y}')\n", - "\n", - "qtab = kx.q('flip (`$gen_names[\"tab\";dset_1D])!N#\\'dset_1D') " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Convert the above table to a pandas dataframe and pyarrow table" + "pykx_lambda = kx.q('{x+y}')\n", + "type(pykx_lambda)" ] }, { @@ -372,31 +263,18 @@ "metadata": {}, "outputs": [], "source": [ - "display(qtab.pd())\n", - "display(qtab.pa())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" + "pykx_lambda(1, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## K Object Properties and Methods\n", - "\n", - "### Miscellaneous Methods\n", + "#### `pykx.Projection`\n", "\n", - "All K objects support the following methods/properties: \n", + "Similar to [functools.partial](https://docs.python.org/3/library/functools.html#functools.partial), functions in PyKX can have some of their parameters fixed 
in advance, resulting in a new function, which is called a projection. When this projection is called, the fixed parameters are no longer required, and cannot be provided.\n", "\n", - "| Method/Property | Description |\n", - "|:----------------|:------------|\n", - "| `t` | Return the q numeric datatype |\n", - "| `is_atom` | Is the item a q atomic type? |" + "If the original function had `n` total parameters, and it had `m` provided, the result would be a function (projection) that requires a user to input `n-m` parameters." ] }, { @@ -405,7 +283,8 @@ "metadata": {}, "outputs": [], "source": [ - "str(kx.q('([] til 3; `a`b`c)'))" + "projection = kx.q('{x+y}')(1)\n", + "projection" ] }, { @@ -414,16 +293,29 @@ "metadata": {}, "outputs": [], "source": [ - "repr(kx.q('\"this is a char vector\"'))" + "projection(2)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "kx.q('`atom').is_atom" + "---\n", + "\n", + "## Accessing and creating PyKX objects\n", + "\n", + "Now that we have seen some of the PyKX object types that you will encounter, practically speaking how will they be created in real-world scenarios?\n", + "\n", + "### Creating PyKX objects from Pythonic data types\n", + "\n", + "One of the most common ways that PyKX data is generated is through conversions from equivalent Pythonic data types. PyKX natively supports conversions to and from the following common Python data formats.\n", + "\n", + "- Python\n", + "- Numpy\n", + "- Pandas\n", + "- PyArrow\n", + "\n", + "In each of the above cases generation of PyKX objects is facilitated through the use of the `kx.toq` PyKX function." 
] }, { @@ -432,7 +324,8 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('`not`atom').is_atom" + "pydict = {'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}\n", + "kx.toq(pydict)" ] }, { @@ -441,8 +334,8 @@ "metadata": {}, "outputs": [], "source": [ - "print(kx.q('([]10?1f;10?1f)').t)\n", - "print(kx.q('`a`b`c!1 2 3').t)" + "nparray = np.array([1, 2, 3, 4], dtype = np.int32)\n", + "kx.toq(nparray)" ] }, { @@ -451,16 +344,19 @@ "metadata": {}, "outputs": [], "source": [ - "# q list\n", - "qlist = kx.q('(1 2 3;1;\"abc\")')\n", - "list(qlist)" + "pdframe = pd.DataFrame(data = {'a':[1, 2, 3], 'b': ['a', 'b', 'c']})\n", + "kx.toq(pdframe)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note the difference between this and the conversion of the same `qlist` to a true Python representation" + "### Random data generation\n", + "\n", + "PyKX provides users with a module for the creation of random data of user specified PyKX types or their equivalent Python types. The creation of random data is useful in prototyping analytics and is used extensively within our documentation when creating test examples.\n", + "\n", + "As a first example you can generate a list of 1,000,000 random floating point values between 0 and 1 as follows" ] }, { @@ -469,29 +365,14 @@ "metadata": {}, "outputs": [], "source": [ - "qlist.py()" + "kx.random.random(1000000, 1.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Numerical comparisons/functions\n", - "Various q datatypes vectors/atoms/tables can also interact with native Python mathematical comparisons and functions, the following provides an outline of a subset of the comparisons/functions that are supported:\n", - "\n", - "| Function | Description |\n", - "|:---------|:------------|\n", - "| `abs` | Absolute value of a number |\n", - "| `<` | Less than |\n", - "| `>=` | Greater than or equal to |\n", - "| `+` | Addition |\n", - "| `-` | Subtraction |\n", - "| `/` | Division |\n", - "| `*` | Multiplication 
|\n", - "| `**` | Power |\n", - "| `%` | Modulo | \n", - "\n", - "#### Define q/Python atoms and lists for comparisons" + "If instead you wish to choose values randomly from a list, this can be facilitated by using the list as the second argument to your function" ] }, { @@ -500,17 +381,14 @@ "metadata": {}, "outputs": [], "source": [ - "qlong = kx.q('-5')\n", - "pylong = 5\n", - "qlist = kx.q('-3+til 5')\n", - "pylist = [1, 2, 3, 4, 5]" + "kx.random.random(5, [kx.LongAtom(1), ['a', 'b', 'c'], np.array([1.1, 1.2, 1.3])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Apply a number of the above comparisons/functions to python/q objects in combination" + "Random data does not only come in 1-Dimensional forms however and modifications to the first argument to be a list allow you to create multi-Dimensional PyKX Lists. The below examples are additionally using a PyKX trick where nulls/infinities can be used to generate random data across the full allowable range" ] }, { @@ -519,8 +397,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(abs(qlong))\n", - "print(abs(qlist))" + "kx.random.random([2, 5], kx.GUIDAtom.null)" ] }, { @@ -529,32 +406,24 @@ "metadata": {}, "outputs": [], "source": [ - "print(qlong>pylong)\n", - "print(pylist>qlist)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(qlong*pylong)\n", - "print(pylist*qlist)" + "kx.random.random([2, 3, 4], kx.IntAtom.inf)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### The `raw` q -> Python conversion keyword argument" + "Finally, users can set the seed for the random data generation explicitly allowing users to have consistency over the generated objects. 
This can be completed globally or for individual function calls" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "All of the interfaces to the K objects support the `raw` keyword argument. When the `raw` keyword argument is set to `True` the interface forgoes some of the features when converting the object in exchange for greater efficiency." + "kx.random.seed(10)\n", + "kx.random.random(10, 2.0)" ] }, { @@ -563,16 +432,18 @@ "metadata": {}, "outputs": [], "source": [ - "tab = kx.q('([]10?1f;10?1f;10?0p;10?0Ng)')" + "kx.random.random(10, 2.0, seed = 10)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "tab.pd()" + "### Running q code to generate data\n", + "\n", + "As mentioned in the introduction PyKX provides an entrypoint to the vector programming language q, as such users of PyKX can execute q code directly via PyKX within a Python session. 
This is facilitated through use of calls to `kx.q`.\n", + "\n", + "Create some q data:" ] }, { @@ -581,7 +452,7 @@ "metadata": {}, "outputs": [], "source": [ - "tab.pd(raw=True)" + "kx.q('0 1 2 3 4')" ] }, { @@ -590,16 +461,14 @@ "metadata": {}, "outputs": [], "source": [ - "qvec = kx.q('10?0t')" + "kx.q('([idx:desc til 5]col1:til 5;col2:5?1f;col3:5?`2)')" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "qvec.np()" + "Apply arguments to a user specified function `x+y`" ] }, { @@ -608,19 +477,16 @@ "metadata": {}, "outputs": [], "source": [ - "qvec.np(raw=True)" + "kx.q('{x+y}', kx.LongAtom(1), kx.LongAtom(2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Editing K objects\n", - "One of the expected aspects of interacting with Python objects natively is being able to index, slice, compare and modify the objects when it is reasonable to do so.\n", + "### Read data from a CSV file\n", "\n", - "The following sections show the interaction of a user with a q vector and table\n", - "\n", - "#### Vectors" + "A lot of data that you run into for data analysis tasks comes in the form of CSV files, PyKX similar to Pandas provides a CSV reader called via `kx.q.read.csv`, in the following cell we will create a CSV to be read in using PyKX" ] }, { @@ -629,15 +495,25 @@ "metadata": {}, "outputs": [], "source": [ - "v = kx.q('12?100')\n", - "print(v)" + "import csv\n", + "\n", + "with open('pykx.csv', 'w', newline='') as file:\n", + " writer = csv.writer(file)\n", + " field = [\"name\", \"age\", \"height\", \"country\"]\n", + " \n", + " writer.writerow(field)\n", + " writer.writerow([\"Oladele Damilola\", \"40\", \"180.0\", \"Nigeria\"])\n", + " writer.writerow([\"Alina Hricko\", \"23\", \"179.2\", \"Ukraine\"])\n", + " writer.writerow([\"Isabel Walter\", \"50\", \"179.5\", \"United Kingdom\"])" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, 
"metadata": {}, + "outputs": [], "source": [ - "Get the element at index 2" + "kx.q.read.csv('pykx.csv', types = {'age': kx.LongAtom, 'country': kx.SymbolAtom})" ] }, { @@ -646,14 +522,19 @@ "metadata": {}, "outputs": [], "source": [ - "v[2]" + "import os\n", + "os.remove('pykx.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Retrieve a slice containing elements 3-5" + "### Querying external Processes via IPC\n", + "\n", + "One of the most common usage patterns in organisations with access to data in kdb+/q you will encounter is to query this data from an external server process infrastructure. In the example below we assume that you have q installed in addition to PyKX, see [here](https://kx.com/kdb-insights-personal-edition-license-download/) to install q alongside the license access for PyKX.\n", + "\n", + "First we set up a q/kdb+ server setting it on port 5050 and populating it with some data in the form of a table `tab`" ] }, { @@ -662,14 +543,26 @@ "metadata": {}, "outputs": [], "source": [ - "v[3:6]" + "import subprocess\n", + "import time\n", + "\n", + "try:\n", + " proc = subprocess.Popen(\n", + " ('q', '-p', '5000'),\n", + " stdin=subprocess.PIPE,\n", + " stdout=subprocess.DEVNULL,\n", + " stderr=subprocess.DEVNULL,\n", + " )\n", + " time.sleep(2)\n", + "except:\n", + " raise kx.QError('Unable to create q process on port 5000')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Compare all vector elements to 50" + "Once a q process is available you can establish a connection to it for synchronous query execution as follows" ] }, { @@ -678,16 +571,14 @@ "metadata": {}, "outputs": [], "source": [ - "v < 50" + "conn = kx.SyncQConnection(port = 5000)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Tables\n", - "\n", - "This only applies to in-memory tables" + "You can now run q commands against the q server" ] }, { @@ -696,17 +587,15 @@ "metadata": {}, "outputs": [], "source": [ - "tab = 
kx.q('([]4?5;4?`2;4?0p;4?0Ng)')\n", - "tab.pd()" + "conn('tab:([]col1:100?`a`b`c;col2:100?1f;col3:100?0Ng)')\n", + "conn('select from tab where col1=`a')" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "tab['x1']" + "Or use the PyKX query API" ] }, { @@ -715,20 +604,14 @@ "metadata": {}, "outputs": [], "source": [ - "tab['x2'].py()" + "conn.qsql.select('tab', where=['col1=`a', 'col2<0.3'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Splayed and Partitioned Tables\n", - "\n", - "Splayed and Partitioned tables are at present only partially supported. Users will be able to query the data and access information around the columns through the `keys` method but will not be able to retrieve the values contained within the data or convert to an analogous Python representation. These will raise a `NotImplementedError`.\n", - "\n", - "Research on this is still pending and any changes to support these conversions will be include an update here\n", - "\n", - "#### Splayed Tables" + "Or use PyKX's context interface to run SQL server side if it's available to you" ] }, { @@ -737,19 +620,15 @@ "metadata": {}, "outputs": [], "source": [ - "tmp_dir = mkdtemp()\n", - "orig_dir = os.getcwd()\n", - "os.chdir(tmp_dir)\n", - "kx.q('`:db/t/ set ([] a:til 3; b:\"xyz\"; c:-3?0Ng)')\n", - "kx.q(r'\\l db')\n", - "t_splayed = kx.q('t')" + "conn('\\l s.k_')\n", + "conn.sql('SELECT * FROM tab where col2>=0.5')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "List the columns that are represented in the splayed table" + "Finally the q server used for this demonstration can be shut down" ] }, { @@ -758,30 +637,37 @@ "metadata": {}, "outputs": [], "source": [ - "list(t_splayed.keys())" + "proc.stdin.close()\n", + "proc.kill()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Query the Splayed table" + "---" ] }, { - "cell_type": "code", - "execution_count": null, 
+ "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "kx.q('?[`t;enlist(=;`a;1);0b;()]')" + "## Running analytics on objects in PyKX\n", + "\n", + "Like many Python libraries including Numpy and Pandas PyKX provides a number of ways that it's data can be used with analytics defined internal to the library and which you have self generated.\n", + "\n", + "### Using in-built methods on PyKX Vectors\n", + "\n", + "When you are interacting with PyKX Vectors you may wish to gain insights into these objects through the application of basic analytics such as calculation of the `mean`/`median`/`mode` of the vector" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Attempt to evaluate the values method on the table" + "q_vector = kx.random.random(1000, 10.0)" ] }, { @@ -790,10 +676,7 @@ "metadata": {}, "outputs": [], "source": [ - "try:\n", - " t_splayed.values()\n", - "except NotImplementedError:\n", - " print('NotImplementedError was raised', file=sys.stderr)" + "q_vector.mean()" ] }, { @@ -802,15 +685,14 @@ "metadata": {}, "outputs": [], "source": [ - "os.chdir(orig_dir)\n", - "shutil.rmtree(tmp_dir)" + "q_vector.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Partitioned Tables" + "The above is useful for basic analysis but will not be sufficient for more bespoke analytics on these vectors, to allow you more control over the analytics run you can also use the `apply` method." 
] }, { @@ -819,22 +701,21 @@ "metadata": {}, "outputs": [], "source": [ - "tmp_dir = mkdtemp()\n", - "orig_dir = os.getcwd()\n", - "os.chdir(tmp_dir)\n", - "kx.q('`:db/2020.01/t/ set ([] a:til 3; b:\"xyz\"; c:-3?0Ng)')\n", - "kx.q('`:db/2020.02/t/ set ([] a:1+til 3; b:\"cat\"; c:-3?0Ng)')\n", - "kx.q('`:db/2020.03/t/ set ([] a:2+til 3; b:\"bat\"; c:-3?0Ng)')\n", - "kx.q(r'\\l db')\n", - "t_partitioned = kx.q('t')\n", - "t_partitioned" + "def bespoke_function(x, y):\n", + " return x*y\n", + "\n", + "q_vector.apply(bespoke_function, 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "List partitioned table columns" + "### Using in-built methods on PyKX Tables\n", + "\n", + "In addition to the vector processing capabilities of PyKX your ability to operate on Tabular structures is also important. Highlighted in greater depth within the Pandas-Like API documentation [here](../user-guide/advanced/Pandas_API.ipynb) these methods allow you to apply functions and gain insights into your data in a way that is familiar.\n", + "\n", + "In the below example you will use combinations of the most commonly used elements of this Table API operating on the following table" ] }, { @@ -843,14 +724,21 @@ "metadata": {}, "outputs": [], "source": [ - "list(t_partitioned.keys())" + "N = 1000000\n", + "example_table = kx.Table(data = {\n", + " 'sym' : kx.random.random(N, ['a', 'b', 'c']),\n", + " 'col1' : kx.random.random(N, 10.0),\n", + " 'col2' : kx.random.random(N, 20)\n", + " }\n", + ")\n", + "example_table" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Query partitioned table" + "You can search for and filter data within your tables using `loc` similarly to how this is achieved by Pandas as follows" ] }, { @@ -859,26 +747,14 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q('?[`t;enlist(=;`a;1);0b;enlist[`c]!enlist`c]')" + "example_table.loc[example_table['sym'] == 'a']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Attempt to 
convert partitioned table to a pandas dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " t_partitioned.pd()\n", - "except NotImplementedError:\n", - " pass" + "This behavior is also incorporated when retrieving data from a table through the `__getitem__` method as you can see here" ] }, { @@ -887,19 +763,14 @@ "metadata": {}, "outputs": [], "source": [ - "os.chdir(orig_dir)\n", - "shutil.rmtree(tmp_dir)" + "example_table[example_table['sym'] == 'b']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### q Functions\n", - "\n", - "All functions defined in q can be called from PyKX via function objects. These function calls can take Python or q objects as input arguments. It is required that each argument being supplied to the function be convertible to a q representation using `kx.K(arg)`.\n", - "\n", - "Arguments can be provided either positionally, or as keyword arguments when the q function has named parameters."
+ "You can additionally set the index columns of the table, when dealing with PyKX tables this converts the table from a `pykx.Table` object to a `pykx.KeyedTable` object" ] }, { @@ -908,16 +779,14 @@ "metadata": {}, "outputs": [], "source": [ - "f = kx.q('{x*y+z}')" + "example_table.set_index('sym')" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "f(12, 2, 1)" + "Additional to basic data manipulation such as index setting you also get access to analytic capabilities such as the application of basic data manipulation operations such as `mean` and `median` as demonstrated here" ] }, { @@ -926,16 +795,20 @@ "metadata": {}, "outputs": [], "source": [ - "f(12, 2, 1).py()" + "print('mean:')\n", + "print(example_table.mean(numeric_only = True))\n", + "\n", + "print('median:')\n", + "print(example_table.median(numeric_only = True))" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "g = kx.q('{[arg1;arg2] deltas sum each arg1 cross til arg2}')" + "You can make use of the `groupby` method which groups the PyKX tabular data which can then be used for analytic application.\n", + "\n", + "In your first example let's start by grouping the dataset based on the `sym` column and then calculating the `mean` for each column based on their `sym`" ] }, { @@ -944,18 +817,14 @@ "metadata": {}, "outputs": [], "source": [ - "g(arg2=7, arg1=kx.q('3?45')).np()" + "example_table.groupby('sym').mean()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "tok = kx.q(\"$'\")\n", - "print(repr(tok))\n", - "print(str(tok))" + "As an extension to the above groupby you can now consider a more complex example which is making use of `numpy` to run some calculations on the PyKX data, you will see later that this can be simplified further in this specific 
use-case" ] }, { @@ -964,60 +833,74 @@ "metadata": {}, "outputs": [], "source": [ - "tok(kx.q('\"B\"'), kx.q('\" \",.Q.an')).np()" + "def apply_func(x):\n", + " nparray = x.np()\n", + " return np.sqrt(nparray).mean()\n", + "\n", + "example_table.groupby('sym').apply(apply_func)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "---" + "Time-series specific joining of data can be completed using `merge_asof` joins. In this example two tables with temporal information, namely a `trades` and a `quotes` table, are used" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Context Interface\n", - "\n", - "The context interface provides a convenient way to interact with q contexts and namespaces using either the embedded q instance `pykx.q` or an IPC connection made with `pykx.QConnection`.\n", - "\n", - "Accessing an attribute which is not defined via the context interface, but which corresponds to a script (i.e. a `.q` or `.k` file), will cause it to be loaded automatically. Scripts are search for if they are:\n", - "1. In the same directory as the process running PyKX\n", - "2. In `QHOME`\n", - "\n", - "Other paths can be searched for by appending them to `kx.q.paths`.
Alternatively, you can manually load a script with `kx.q.ctx._register`.\n", + "trades = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n", + " ],\n", + " \"ticker\": [\n", + " \"GOOG\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"GOOG\",\n", + " \"AAPL\",\n", + " \"GOOG\",\n", + " \"MSFT\"\n", + " ],\n", + " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n", + " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n", + "})\n", + "quotes = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n", + " ],\n", + " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n", + " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n", + " \"quantity\": [75, 155, 100, 100, 100]\n", + "})\n", "\n", - "Functions which are registered via the context interface are automatically added as callable members of their `QContext`." + "print('trades:')\n", + "display(trades)\n", + "print('quotes:')\n", + "display(quotes)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Builtin namespaces\n", - "\n", - "As a result of the infrastructure outlined above there are a number of namespaces which are automatically added as extensions to the q base class on loading. 
This includes the `.q`, `.z`, `.Q` and `.j` namespaces contained within `kx.q.k`, the following provides some example invocations of each.\n", - "\n", - "A number of the functions contained within the .z namespace are not callable, including but not limited to the following:\n", - "\n", - "- .z.ts\n", - "- .z.ex\n", - "- .z.ey\n", - "\n", - "Run `dir(kx.q.z)` to see what is available in the `.z` namespace.\n", - "\n", - "#### .q functionality\n", - "All the functions a user would expect to be exposed from q are callable as python methods off the q base class, the following provides a limited number of example invocations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(kx.q.til(10))" + "When applying the asof join you can additionally use named arguments to make it possible to distinguish which table each column originates from. In this case the columns are suffixed with `_trades` and `_quotes`" ] }, { @@ -1026,16 +909,26 @@ "metadata": {}, "outputs": [], "source": [ - "print(kx.q.max([100, 2, 3, -4]))" + "trades.merge_asof(quotes, on='time', suffixes=('_trades', '_quotes'))" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "print(kx.q.mavg(4, kx.q.til(10)))" + "### Using PyKX/q native functions\n", + "\n", + "While use of the Pandas-Like API and methods provided off PyKX Vectors provides an effective method of applying analytics on PyKX data the most efficient and performant way you can run analytics on your data is through the use of the PyKX/q primitives which are available through the `kx.q` module.\n", + "\n", + "These include functionality for the calculation of moving averages, application of asof/window joins, column reversal etc.
A full list of the available functions and some examples of their usage can be found [here](../api/pykx-execution/q.md).\n", + "\n", + "Here are a few examples of usage of how you can use these functions, broken into sections for convenience\n", + "\n", + "#### Mathematical functions\n", + "\n", + "##### mavg\n", + "\n", + "Calculate a series of average values across a list using a rolling window" ] }, { @@ -1044,17 +937,16 @@ "metadata": {}, "outputs": [], "source": [ - "print(kx.q.tables())" + "kx.q.mavg(10, kx.random.random(10000, 2.0))" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "s = kx.q('([]a:1 2;b:2 3;c:5 7)')\n", - "s" + "##### cor\n", + "\n", + "Calculate the correlation between two lists" ] }, { @@ -1063,8 +955,7 @@ "metadata": {}, "outputs": [], "source": [ - "t = kx.q('([]a:1 2 3;b:2 3 7;c:10 20 30;d:\"ABC\")').pd()\n", - "t" + "kx.q.cor([1, 2, 3], [2, 3, 4])" ] }, { @@ -1073,17 +964,16 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q.uj(s,t)" + "kx.q.cor(kx.random.random(100, 1.0), kx.random.random(100, 1.0))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### `.Q` namespace\n", - "The functions within the `.Q` namespace are also exposed as an extension.\n", + "##### prds\n", "\n", - "**Note**: While all functions within the `.Q` namespace are available, compared to the `.q`/`.z` namespaces these functions can be complicated to implement within the constraints of the PyKX interface for example `.Q.dpft` can be implemented but requires some thought" + "Calculate the cumulative product across a supplied list" ] }, { @@ -1092,16 +982,18 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q.Q" + "kx.q.prds([1, 2, 3, 4, 5])" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "kx.q.Q.an" + "#### Iteration functions\n", + "\n", + "##### each\n", + "\n", + "Supplied both as 
a standalone primitive and as a method for PyKX Lambdas `each` allows you to pass individual elements of a PyKX object to a function" ] }, { @@ -1110,7 +1002,7 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q.Q.btoa(b'Hello World!')" + "kx.q.each(kx.q('{prd x}'), kx.random.random([5, 5], 10.0, seed=10))" ] }, { @@ -1119,15 +1011,18 @@ "metadata": {}, "outputs": [], "source": [ - "t = kx.q('([]a:3 4 5;b:\"abc\";c:(2;3.4 3.2;\"ab\"))')\n", - "kx.q.each(kx.q.Q.ty, t['a','b','c'])" + "kx.q('{prd x}').each(kx.random.random([5, 5], 10.0, seed=10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### `.j` namespace" + "#### Table functions\n", + "\n", + "##### meta\n", + "\n", + "Retrieval of metadata information about a table" ] }, { @@ -1136,9 +1031,11 @@ "metadata": {}, "outputs": [], "source": [ - "json = b'{\"x\":1, \"y\":\"test\"}'\n", - "qdict = kx.q.j.k(json)\n", - "print(qdict)" + "qtab = kx.Table(data = {\n", + " 'x' : kx.random.random(1000, ['a', 'b', 'c']).grouped(),\n", + " 'y' : kx.random.random(1000, 1.0),\n", + " 'z' : kx.random.random(1000, kx.TimestampAtom.inf)\n", + "})" ] }, { @@ -1147,46 +1044,16 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q.j.j(qdict).py()" + "kx.q.meta(qtab)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### User defined extensions\n", - "As alluded to above users can add their own extension modules to PyKX by placing a relevant `.q`/`.k` to their `$QHOME`. 
The following shows the addition of an extension to complete a specific query and set some data which we would like to be available.\n", + "##### xasc\n", "\n", - "#### Extension Example\n", - "The following example we will create (and later delete) the file '$QHOME/demo_extension.q'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "demo_extension_source = '''\n", - "\\d .demo_extension\n", - "N:100\n", - "test_data:([]N?`a`b`c;N?1f;N?10;N?0b)\n", - "test_function:{[data]\n", - " analytic_keys :`max_x1`avg_x2`med_x3;\n", - " analytic_calcs:(\n", - " (max;`x1);\n", - " (avg;`x2);\n", - " (med;`x3));\n", - " ?[data;\n", - " ();\n", - " k!k:enlist `x;\n", - " analytic_keys!analytic_calcs\n", - " ]\n", - " }\n", - "'''\n", - "demo_extension_filename = kx.qhome/'demo_extension.q'\n", - "with open(demo_extension_filename, 'w') as f:\n", - " f.write(demo_extension_source)\n" + "Sort the contents of a specified column in ascending order" ] }, { @@ -1195,535 +1062,7 @@ "metadata": {}, "outputs": [], "source": [ - "kx.q.demo_extension.test_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.demo_extension.test_function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.demo_extension.test_function(kx.q.demo_extension.test_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.remove(demo_extension_filename)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "--- " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Querying Interface\n", - "\n", - "One of the core purposes of this module is to provide users who are unfamiliar with q with a Pythonic approaches to interacting with q objects.\n", - "\n", - "One of the ways this is intended to be achieved 
is to provide Pythonic wrappers around common q tasks in a way that feels familiar to a Python developer but is still efficient/flexible.\n", - "\n", - "The querying interface is an example of this. It provides a wrapper around the q functional select syntax to facilitate the querying of persisted and local data while also allowing Python objects to be used as inputs where it is relevant.\n", - "\n", - "### help is provided\n", - "Users can use the Python `help` function to display the docstring associated with each of the functions within the `query` module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# help(kx.q.qsql)\n", - "# help(kx.q.qsql.select)\n", - "# help(kx.q.qsql.exec)\n", - "# help(kx.q.qsql.update)\n", - "# help(kx.q.qsql.delete)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Select functionality\n", - "The select functionality is provided both as an individually callable function or as a method off all tabular data.\n", - "\n", - "Generate a table and assign the Python object as a named entity within the q memory space." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qtab = kx.q('([]col1:100?`a`b`c;col2:100?1f;col3:100?5)')\n", - "kx.q['qtab'] = qtab" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entirety of the table using an empty select" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entire table using the module function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Retrieve the entire table based on a named reference\n", - "\n", - "This is important because it provides a method of querying partitioned/splayed tables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The where keyword**\n", - "\n", - "Where clauses can be provided as a named keyword and are expected to be formatted as an individual string or a list of strings as in the following examples.\n", - "\n", - "By default no where conditions are applied to a select query" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.select(qtab, where='col1=`a')\n", - "kx.q.qsql.select(qtab, where=['col3<0.5', 'col2>0.7'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The columns keyword**\n", - "\n", - "The columns keyword is used to apply analytics to specific columns of the data or to select and rename columns within the dataset.\n", - "\n", - "By default if a user does not provide this information it is 
assumed that all columns are to be returned without modification.\n", - "\n", - "The columns keyword is expected to be a dictionary mapping the name that the new table will display for the column to the logic with which this data is modified." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab, columns={'col1': 'col1','newname': 'col2'})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(qtab, columns={'max_col2': 'max col2'}, where='col1=`a')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The by keyword**\n", - "\n", - "The by keyword is used to apply analytics to group data based on common characteristics.\n", - "\n", - "By default if a user does not provide this information it is assumed that no grouping ins applied.\n", - "\n", - "The by keyword is expected to be a dictionary mapping the name to be applied to the by clause of the grouping to the column of the original table which is being used for the grouping." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.select(\n", - " qtab,\n", - " columns={'minCol2': 'min col2', 'medCol3': 'med col3'},\n", - " by={'groupCol1': 'col1'},\n", - " where=['col3<0.5', 'col2>0.7']\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete functionality\n", - "The delete functionality is provided both as an individually callable function or as a method off all tabular data. \n", - "\n", - "The following provides a outline of how this can be invoked in both cases.\n", - "\n", - "**Note**: By default the delete functionality **does not** modify the underlying representation of the table. This is possible under limited circumstances as is outline in a later section below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete(qtab)\n", - "kx.q.qsql.delete('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The columns keyword**\n", - "\n", - "The columns keyword is used to denote the columns that are to be deleted from a table.\n", - "\n", - "By default if a user does not provide this information it is assumed that all columns are to be deleted.\n", - "\n", - "The columns keyword is expected to be a string or list of strings denoting the columns to be deleted.\n", - "\n", - "**Note**: The columns and where clause can not be used in the same function call, this is not supported by the underlying functional delete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.delete(qtab, columns = 'col3')\n", - "kx.q.qsql.delete(qtab, columns = ['col1','col2'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The where keyword**\n", - "\n", - "The where keyword is used to filter rows of the data to be deleted.\n", - "\n", - "By default if no where condition is supplied it is assumed that all rows of the dataset are to be deleted.\n", - "\n", - "The where keyword is expected when not default to be a string on which to apply the filtering\n", - "\n", - "**Note**: The columns and where clause can not be used in the same function call, this is not supported by the underlying functional delete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete(qtab, where='col1 in `a`b')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**The modify keyword**\n", - "\n", - "The modify keyword is used when the user intends for the underlying representation of a named entity within the q memory space to be modified. 
This is only applicable when calling the function via the `kx.q.qsql.delete` representation of the function.\n", - "\n", - "By default the underlying representation is not modified with `modify=False` in order to change the underlying representation a user must set `modify=True`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q.qsql.delete('qtab', where = 'col1=`c', modify=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kx.q('qtab')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Update and exec functionality\n", - "\n", - "Both the q functional update and exec functionality are supported by this interface. For brevity they are not shown in the same detail as the previous examples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.exec(qtab, 'col1')\n", - "# kx.q.qsql.exec(qtab, columns='col2', by='col1')\n", - "kx.q.qsql.exec(qtab, columns={'avgCol3': 'avg col3'}, by='col1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# kx.q.qsql.update({'avg_col2':'avg col2'}, by={'col1': 'col1'})\n", - "# kx.q.qsql.update({'col3':100}, where='col1=`a')\n", - "kx.q.qsql.update('qtab', {'col2': 4.2}, 'col1=`b', modify=True)\n", - "kx.q['qtab']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## IPC Communication\n", - "\n", - "This module also provides users with the ability to retrieve data from remote q processes. This is supported in the absence and presence of a valid q license.\n", - "\n", - "More documentation including exhaustive lists of the functionality available can be found in the [`IPC`](../api/ipc.html) documentation." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Establishing a Connection\n", - "Connections to external q processes are established using the `pykx.QConnection` class. On initialization the instance of this class will establish a connection to the specified q process using the provided connection information (e.g. `host`, `port`, `username`, `password`, etc.). Refer to the PyKX IPC module documentation for more details about this interface, or run `help(pykx.QConnection)`.\n", - "\n", - "### IPC Example\n", - "The following is a basic example of this functionality a more complex subscriber/publisher example is provided in `examples/ipc/`\n", - "\n", - "This example will work in the presence or absence of a valid q license \n", - "\n", - "#### Create the external q process\n", - "To run this example, the Python code in the following cell will do the equivalent to executing the following in a terminal:\n", - "\n", - "```\n", - "$ q -p 5000\n", - "q)tab:([]100?`a`b`c;100?1f;100?0Ng)\n", - "q).z.ps:{[x]0N!(`.z.ps;x);value x}\n", - "q).z.pg:{[x]0N!(`.z.pg;x);value x}\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import subprocess\n", - "import time\n", - "proc = subprocess.Popen(\n", - " ('q', '-p', '5000'),\n", - " stdin=subprocess.PIPE,\n", - " stdout=subprocess.DEVNULL,\n", - " stderr=subprocess.DEVNULL,\n", - ")\n", - "proc.stdin.write(b'tab:([]100?`a`b`c;100?1f;100?0Ng)\\n')\n", - "proc.stdin.write(b'.z.ps:{[x]0N!(`.z.ps;x);value x}\\n')\n", - "proc.stdin.write(b'.z.pg:{[x]0N!(`.z.pg;x);value x}\\n')\n", - "proc.stdin.flush()\n", - "time.sleep(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Open a connection to this process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Normally a `with` block would be used for proper context management, but for the 
sake of this example the connection will be accessed and closed directly\n", - "conn = kx.QConnection('localhost', 5000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Make a simple synchronous request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qvec = conn('2+til 2')\n", - "qvec" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Make a simple asynchronous request" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('setVec::10?1f', wait=False)\n", - "setVec = conn('setVec')\n", - "setVec" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run a defined function server side with provided arguments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pytab = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]})\n", - "conn('{[table;column;rows]rows#column#table}', pytab, ['col1'], 1).pd()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('{[table;column]newtab::table column}', pytab, 'col1', wait=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn('newtab').np()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Disconnect from the q process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn.close()\n", - "# This happens automatically when you leave a `with` block that is managing a connection, or when a connection is garbage-collected." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Shutdown the q process we were connected to for the IPC demo\n", - "proc.stdin.close()\n", - "proc.kill()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" + "kx.q.xasc('z', qtab)" ] } ], @@ -1744,7 +1083,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.8.3" }, "mimetype": "text/x-python", "name": "python", @@ -1753,5 +1092,5 @@ "version": 3 }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/examples/subscriber/readme.md b/examples/subscriber/readme.md index 71876fc..6c605a6 100644 --- a/examples/subscriber/readme.md +++ b/examples/subscriber/readme.md @@ -40,7 +40,7 @@ q process is started. ```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initial Table ===== +===== Initital Table ===== a b --- 4 8 @@ -53,7 +53,7 @@ a b 2 1 1 8 8 5 -===== Initial Table ===== +===== Initital Table ===== ``` diff --git a/examples/subscriber/subscriber.py b/examples/subscriber/subscriber.py index e187d34..5ab019a 100644 --- a/examples/subscriber/subscriber.py +++ b/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initial Table =====') + print('===== Initital Table =====') print(table) - print('===== Initial Table =====') + print('===== Initital Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/lib/l64/libq.so index 2213c2e..4c24bc9 100755 Binary files a/src/pykx/lib/l64/libq.so and b/src/pykx/lib/l64/libq.so differ diff --git a/src/pykx/lib/l64arm/libq.so 
b/src/pykx/lib/l64arm/libq.so index e4c4fd0..be29981 100755 Binary files a/src/pykx/lib/l64arm/libq.so and b/src/pykx/lib/l64arm/libq.so differ