diff --git a/.gitignore b/.gitignore
index 0b3d7b2..499167d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,6 +70,14 @@ __pycache__/
**/4-1-libs/*.k_
**/4-1-libs/*.k
!**/4-1-libs/q.k
+**/4-1-libs/**/libe.so
+**/4-1-libs/**/e.*
+
+#ToDo revert in KXI-44621
+src/pykx/lib/4-1-libs/l64/kxreaper
+src/pykx/lib/4-1-libs/l64/libkurl.so
+src/pykx/lib/4-1-libs/l64/libobjstor.so
+src/pykx/lib/4-1-libs/l64/pg
# Distribution / packaging
.Python
@@ -219,9 +227,13 @@ custom_theme/partials/header.html
.nvimrc
Session.vim
+# Editor temp files
+*~
+
# Coverage Reports
.coverage*
.scannerwork*
+*report.xml
report.xml
coverage.xml
.pymon
diff --git a/MANIFEST.in b/MANIFEST.in
index 9809842..d28e4cf 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -27,3 +27,6 @@ prune docs
prune benchmarks
prune examples
prune tests
+
+include docs/api/pykx-execution/q.md
+
diff --git a/README.md b/README.md
index 14cc86d..74f9492 100644
--- a/README.md
+++ b/README.md
@@ -93,12 +93,14 @@ KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX
PyKX depends on the following third-party Python packages:
-- `pandas>=1.2, < 2.2.0`
+- `pandas>=1.2, < 2.0; python_version=='3.8'`
+- `pandas>=1.2, < 2.2.0; python_version>'3.8'`
- `numpy~=1.22, <2.0; python_version<'3.11'`
- `numpy~=1.23, <2.0; python_version=='3.11'`
- `numpy~=1.26, <2.0; python_version=='3.12'`
- `pytz>=2022.1`
- `toml~=0.10.2`
+- `dill>=0.2.0`
They are installed automatically by `pip` when PyKX is installed.
@@ -106,21 +108,17 @@ PyKX also has an optional Python dependency of `pyarrow>=3.0.0`, which can be in
When using PyKX with KX Dashboards users will be required to install `ast2json~=0.3` this can be installed using the `dashboards` extra, e.g. `pip install pykx[dashboards]`
-When using PyKX Beta features users will be required to install `dill>=0.2.0` this can be installed using the `beta` extra, e.g. `pip install pykx[beta]`
+When using PyKX Streaming, users may need to stop q processes that were initialized by a process which is no longer available. To facilitate this, PyKX can make use of `psutil`, which can be installed using the `streaming` extra, e.g. `pip install pykx[streaming]`
When using Streamlit users will be required to install `streamlit~=1.28` this can be installed using the `streamlit` extra, e.g. `pip install pykx[streamlit]`
-**Warning:** Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11.
+**Warning:** Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception.
#### Optional Non-Python Dependencies
- `libssl` for TLS on [IPC connections](docs/api/ipc.md).
- `libpthread` on Linux/MacOS when using the `PYKX_THREADING` environment variable.
-#### Windows Dependencies
-
-To run q or PyKX on Windows, `msvcr100.dll` must be installed. It is included in the [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999).
-
## Building from source
### Installing Dependencies
@@ -144,7 +142,6 @@ Windows:
* [Python](https://www.python.org/downloads/windows/)
* [Build Tools for Visual Studio](https://visualstudio.microsoft.com/downloads/?q=build+tools).
* [dlfcn-win32](https://github.com/dlfcn-win32/dlfcn-win32). Can be installed using [Vcpkg](https://github.com/microsoft/vcpkg).
-* `msvcr100.dll`. Available in [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999).
To install the above dependencies, you can run the `w64_install.ps1` script as an administrator:
@@ -198,7 +195,7 @@ export QHOME=/location/of/your/q #q needs QHOME available
python -m pytest -vvv -n 0 --no-cov --junitxml=report.xml
```
-## PyKX License access and enablement
+## PyKX Licenses
This work is dual licensed under [Apache 2.0](https://code.kx.com/pykx/license.html#apache-2-license) and the [Software License for q.so](https://code.kx.com/pykx/license.html#qso-license) and users are required to abide by the terms of both licenses in their entirety.
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index fbcc5cc..5956fea 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -17,12 +17,10 @@ requirements:
- git
- python
- setuptools>=68.0
- - setuptools_scm[toml]>=7.1.0 # [py==37]
- - setuptools_scm[toml]>=8.0.0 # [py!=37]
+ - setuptools_scm[toml]>=8.0.0
- cython==3.0.0
- numpy==1.26 # [py==312]
- - numpy==1.22.* # [py!=37 and py<312]
- - numpy==1.20 # [py==37]
+ - numpy==1.22.* # [py<312]
- tomli>=2.0.1
- wheel>=0.36
- sysroot_linux-64 # [linux64]
@@ -30,9 +28,12 @@ requirements:
run:
- python
- numpy>=1.22,<2.0
- - pandas>=1.2, <2.2.0
+ - pandas>=1.2, <2.2.0 # [py>38]
+ - pandas<2.0 # [py==38]
- pytz>=2022.1
- toml>=0.10.2
+ - dill>=0.2.0
+ - requests>2.25.0
test:
imports:
diff --git a/docs/api/columns.md b/docs/api/columns.md
new file mode 100644
index 0000000..9e7988d
--- /dev/null
+++ b/docs/api/columns.md
@@ -0,0 +1,5 @@
+# PyKX Column Objects
+
+::: pykx.wrappers.Column
+
+::: pykx.wrappers.QueryPhrase
diff --git a/docs/api/compress.md b/docs/api/compress.md
index de34fcf..95425a0 100644
--- a/docs/api/compress.md
+++ b/docs/api/compress.md
@@ -1,4 +1,13 @@
-# Compression and Encryption APIs
+---
+title: Compression and encryption
+description: API reference page for using kdb+ compression and encryption through PyKX
+author: KX Systems
+date: September 2024
+tags: compression, encryption, dare
+---
+# Compression and encryption API
+
+_This page documents utilities for managing configuration of compression and encryption settings for on-disk data._
::: pykx.compress_encrypt
rendering:
diff --git a/docs/api/db.md b/docs/api/db.md
index 074a383..46176a4 100644
--- a/docs/api/db.md
+++ b/docs/api/db.md
@@ -1,4 +1,11 @@
-# Database interaction and management functionality
+---
+title: DB Admin
+description: API reference page for managing kdb+ databases with PyKX
+author: KX Systems
+date: September 2024
+tags: admin, management
+---
+# Database interaction and management
::: pykx.db
rendering:
diff --git a/docs/api/exceptions.md b/docs/api/exceptions.md
index 09f36c4..3b0e8e6 100644
--- a/docs/api/exceptions.md
+++ b/docs/api/exceptions.md
@@ -1,3 +1,11 @@
+---
+title: Exceptions
+description: Reference page explaining the different custom exceptions implemented in PyKX
+author: KX Systems
+date: September 2024
+tags: exceptions, errors
+---
+
# Exceptions
::: pykx.exceptions
diff --git a/docs/api/ipc.md b/docs/api/ipc.md
index af5b10e..b36a744 100644
--- a/docs/api/ipc.md
+++ b/docs/api/ipc.md
@@ -1,3 +1,14 @@
+---
+title: q IPC
+description: API reference page for connecting to q processes using IPC
+author: KX Systems
+date: September 2024
+tags: ipc, connections
+---
# IPC
::: pykx.ipc
+ handler: python
+ options:
+ show_source: true
+ members_order: "source"
diff --git a/docs/api/license.md b/docs/api/license.md
index 0ded71c..858b305 100644
--- a/docs/api/license.md
+++ b/docs/api/license.md
@@ -1,6 +1,13 @@
+---
+title: License management
+description: Utilities for the management of PyKX licenses
+author: KX Systems
+date: September 2024
+tags: license
+---
# License management
-The functionality presented here provides users with utilities allowing for the management of PyKX licenses and their lifecycle
+_This page documents utility functions that allow users to manage their PyKX licenses._
::: pykx.license
rendering:
diff --git a/docs/api/pykx-execution/console.md b/docs/api/pykx-execution/console.md
index 4b5d5c3..2837925 100644
--- a/docs/api/pykx-execution/console.md
+++ b/docs/api/pykx-execution/console.md
@@ -1,3 +1,12 @@
+---
+title: Emulated q console
+description: q console emulation in a python repl
+author: KX Systems
+date: September 2024
+tags: console,repl
+---
# PyKX Console
+_This page documents the use of the emulated q console available during use of a Python repl._
+
::: pykx.console
diff --git a/docs/api/pykx-execution/ctx.md b/docs/api/pykx-execution/ctx.md
index 8beae73..2b32999 100644
--- a/docs/api/pykx-execution/ctx.md
+++ b/docs/api/pykx-execution/ctx.md
@@ -1,3 +1,12 @@
-# Context Interface
+---
+title: q context interface
+description: PyKX interface for q contexts/namespaces
+author: KX Systems
+date: September 2024
+tags: namespace
+---
+# q Context Interface
+
+_This page documents the interface used for accessing q contexts/namespaces._
::: pykx.ctx
diff --git a/docs/api/pykx-execution/embedded_q.md b/docs/api/pykx-execution/embedded_q.md
index ea241e6..4baf23c 100644
--- a/docs/api/pykx-execution/embedded_q.md
+++ b/docs/api/pykx-execution/embedded_q.md
@@ -1,5 +1,7 @@
# PyKX Execution Classes
+_This page documents the base class for all interfaces between Python and the embedded q process._
+
::: pykx.Q
::: pykx.EmbeddedQ
diff --git a/docs/api/pykx-execution/q.md b/docs/api/pykx-execution/q.md
index 4a80a91..f6395c3 100644
--- a/docs/api/pykx-execution/q.md
+++ b/docs/api/pykx-execution/q.md
@@ -1,8 +1,17 @@
-# PyKX native function reference card
+---
+title: PyKX q functions and operators
+description: PyKX implementation of a subset of the q language's functions and operators
+author: KX Systems
+date: September 2024
+tags: operators
+---
+# q functions and operators
+
+_This page documents the PyKX implementations of a selection of keywords and operators available in q._
-This page documents the functions found in the q global namespace that are available in PyKX as attributes of `pykx.q`, or as attributes of `pykx.QConnection` instances. Refer to [the q reference card in the q docs](https://code.kx.com/q/ref/#by-category) for more details about using these functions in q. This page documents how one might use them from Python via PyKX.
+The functions listed here are accessible in PyKX as attributes of `#!python pykx.q`, or as attributes of `#!python pykx.QConnection` instances. Refer to [the q reference card in the q docs](https://code.kx.com/q/ref/#by-category) for more details about these functions as they are used in a q process. This page documents using them in Python via PyKX.
-All of these functions take and return q objects, which are wrapped in PyKX as `pykx.K` objects. Arguments of other types will have `pykx.K` called on them to convert them into q objects. Refer to [the PyKX wrappers documentation](../pykx-q-data/wrappers.md) for more information about `pykx.K` objects.
+These functions take and return q objects, which are wrapped in PyKX as `#!python pykx.K` objects. Any arguments of other types are converted appropriately. Refer to [the PyKX wrappers documentation](../pykx-q-data/wrappers.md) for more information about `#!python pykx.K` objects.
## By Category
@@ -21,10 +30,15 @@ Category | Elements
[Sort](#sort) | [`asc`](#asc), [`bin`](#bin), [`binr`](#binr), [`desc`](#desc), [`differ`](#differ), [`distinct`](#distinct), [`iasc`](#iasc), [`idesc`](#idesc), [`rank`](#rank), [`xbar`](#xbar), [`xrank`](#xrank)
[Table](#table) | [`cols`](#cols), [`csv`](#csv), [`fkeys`](#fkeys), [`insert`](#insert), [`key`](#key), [`keys`](#keys), [`meta`](#meta), [`ungroup`](#ungroup), [`upsert`](#upsert), [`xasc`](#xasc), [`xcol`](#xcol), [`xcols`](#xcols), [`xdesc`](#xdesc), [`xgroup`](#xgroup), [`xkey`](#xkey)
[Text](#text) | [`like`](#like), [`lower`](#lower), [`ltrim`](#ltrim), [`md5`](#md5), [`rtrim`](#rtrim), [`ss`](#ss), [`ssr`](#ssr), [`string`](#string), [`trim`](#trim), [`upper`](#upper)
+[Operators](#operators) | [`drop`](#drop), [`coalesce`](#coalesce), [`fill`](#fill), [`take`](#take), [`set_attribute`](#set_attribute), [`join`](#join), [`find`](#find), [`enum_extend`](#enum_extend), [`roll`](#roll), [`deal`](#deal), [`dict`](#dict), [`enkey`](#enkey), [`unkey`](#unkey), [`enumeration`](#enumeration), [`enumerate`](#enumerate), [`pad`](#pad), [`cast`](#cast), [`tok`](#tok), [`compose`](#compose)
-Not all functions listed on [the q reference card](https://code.kx.com/q/ref/#by-category) are available as attributes of `pykx.q`, or as attributes of `pykx.QConnection` instances. These include elements such as `select`, `exec`, `update`, and `delete` which are not actually q functions, but rather part of the q language itself (i.e. handled by the parser), and functions whose names would result in syntax errors in Python, such as `not` and `or`.
+Some keywords listed on [the q reference card](https://code.kx.com/q/ref/#by-category) are unavailable in this API:
-Because arbitrary q code can be executed using PyKX (except in unlicensed mode, in which none of these functions are available), these limitations can be circumvented as necessary by running q code instead of using [the context interface](ctx.md). For example, `pykx.q('not')` can be used instead of `pykx.q.not`. Consider using [the qSQL query documentation](../query.md) as an alternative to writing qSQL queries as q code.
+ - `#!q select`, `#!q exec`, `#!q update` and `#!q delete` are not q functions, but a part of the q language itself
+
+ - functions that have names which would result in syntax errors in Python, such as `#!q not` and `#!q or`
+
+The unavailable functions can still be used in PyKX by executing q code with `#!python pykx.q`, i.e. `#!python pykx.q('not')` instead of `#!python pykx.q.not`. For the qSQL functions (`#!q select`, `#!q exec`, `#!q update`, and `#!q delete`) use [PyKX qSQL](../query.md).
## Environment
@@ -319,9 +333,9 @@ Assign a value to a global variable.
Persist an object as a file or directory.
-| Types | Result |
-|------------------------------|--------------------------------------|
-| pykx.q.set(nam, y) | set global `nam` to `y` |
+| Types | Result |
+|-------------------------------------|------------------------------------|
+| pykx.q.set(nam, y) | set global `nam` to `y` |
| pykx.q.set(fil, y) | write `y` to a file |
| pykx.q.set(dir, y) | splay `y` to a directory |
| pykx.q.set([fil, lbs, alg, lvl], y) | write `y` to a file, compressed |
@@ -424,7 +438,7 @@ pykx.LongVector(q('1 3 6 10 15'))
Performs an as-of join across temporal columns in tables. Returns a table with records from the left-join of the first table and the second table. For each record in the first table, it is matched with the second table over the columns specified in the first input parameter and if there is a match the most recent match will be joined to the record.
-The resulting time column is the value of the boundry used in the first table.
+The resulting time column is the value of the boundary used in the first table.
```python
>>> import pandas as pd
@@ -2104,7 +2118,7 @@ Where x is a table, in which some cells are lists, but for any row, all lists ar
>>> a = pykx.Table([['a', [2, 3], 10], ['b', [5, 6, 7], 20], ['c', [11], 30]], columns=['s', 'x', 'q'])
>>> a
pykx.Table(pykx.q('
-s x q
+s x q
------------
a (2;3) 10
b (5;6;7) 20
@@ -2453,3 +2467,371 @@ pykx.SymbolAtom(q('`HELLO'))
>>> pykx.q.upper(b'hello')
pykx.CharVector(q('"HELLO"'))
```
+
+## Operators
+
+### [drop](https://code.kx.com/q/ref/drop/)
+
+Drop items from a list, entries from a dictionary or rows from a table.
+
+Examples:
+
+Drop the first 3 items from a list
+
+```python
+>>> import pykx as kx
+>>> kx.q.drop(3, kx.q('1 2 3 4 5 6'))
+pykx.LongVector(pykx.q('4 5 6'))
+```
+
+Drop the last 10 rows from a table
+
+```python
+>>> import pykx as kx
+>>> tab = kx.Table(data={
+... 'x': kx.q.til(100),
+... 'y': kx.random.random(100, 10.0)
+... })
+>>> kx.q.drop(-10, tab)
+pykx.Table(pykx.q('
+x y
+------------
+0 3.927524
+1 5.170911
+2 5.159796
+3 4.066642
+4 1.780839
+..
+'))
+>>> len(kx.q.drop(-10, tab))
+90
+```
+
+### [coalesce](https://code.kx.com/q/ref/coalesce/)
+
+Merge two keyed tables ignoring null objects
+
+Example:
+
+Coalesce two keyed tables one containing nulls
+
+```python
+>>> tab1 = kx.Table(data={
+... 'x': kx.q.til(10),
+... 'y': kx.random.random(10, 10.0)
+... }).set_index('x')
+>>> tab2 = kx.Table(data={
+... 'x': kx.q.til(10),
+... 'z': kx.random.random(10, [1.0, kx.FloatAtom.null, 10.0])
+... }).set_index('x')
+>>> kx.q.coalesce(tab1, tab2)
+pykx.KeyedTable(pykx.q('
+x| y z
+-| ------------
+0| 9.006991 10
+1| 8.505909
+2| 8.196014 10
+3| 0.9982673 1
+4| 8.187707
+..
+'))
+```
+
+### [fill](https://code.kx.com/q/ref/fill)
+
+Replace nulls in lists, dictionaries or tables
+
+Examples:
+
+Replace null values in a list
+
+```python
+>>> null_list = kx.random.random(10, [10, kx.LongAtom.null, 100])
+>>> kx.q.fill(0, null_list)
+```
+
+Replace all null values in a table
+
+```python
+>>> table = kx.Table(data={
+... 'x': kx.random.random(10, [10.0, kx.FloatAtom.null, 100.0]),
+... 'y': kx.random.random(10, [10.0, kx.FloatAtom.null, 100.0])
+... })
+>>> kx.q.fill(10.0, table)
+```
+
+### [take](https://code.kx.com/q/ref/take)
+
+Select leading or trailing items from a list or dictionary, named entries from a dictionary, or named columns from a table
+
+Examples:
+
+Retrieve the last 3 items from a list
+
+```python
+>>> lst = kx.q.til(100)
+>>> kx.q.take(-3, lst)
+pykx.LongVector(pykx.q('97 98 99'))
+```
+
+Retrieve named columns from a table using take
+
+```python
+>>> table = kx.Table(data={
+... 'x': kx.random.random(5, 10.0),
+... 'y': kx.random.random(5, 10.0),
+... 'z': kx.random.random(5, 10.0),
+... })
+>>> kx.q.take(['x', 'y'], table)
+pykx.Table(pykx.q('
+x y
+-----------------
+6.916099 9.672398
+2.296615 2.306385
+6.919531 9.49975
+4.707883 4.39081
+6.346716 5.759051
+'))
+```
+
+### [set_attribute](https://code.kx.com/q/ref/set-attribute/)
+
+Set an attribute for a supplied list or dictionary, the supplied attribute must be one of: 's', 'u', 'p' or 'g'.
+
+```python
+>>> kx.q.set_attribute('s', kx.q.til(10))
+pykx.LongVector(pykx.q('`s#0 1 2 3 4 5 6 7 8 9'))
+>>> kx.q.set_attribute('g', [2, 1, 2, 1])
+pykx.LongVector(pykx.q('`g#2 1 2 1'))
+```
+
+### [join](https://code.kx.com/q/ref/join/)
+
+Join atoms, lists, dictionaries or tables
+
+```python
+>>> kx.q.join([1, 2, 3], [4, 5, 6])
+pykx.LongVector(pykx.q('1 2 3 4 5 6'))
+```
+
+Join multiple dictionaries together
+
+```python
+>>> kx.q.join({'x': 1, 'y': 2}, {'z': 3})
+pykx.Dictionary(pykx.q('
+x| 1
+y| 2
+z| 3
+'))
+```
+
+Join multiple tables row-wise
+
+```python
+>>> t = kx.q('([]a:1 2 3;b:`a`b`c)')
+>>> s = kx.q('([]a:10 11;b:`d`e)')
+>>> kx.q.join(t, s)
+pykx.Table(pykx.q('
+a b
+----
+1 a
+2 b
+3 c
+10 d
+11 e
+'))
+```
+
+### [find](https://code.kx.com/q/ref/find/)
+
+Find the first occurrence of an item(s) in a list
+
+```python
+>>> lst = [10, -8, 3, 5, -1, 2, 3]
+>>> kx.q.find(lst, -8)
+pykx.LongAtom(pykx.q('1'))
+>>> kx.q.find(lst, [10, 3])
+pykx.LongVector(pykx.q('0 2'))
+```
+
+### [enum_extend](https://code.kx.com/q/ref/enum-extend/)
+
+Extend a defined variable enumeration
+
+```python
+>>> kx.q['foo'] = ['a', 'b']
+>>> kx.q.enum_extend('foo', ['a', 'b', 'c', 'a', 'b'])
+pykx.EnumVector(pykx.q('`foo$`a`b`c`a`b'))
+>>> kx.q['foo']
+pykx.SymbolVector(pykx.q('`a`b`c'))
+```
+
+Extend a filepath enumeration
+
+```python
+>>> import os
+>>> from pathlib import Path
+>>> kx.q['bar'] = ['c', 'd'] # about to be overwritten
+>>> kx.q.enum_extend(Path('bar'), ['a', 'b', 'c', 'b', 'b', 'a'])
+pykx.EnumVector(pykx.q('`bar$`a`b`c`b`b`a'))
+>>> os.system('ls -l bar')
+-rw-r--r-- 1 username staff 14 20 Aug 09:34 bar
+>>> kx.q['bar']
+pykx.SymbolVector(pykx.q('`a`b`c'))
+```
+
+### [roll](https://code.kx.com/q/ref/roll/)
+
+Generate a random list of values with duplicates, for this the first parameter must be positive.
+
+```python
+>>> kx.q.roll(3, 10.0)
+pykx.FloatVector(pykx.q('3.927524 5.170911 5.159796'))
+>>> kx.q.roll(4, [1, 'a', 10.0])
+pykx.List(pykx.q('
+`a
+1
+`a
+10f
+'))
+```
+
+### [deal](https://code.kx.com/q/ref/deal/)
+
+Generate a random list of values without duplicates, for this the first parameter must be negative.
+
+```python
+>>> kx.q.deal(-5, 5)
+pykx.LongVector(pykx.q('1 3 2 0 4'))
+>>> kx.q.deal(-3, ['the', 'quick', 'brown', 'fox'])
+pykx.SymbolVector(pykx.q('`the`brown`quick'))
+```
+
+### [dict](https://code.kx.com/q/ref/dict/)
+
+Generate a dictionary by passing two lists of equal lengths
+
+```python
+>>> kx.q.dict(['a', 'b', 'c'], [1, 2, 3])
+pykx.Dictionary(pykx.q('
+a| 1
+b| 2
+c| 3
+'))
+```
+
+### [enkey](https://code.kx.com/q/ref/enkey/)
+
+Create a keyed table by passing an integer to a simple table. This is similar to `set_index`
+
+```python
+>>> simple_tab = kx.Table(data = {
+... 'x': [1, 2, 3],
+... 'y': [4, 5, 6]
+... })
+>>> kx.q.enkey(1, simple_tab)
+pykx.KeyedTable(pykx.q('
+x| y
+-| -
+1| 4
+2| 5
+3| 6
+'))
+```
+
+### [unkey](https://code.kx.com/q/ref/unkey/)
+
+Remove the keys from a keyed table returning a simple table, this is similar to `reset_index`
+with no arguments
+
+```python
+>>> keyed_tab = kx.Table(data = {
+... 'x': [1, 2, 3],
+... 'y': [4, 5, 6]
+... }).set_index(1)
+>>> kx.q.unkey(0, keyed_tab)
+pykx.Table(pykx.q('
+x y
+---
+1 4
+2 5
+3 6
+'))
+```
+
+### [enumeration](https://code.kx.com/q/ref/enumeration/)
+
+Enumerate a symbol list
+- First argument is a variable in q memory denoting a symbol list
+- Second argument is a vector of integers in the domain 0-length(first argument)
+
+```python
+>>> kx.q['x'] = ['a', 'b', 'c', 'd']
+>>> kx.q.enumeration('x', [1, 2, 3])
+pykx.EnumVector(pykx.q('`x$`b`c`d'))
+```
+
+### [enumerate](https://code.kx.com/q/ref/enumerate/)
+
+Enumerate a list of symbols based on the symbols in a global q variable
+
+```python
+>>> kx.q['d'] = ['a', 'b', 'c']
+>>> y = ['a', 'b', 'c', 'b', 'a', 'b', 'c']
+>>> kx.q.enumerate('d', y)
+pykx.EnumVector(pykx.q('`d$`a`b`c`b`a`b`c'))
+```
+
+### [pad](https://code.kx.com/q/ref/pad/)
+
+Pad a supplied PyKX string (Python bytes) to the length supplied by the user.
+In the case that you are padding the front of a string use a negative value.
+
+```python
+>>> kx.q.pad(-5, b'abc')
+pykx.CharVector(pykx.q('" abc"'))
+>>> kx.q.pad(10, [b'test', b'string', b'length'])
+pykx.List(pykx.q('
+"test "
+"string "
+"length "
+'))
+```
+
+### [cast](https://code.kx.com/q/ref/cast/)
+
+Convert data to another datatype. The target type should be specified as a single lower case character byte or as the name of the type.
+See https://code.kx.com/q/ref/cast/ for the accepted list.
+
+```python
+>>> long_vec = kx.q('til 10')
+>>> kx.q.cast('short', long_vec)
+pykx.ShortVector(pykx.q('0 1 2 3 4 5 6 7 8 9h'))
+>>> kx.q.cast(b'b', long_vec)
+pykx.BooleanVector(pykx.q('0111111111b'))
+```
+
+### [tok](https://code.kx.com/q/ref/tok/)
+
+Interpret a PyKX string as a data value(s), this should use a single upper case character byte or
+a non-positive PyKX short value.
+See https://code.kx.com/q/ref/tok/ for more information on accepted lists for casting
+
+```python
+>>> kx.q.tok(b'F', b'3.14')
+pykx.FloatAtom(pykx.q('3.14'))
+>>> float_int = kx.toq(-9, kx.ShortAtom)
+>>> kx.q.tok(float_int, b'3.14')
+```
+
+### [compose](https://code.kx.com/q/ref/compose/)
+
+Compose a unary value function with another.
+
+```python
+>>> f = kx.q('{2*x}')
+>>> ff = kx.q('{[w;x;y;z]w+x+y+z}')
+>>> d = kx.q.compose(f, ff)
+>>> d(1, 2, 3, 4)
+pykx.LongAtom(pykx.q('20'))
+```
diff --git a/docs/api/pykx-q-data/register.md b/docs/api/pykx-q-data/register.md
index 31e9051..de4b045 100644
--- a/docs/api/pykx-q-data/register.md
+++ b/docs/api/pykx-q-data/register.md
@@ -1,5 +1,19 @@
-# Registering Custom Conversions
+---
+title: Registering Custom Operations
+description: API for pykx.register
+date: October 2024
+author: KX Systems, Inc.
+tags: PyKX, register, api
+---
-The purpose of this functionality is to provide an extension mechanism for PyKX allowing users to register extension logic for handling conversions from Pythonic types to create PyKX objects when using the `pykx.toq` function or any internal functionality which makes use of this conversion mechanism.
+# Registering Custom Operations
+
+The purpose of this functionality is to provide an extension mechanism for PyKX allowing users to register extension logic for PyKX.
+
+Specifically this allows users to:
+
+1. Extend the supported conversions from Pythonic types to PyKX objects when using the `#!python pykx.toq` function
+2. Extend the supported custom functions on `#!python pykx.Column` objects
::: pykx.register
+
diff --git a/docs/api/pykx-q-data/toq.md b/docs/api/pykx-q-data/toq.md
index 5f330af..ba296dc 100644
--- a/docs/api/pykx-q-data/toq.md
+++ b/docs/api/pykx-q-data/toq.md
@@ -1,3 +1,12 @@
+---
+title: Python to PyKX Data Conversion
+description: Documentation for API that handles converting Python data types into PyKX K objects
+author: KX Systems
+date: September 2024
+tags: conversion,types,toq
+---
# Convert Pythonic data to PyKX
+_This page documents converting Python data types to PyKX data types via the `#!python pykx.toq` function._
+
::: pykx.toq
diff --git a/docs/api/pykx-q-data/type_conversions.md b/docs/api/pykx-q-data/type_conversions.md
index 7373913..fc74cfd 100644
--- a/docs/api/pykx-q-data/type_conversions.md
+++ b/docs/api/pykx-q-data/type_conversions.md
@@ -143,7 +143,7 @@ True
[True, False, True]
```
-=== "Numpy, Pandas, Pyarrow"
+=== "Numpy, Pandas, PyArrow"
Converting a `pykx.BoolVector` will result in an array of objects with the `bool` `dtype`, arrays of that `dtype` can also be converted into `pykx.BoolVector` objects.
## `pykx.GUIDAtom`
diff --git a/docs/api/pykx-q-data/wrappers.md b/docs/api/pykx-q-data/wrappers.md
index ba6ca30..a336379 100644
--- a/docs/api/pykx-q-data/wrappers.md
+++ b/docs/api/pykx-q-data/wrappers.md
@@ -1,3 +1,12 @@
# PyKX type wrappers
::: pykx.wrappers
+ selection:
+ filters:
+ - "!Column"
+ - "!pykx.wrappers.Column"
+ - "!wrappers.Column"
+ - "!QueryPhrase"
+ - "!pykx.wrappers.QueryPhrase"
+ - "!wrappers.QueryPhrase"
+
diff --git a/docs/api/pykx-save-load/fileio.md b/docs/api/pykx-save-load/fileio.md
new file mode 100644
index 0000000..2f55cd1
--- /dev/null
+++ b/docs/api/pykx-save-load/fileio.md
@@ -0,0 +1,6 @@
+# Reading and Writing files with PyKX
+
+::: pykx.read
+
+::: pykx.write
+
diff --git a/docs/api/query.md b/docs/api/query.md
index e1b9f7d..fe38d45 100644
--- a/docs/api/query.md
+++ b/docs/api/query.md
@@ -1,3 +1,10 @@
-# Querying
+---
+title: Database queries
+description: API reference page for querying kdb+ databases using PyKX
+author: KX Systems
+date: September 2024
+tags: query, database, update, delete, upsert, insert, select, exec, kdb+
+---
+# Database queries
::: pykx.query
diff --git a/docs/api/random.md b/docs/api/random.md
index 5a4eb36..81152e7 100644
--- a/docs/api/random.md
+++ b/docs/api/random.md
@@ -1,6 +1,13 @@
+---
+title: Random data generation
+description: PyKX interface for accessing q random data generation
+author: KX Systems
+date: September 2024
+tags: random
+---
# Random data generation
-The functionality presented here provides users with utilities for the creation of random data.
+_This page documents utility functions users can execute to create random data._
::: pykx.random
rendering:
diff --git a/docs/api/reimporting.md b/docs/api/reimporting.md
index 4dab9e1..fc4b637 100644
--- a/docs/api/reimporting.md
+++ b/docs/api/reimporting.md
@@ -1,3 +1,10 @@
+---
+title: Reimporting module
+description: API reference page for reimporting the PyKX module in a Python session
+author: KX Systems
+date: September 2024
+tags: reimport, pykx, import
+---
# Reimporting
::: pykx.reimporter
diff --git a/docs/api/remote.md b/docs/api/remote.md
index 2101520..98bdc51 100644
--- a/docs/api/remote.md
+++ b/docs/api/remote.md
@@ -1,4 +1,11 @@
-# Remote Python Execution Functionality
+---
+title: Remote Python execution
+description: API reference page for functions that permit executing python functions on q processes through IPC
+author: KX Systems
+date: September 2024
+tags: ipc, pykx, embed
+---
+# Remote Python execution
::: pykx.remote
rendering:
diff --git a/docs/api/schema.md b/docs/api/schema.md
index f368ed6..18a2694 100644
--- a/docs/api/schema.md
+++ b/docs/api/schema.md
@@ -1,3 +1,10 @@
+---
+title: Schema generation
+description: API reference page for functions that create table schemas
+author: KX Systems
+date: September 2024
+tags: schemas
+---
# Schema generation
::: pykx.schema
diff --git a/docs/api/serialize.md b/docs/api/serialize.md
index 5cb0758..c19c75d 100644
--- a/docs/api/serialize.md
+++ b/docs/api/serialize.md
@@ -1,3 +1,10 @@
-# Serialization
+---
+title: Serialization and deserialization
+description: API reference page for serializing and deserializing PyKX objects
+author: KX Systems
+date: September 2024
+tags: serialize, deserialize
+---
+# Serialization and deserialization
::: pykx.serialize
diff --git a/docs/api/streamlit.md b/docs/api/streamlit.md
index 0cf146f..bb93e47 100644
--- a/docs/api/streamlit.md
+++ b/docs/api/streamlit.md
@@ -1,4 +1,13 @@
-# Streamlit Integration
+---
+title: Streamlit integration
+description: API reference page for using the streamlit library to request data through q connections
+author: KX Systems
+date: September 2024
+tags: streamlit, ipc
+---
+# Streamlit integration
+
+_This page documents the API for using the Streamlit library with PyKX._
::: pykx.streamlit
rendering:
diff --git a/docs/api/system.md b/docs/api/system.md
index 8a9ed5f..3716746 100644
--- a/docs/api/system.md
+++ b/docs/api/system.md
@@ -1,3 +1,10 @@
-# System Command Wrappers
+---
+title: System command wrappers
+description: API reference page for wrappers around q system commands
+author: KX Systems
+date: September 2024
+tags: commands
+---
+# System command wrappers
::: pykx.system
diff --git a/docs/api/tick.md b/docs/api/tick.md
new file mode 100644
index 0000000..b0d818e
--- /dev/null
+++ b/docs/api/tick.md
@@ -0,0 +1,22 @@
+---
+title: Streaming tickerplant
+description: API reference page for the PyKX tick module
+author: KX Systems
+date: September 2024
+tags: tick, rdb, hdb, idb, streaming, stream
+---
+# Streaming tickerplant
+
+::: pykx.tick
+ rendering:
+ show_root_heading: false
+ options:
+ show_root_heading: false
+ members_order: source
+ members:
+ - STREAMING
+ - BASIC
+ - TICK
+ - RTP
+ - HDB
+ - GATEWAY
diff --git a/docs/api/util.md b/docs/api/util.md
index 55ffc4f..836dfef 100644
--- a/docs/api/util.md
+++ b/docs/api/util.md
@@ -37,6 +37,28 @@ SSL_VERIFY_SERVER| YES
'))
```
+## `pykx.util.kill_q_process`
+
+```python
+pykx.util.kill_q_process(port)
+```
+
+Kill a q process running on a specified port. This allows users to kill sub-processes running q when access to the port has been lost, for example because the parent process that started them is no longer available.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|------------|------|------------------------------------------------|---------|
+| port | int | The integer representing the port to be killed | |
+
+
+**Returns:**
+
+| Type | Description |
+|------|--------------------------------------------------------------------------------------|
+| bool | Returns `True` if process was successfully killed, `False` if process was not killed |
+
+
## `pykx.util.debug_environment`
```python
@@ -77,43 +99,35 @@ pandas: 2.0.3
numpy: 1.24.4
pytz: 2023.3.post1
which python: /usr/local/bin/python
-which python3: /Library/Frameworks/Python.framework/Versions/3.12/bin/python3
+which python3: /usr/local/anaconda3/lib/python3.8/bin/python3
find_libpython: /usr/local/anaconda3/lib/libpython3.8.dylib
**** Platform information ****
platform.platform: macOS-10.16-x86_64-i386-64bit
-**** PyKX Environment Variables ****
-PYKX_IGNORE_QHOME:
-PYKX_KEEP_LOCAL_TIMES:
-PYKX_ALLOCATOR:
-PYKX_GC:
-PYKX_LOAD_PYARROW_UNSAFE:
-PYKX_MAX_ERROR_LENGTH:
-PYKX_NOQCE:
-PYKX_Q_LIB_LOCATION:
-PYKX_RELEASE_GIL:
-PYKX_Q_LOCK:
-PYKX_DEFAULT_CONVERSION:
-PYKX_SKIP_UNDERQ:
-PYKX_UNSET_GLOBALS:
-PYKX_DEBUG_INSIGHTS_LIBRARIES:
-PYKX_EXECUTABLE: /usr/local/anaconda3/bin/python
-PYKX_PYTHON_LIB_PATH:
-PYKX_PYTHON_BASE_PATH:
-PYKX_PYTHON_HOME_PATH:
+**** PyKX Configuration Variables ****
+PYKX_IGNORE_QHOME: False
+PYKX_KEEP_LOCAL_TIMES: False
+PYKX_ALLOCATOR: False
+PYKX_GC: False
+PYKX_LOAD_PYARROW_UNSAFE: False
+PYKX_MAX_ERROR_LENGTH: 256
+PYKX_NOQCE: False
+PYKX_RELEASE_GIL: False
+PYKX_Q_LIB_LOCATION: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib
+PYKX_Q_LOCK: False
+PYKX_SKIP_UNDERQ: False
+PYKX_Q_EXECUTABLE: /usr/local/anaconda3/envs/qenv/q/m64/q
+PYKX_THREADING: False
+PYKX_4_1_ENABLED: False
+PYKX_QDEBUG: False
+PYKX_DEBUG_INSIGHTS_LIBRARIES: False
+PYKX_DEFAULT_CONVERSION:
+PYKX_EXECUTABLE: /usr/local/anaconda3/lib/python3.8/bin/python3.8
+PYKX_PYTHON_LIB_PATH:
+PYKX_PYTHON_BASE_PATH:
+PYKX_PYTHON_HOME_PATH:
PYKX_DIR: /usr/local/anaconda3/lib/python3.8/site-packages/pykx
-PYKX_QDEBUG:
-PYKX_THREADING:
-PYKX_4_1_ENABLED:
-
-**** PyKX Deprecated Environment Variables ****
-SKIP_UNDERQ:
-UNSET_PYKX_GLOBALS:
-KEEP_LOCAL_TIMES:
-IGNORE_QHOME:
-UNDER_PYTHON:
-PYKX_NO_SIGINT:
**** q Environment Variables ****
QARGS:
diff --git a/docs/beta-features/index.md b/docs/beta-features/index.md
index c23c398..47341b5 100644
--- a/docs/beta-features/index.md
+++ b/docs/beta-features/index.md
@@ -1,5 +1,9 @@
# Beta Features
+!!! note
+
+    There are currently no active features in beta status. This page broadly outlines the concept of beta features within PyKX and how they are managed today.
+
## What is a Beta Feature?
As used commonly within software development "Beta Features" within PyKX describe features which have completed an initial development process phase and are being released in an opt-in manner to users of PyKX wishing to test these features. These features are not intended to be for production use while in beta and are subject to change prior to release as full features. Usage of these features will not effect the default behaviour of the library outside of the scope of the new functionality being added.
@@ -15,7 +19,7 @@ Within PyKX beta features are enabled through the use of a configuration/environ
>>> os.environ['PYKX_BETA_FEATURES'] = 'True'
>>> import pykx as kx
>>> kx.beta_features
-['Streamlit Integration', 'Compression and Encryption', 'Database Management', 'Remote Functions']
+[]
```
Alternatively you can set beta features to be available at all times by adding `PYKX_BETA_FEATURES` to your `.pykx-config` file as outlined [here](../user-guide/configuration.md#configuration-file). An example of a configuration making use of this is as follows:
@@ -35,19 +39,7 @@ As mentioned above the list of available features to a user is contained within
```python
>>> import pykx as kx
>>> kx.beta_features
-['Database Management', 'Remote Functions']
+[]
```
-The following are the currently available beta features:
-
-- [Database Management](db-management.md) provides users with the ability to create, load and maintain databases and their associated tables including but not limited to:
-
- - Database table creation and renaming.
- - Enumeration of in-memory tables against on-disk sym file.
- - Column listing, addition, reordering, renaming copying, function application and deletion on-disk.
- - Attribute setting and removal.
- - Addition of missing tables from partitions within a database.
-
-- [Remote Functions](remote-functions.md) let you define functions in Python which interact directly with kdb+ data on a q process. These functions can seamlessly integrate into existing Python infrastructures and also benefit systems that use q processes over Python for performance reasons or as part of legacy applications.
-- [PyKX Threading](threading.md) provides users with the ability to call into `EmbeddedQ` from multithreaded python programs and allow any thread to modify global state safely.
-- [Streamlit Integration](streamlit.md) provides users with the ability to query kdb+ infrastructure through direct integration with Streamlit.
+There are currently no active features in beta status. This page will be updated when new beta features are added at a future point in time.
diff --git a/docs/contributors.md b/docs/contributors.md
index 0c25089..21786ac 100644
--- a/docs/contributors.md
+++ b/docs/contributors.md
@@ -22,3 +22,4 @@ The aim of this page is to include a list of the contributors to our project bot
- [nipsn](https://github.com/nipsn)
- [marcosvm13](https://github.com/marcosvm13)
- [tortolavivo23](https://github.com/tortolavivo23)
+- [MiguelGomezC](https://github.com/MiguelGomezC)
diff --git a/docs/examples/charting.ipynb b/docs/examples/charting.ipynb
index 325c11d..dc21cb8 100644
--- a/docs/examples/charting.ipynb
+++ b/docs/examples/charting.ipynb
@@ -5,9 +5,9 @@
"id": "0cee3f27-46b2-4ed8-9199-a6c83968b76d",
"metadata": {},
"source": [
- "# Charting Data with PyKX\n",
+ "# Python charting libraries\n",
"\n",
- "This workbook details example of interfacing PyKX with Python charting libraries.\n",
+ "_This workbook details examples of interfacing PyKX with Python charting libraries._\n",
"\n",
"PyKX supports rich datatype mapping meaning you can convert data from PyKX objects to:\n",
"- Python objects using `.py()`\n",
@@ -17,7 +17,7 @@
"\n",
"The full breakdown of how these map is documented [here.](https://code.kx.com/pykx/api/pykx-q-data/type_conversions.html)\n",
"\n",
- "These resulting objects will behave as expected with all Python libraries.\n",
+ "The resulting objects behave as expected with all Python libraries.\n",
"\n",
"For efficiency and exactness the examples below aim to use PyKX objects directly, minimising conversions when possible."
]
@@ -34,7 +34,7 @@
"outputs": [],
"source": [
"import os\n",
- "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
+ "os.environ['PYKX_IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
"os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation."
]
},
@@ -145,13 +145,13 @@
"source": [
"## Matplotlib\n",
"\n",
- "Generating a scatter plot using the `price` and `size` columns of our table. \n",
+ "Let's generate a scatter plot using the `price` and `size` columns of our table. \n",
"\n",
- "The `scatter(tab['price'], tab['quantity'])` notation is used to access PyKX objects directly. \n",
+ "Use the `scatter(tab['price'], tab['quantity'])` notation to access PyKX objects directly. \n",
"\n",
- "To use `x=` and `y=` syntax requires conversion to a dataframe using `.pd()` .i.e `scatter(tab.pd(), x='price' ,y='quantity')` \n",
+ "The `x=` and `y=` syntax requires conversion to a dataframe using `.pd()`, i.e. `scatter(tab.pd(), x='price', y='quantity')` \n",
"\n",
- "`scatter` fundamentally uses a series of 1D arrays and is therefore one of the only charts where the column values do not need to first be converted in Numpy objects using `.np()`."
+ "`scatter` uses a series of 1D arrays and is one of the only charts where you don't need to first convert the column values into NumPy objects using `.np()`."
]
},
{
@@ -183,12 +183,12 @@
"id": "2e76c5d1-7dd3-482c-90cb-c263d31ad808",
"metadata": {},
"source": [
- "In order for the column values to be compatible with most of matplotlib charts, they first must be converted to numpy objects using the `.np()` function."
+ "To make the column values compatible with most Matplotlib charts, first convert them to NumPy objects using the `.np()` function."
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "b62a4a3f-90bb-4f9f-8df6-46fdfb6bc4b9",
"metadata": {},
"outputs": [
@@ -215,7 +215,7 @@
"source": [
"## Plotly\n",
"\n",
- "Plotly allows `vector` objects to be passed as the `color` argument. This parameter is set using the `sym` column resulting in the scatter chart below.\n"
+ "Plotly allows you to pass `vector` objects as the `color` argument. Set this parameter using the `sym` column to obtain the scatter chart below.\n"
]
},
{
@@ -250,7 +250,7 @@
"source": [
"Unlike with Pandas, a PyKX table cannot be passed as the first argument with the following data being passed as column names. Each axis must be explicitly set. \n",
"\n",
- "To use this feature, first convert to Pandas using the `.pd()` function"
+ "To use this feature, first convert to Pandas using the `.pd()` function."
]
},
{
@@ -258,7 +258,7 @@
"id": "cdd20942-1a7d-419c-a0ff-3e6e07f3acf9",
"metadata": {},
"source": [
- "A density heatmap using Plotly. This time the table is converted to a Pandas Dataframe and then the axes are simply assigned the column names as strings."
+ "To create a density heatmap using Plotly, convert the table to a Pandas Dataframe. The axes are simply assigned the column names as strings."
]
},
{
@@ -290,9 +290,9 @@
"source": [
"## Seaborn\n",
"\n",
- "Seaborn allows the user to set `data` as a PyKX table name without conversions and then call the `x` and `y` parameters using only the column names of that table.\n",
+ "Seaborn allows you to set `data` as a PyKX table name without conversions and then call the `x` and `y` parameters using only the column names of that table.\n",
"\n",
- "A bar chart below demonstrates this with the data being set as the table object and all of the parameters being set using the column names, all without conversions."
+ "The bar chart below demonstrates this by setting the data as the table object and using the column names for all parameters, without any conversions."
]
},
{
@@ -330,7 +330,7 @@
"id": "c6c63b3f-c8a2-48d3-a63f-334af2c158ab",
"metadata": {},
"source": [
- "Seaborn supports joining plots together, allowing the user access to another layer of visualisation."
+ "Seaborn supports joining plots together, allowing you access to another layer of visualisation."
]
},
{
diff --git a/docs/examples/compress_and_encrypt/readme.md b/docs/examples/compress_and_encrypt/readme.md
index 5756eaf..c7fb1ae 100644
--- a/docs/examples/compress_and_encrypt/readme.md
+++ b/docs/examples/compress_and_encrypt/readme.md
@@ -1,12 +1,20 @@
-# Compression and Encryption
+---
+title: PyKX Compress and encrypt
+description: Compress and encrypt Examples
+date: October 2024
+author: KX Systems, Inc.
+tags: compression, encryption, PyKX
+---
-This example shows how to use various `q` compression and encryption algorithms on a `PyKX` table.
+# Compress and encrypt example
-To follow along with this example please feel free to download this zip archive that contains a copy of the python script and this writeup.
+_This example shows how to use various `#!python q` compression and encryption algorithms on a `#!python PyKX` table._
-Here are the various compression algorithms used and the compression levels that they can use.
+To follow along, download this zip archive that contains a copy of the python script and this writeup.
-algorithm | compression level
+Here are the compression algorithms and their levels:
+
+**Algorithm** | **Compression level**
--------- | -----------------
`ipc` | `0`
`gzip` | `0`-`9`
@@ -15,15 +23,15 @@ algorithm | compression level
## Quickstart
-This example can be ran by executing the `compress_and_encrypt.py` file.
+To run this example, execute the `#!python compress_and_encrypt.py` file.
-```
+```bash
$ python compress_and_encrypt.py
```
## Outcome
-```
+```py
Writing in-memory trades table with gzip: {
"compressedLength": 12503352,
"uncompressedLength": 36666552,
diff --git a/docs/examples/db-management.ipynb b/docs/examples/db-management.ipynb
index 115d747..3360253 100644
--- a/docs/examples/db-management.ipynb
+++ b/docs/examples/db-management.ipynb
@@ -9,28 +9,28 @@
"\n",
"This notebook provides a walkthrough of some of the functionality available for users looking to create and maintain large databases using PyKX.\n",
"\n",
- "In particular, this notebook refers to creating and maintaining [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for more in-depth information about partitioned databases in kdb+.\n",
+ "This notebook refers to creating and maintaining large [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/) using PyKX. Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for more in-depth information about partitioned databases in kdb+.\n",
"\n",
- "You can download this walkthrough as a `.ipynb` notebook file using the following link.\n",
+ "You can download this walkthrough as a `.ipynb` notebook file.\n",
"\n",
"This walkthrough provides examples of the following tasks:\n",
"\n",
"1. Creating a database from a historical dataset\n",
"1. Adding a new partition to the database\n",
"1. Managing the on-disk database by:\n",
- " 1. Renaming a table and column\n",
- " 2. Creating a copy of a column to the database\n",
- " 3. Applying a Python function to a column of the database\n",
- " 4. Updating the data type of a column\n",
+ " - Renaming a table and column\n",
+ " - Creating a copy of a column to the database\n",
+ " - Applying a Python function to a column of the database\n",
+ " - Updating the data type of a column\n",
"1. Adding a new table to the most recent partition of the database\n",
"\n",
- "For full information on the functions available you can reference the [API section](https://code.kx.com/pykx/api/db.html).\n",
+ "For full information on the functions available, go to the [API section](https://code.kx.com/pykx/api/db.html).\n",
"\n",
"---\n",
"\n",
"## Initial setup\n",
"\n",
- "Import all required libraries and create a temporary directory which will be used to store the database we create for this walkthrough"
+ "Import all required libraries and create a temporary directory which will be used to store the database we create for this walkthrough."
]
},
{
@@ -45,7 +45,7 @@
"outputs": [],
"source": [
"import os\n",
- "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
+ "os.environ['PYKX_IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
"os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation."
]
},
@@ -99,7 +99,7 @@
"id": "143e0886",
"metadata": {},
"source": [
- "For details on any methods contained within this class, you can use the `help` method. "
+ "For details on any methods contained within this class, use the `help` method. "
]
},
{
@@ -312,7 +312,7 @@
"id": "c0ecec19",
"metadata": {},
"source": [
- "When a table is saved, an attribute is added to the `db` class for it. For our newly generated table, this is `db.trade_data`"
+ "When a table is saved, an attribute is added to the `db` class for it. For our newly generated table, this is `db.trade_data`."
]
},
{
@@ -896,7 +896,7 @@
"id": "4c44fab2",
"metadata": {},
"source": [
- "Renaming a column in a table is achieved using the `rename_column` method. For example, let's update the `sym` column in the `trade` table to be called `ticker`."
+ "To rename a column in a table, use the `rename_column` method. For example, let's rename the `sym` column (in the `trade` table) to `ticker`."
]
},
{
@@ -1465,7 +1465,7 @@
"id": "3c63e2bb",
"metadata": {},
"source": [
- "You can now apply a function to the copied column without the risk of losing the original data. Below we are modifying the copied column by multiplying the contents by 2."
+ "You can now apply a function to the copied column without the risk of losing the original data. Below, let's modify the copied column by multiplying the contents by 2."
]
},
{
@@ -2038,7 +2038,7 @@
"id": "119a373b",
"metadata": {},
"source": [
- "To convert the data type of a column, you can use the `set_column_type` method. Before we do that, we can look at the metadata information for the table using the `meta` method. \n"
+ "To convert the data type of a column, use the `set_column_type` method. Before we do that, let's look at the metadata information for the table using the `meta` method:\n"
]
},
{
@@ -2118,7 +2118,7 @@
"id": "ffad39b1",
"metadata": {},
"source": [
- "Currently the `size` column is the type `LongAtom`. We will update this to be a type `ShortAtom`."
+ "Currently the `size` column is the type `LongAtom`. Let's update this to be a type `ShortAtom`:"
]
},
{
@@ -2146,7 +2146,7 @@
"id": "319317bf",
"metadata": {},
"source": [
- "Now let's apply the `grouped` attribute to the size column. For more information on attributes in kdb+, please refer to the Q for Mortals [Attributes section](https://code.kx.com/q4m3/8_Tables/#88-attributes)."
+ "Now let's apply the `grouped` attribute to the size column. For more information on attributes in kdb+, refer to the Q for Mortals [Attributes section](https://code.kx.com/q4m3/8_Tables/#88-attributes)."
]
},
{
@@ -2254,9 +2254,9 @@
"id": "e75b07ae",
"metadata": {},
"source": [
- "## Onboarding your next table\n",
+ "## Onboard your next table\n",
"\n",
- "Now that you have successfully set up one table, you may want to add a second table. We follow the same method as before and create the `quotes` table using the `create` method. In this example, the `quotes` table only contains data for `2020.01.03`."
+ "Now that you have successfully set up one table, you may want to add a second table. We follow the same method as before and create the `quotes` table using the `create` method. In this example, the `quotes` table only contains data for `2020.01.03`:"
]
},
{
@@ -2298,7 +2298,7 @@
"id": "87670793",
"metadata": {},
"source": [
- "All tables within a database must contain the same partition structure. To ensure the new table can be accessed, the `quotes` table needs to exist in every partition within the database, even if there is no data for that partition. This is called backfilling data. For the partitions where the `quotes` table is missing, we use the `fill_database` method. "
+ "All tables within a database must contain the same partition structure. To ensure you can access the new table, the `quotes` table needs to exist in every partition within the database, even if there is no data for that partition. This is called backfilling data. For the partitions where the `quotes` table is missing, we use the `fill_database` method:"
]
},
{
@@ -2325,7 +2325,7 @@
"id": "e41e8589",
"metadata": {},
"source": [
- "Now that the database has resolved the missing tables within the partitions, we can view the new `quotes` table"
+ "Now that the database has resolved the missing tables within the partitions, we can view the new `quotes` table:"
]
},
{
@@ -2630,7 +2630,7 @@
"id": "43366fab",
"metadata": {},
"source": [
- "Finally, to view the amount of saved data you can count the number of rows per partition using `partition_count`"
+ "Finally, to view the amount of saved data, count the number of rows per partition using `partition_count`:"
]
},
{
@@ -2698,7 +2698,7 @@
"id": "b03cfb4b",
"metadata": {},
"source": [
- "## Cleanup temporary database created"
+ "## Clean up temporary database created"
]
},
{
diff --git a/docs/examples/interface-overview.ipynb b/docs/examples/interface-overview.ipynb
new file mode 100644
index 0000000..939450c
--- /dev/null
+++ b/docs/examples/interface-overview.ipynb
@@ -0,0 +1,1150 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# PyKX introduction notebook\n",
+ "\n",
+ "_The purpose of this notebook is to introduce you to PyKX capabilities and functionality._\n",
+ "\n",
+ "For the best experience, visit [what is PyKX](https://code.kx.com/pykx/2.5/getting-started/what_is_pykx.html) and the [quickstart guide](https://code.kx.com/pykx/2.5/getting-started/quickstart.html) first.\n",
+ "\n",
+ "To follow along, we recommend downloading the notebook. \n",
+ "\n",
+ "Now let's go through the following sections:\n",
+ "\n",
+ "1. [Import PyKX](#1-import-pykx)\n",
+ "1. [Basic PyKX data structures](#2-basic-pykx-data-structures)\n",
+ "1. [Access and create PyKX objects](#3-access-and-create-pykx-objects)\n",
+ "1. [Run analytics on PyKX objects](#4-run-analytics-on-pykx-objects)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Import PyKX\n",
+ "\n",
+ "To access PyKX and its functions, import it in your Python code as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": [
+ "hide_code"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['PYKX_IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
+ "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pykx as kx\n",
+ "kx.q.system.console_size = [10, 80]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Tip: We recommend always using `import pykx as kx`. The shortened import name `kx` makes the code more readable and is standard for the PyKX library. \n",
+ "\n",
+ "Below we load additional libraries used throughout this notebook:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Basic PyKX data structures\n",
+ "\n",
+ "Central to your interaction with PyKX are the data types supported by the library. PyKX is built atop the `q` programming language. This provides small footprint data structures for analytic calculations and the creation of highly-performant databases. The types we show below are generated from Python-equivalent types.\n",
+ "\n",
+ "This section describes the basic elements in the PyKX library and explains why/how they are different:\n",
+ "\n",
+ "- 2.1 [Atom](#21-atom)\n",
+ "- 2.2 [Vector](#22-vector)\n",
+ "- 2.3 [List](#23-list)\n",
+ "- 2.4 [Dictionary](#24-dictionary)\n",
+ "- 2.5 [Table](#25-table)\n",
+ "- 2.6 [Other data types](#26-other-data-types)\n",
+ "\n",
+ "\n",
+ "### 2.1 Atom\n",
+ "\n",
+ "In PyKX, an `atom` is a single irreducible value of a specific type. For example, you may come across `pykx.FloatAtom` or `pykx.DateAtom` objects which may have been generated as follows, from an equivalent Pythonic representation. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.FloatAtom(1.0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import date\n",
+ "kx.DateAtom(date(2020, 1, 1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.2 Vector\n",
+ "\n",
+ "Like PyKX atoms, PyKX `Vectors` are a data structure with multiple elements of a single type. These objects in PyKX, along with lists described below, form the basis for most of the other important data structures that you will encounter including dictionaries and tables.\n",
+ "\n",
+ "Vector objects provide significant benefits when applying analytics over Python lists. Like Numpy, PyKX gains from the underlying speed of its analytic engine when operating on these strictly typed objects.\n",
+ "\n",
+ "Vector type objects are always 1-D and are/can be indexed along a single axis.\n",
+ "\n",
+ "In the following example, we create PyKX vectors from common Python equivalent `numpy` and `pandas` objects:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.IntVector(np.array([1, 2, 3, 4], dtype=np.int32))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.toq(pd.Series([1, 2, 3, 4]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.3 List\n",
+ "\n",
+ "A PyKX `List` is an untyped vector object. Unlike vectors which are optimised for the performance of analytics, lists are mostly used for storing reference information or matrix data.\n",
+ "\n",
+ "Unlike vector objects which are 1-D in shape, lists can be ragged N-Dimensional objects. This makes them useful for storing complex data structures, but limits their performance when dealing with data-access/data modification tasks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.List([[1, 2, 3], [1.0, 1.1, 1.2], ['a', 'b', 'c']])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.4 Dictionary\n",
+ "\n",
+ "A PyKX `Dictionary` is a direct mapping from keys to values. The list of keys and the list of values with which they are associated must have the same count. While it can be considered as a set of key-value pairs, it's physically stored as a pair of lists."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.Dictionary({'x': [1, 2, 3], 'x1': np.array([1, 2, 3])})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.5 Table\n",
+ "\n",
+ "PyKX `Tables` are a first-class typed entity which lives in memory. They're a collection of named columns implemented as a dictionary. This mapping construct means that PyKX tables are column oriented. This makes analytic operations on columns much faster than for a relational database equivalent.\n",
+ "\n",
+ "PyKX Tables come in many forms, but the key table types are as follows:\n",
+ "\n",
+ " - `pykx.Table` \n",
+ " - `pykx.KeyedTable`\n",
+ " - `pykx.SplayedTable`\n",
+ " - `pykx.PartitionedTable`\n",
+ "\n",
+ "In this section we exemplify the first two, which are the in-memory data table types.\n",
+ "\n",
+ "#### pykx.Table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(kx.Table([[1, 2, 'a'], [2, 3, 'b'], [3, 4, 'c']], columns = ['col1', 'col2', 'col3']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(kx.Table(data = {'col1': [1, 2, 3], 'col2': [2 , 3, 4], 'col3': ['a', 'b', 'c']}))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### pykx.KeyedTable"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.Table([[1, 2, 'a'], [2, 3, 'b'], [3, 4, 'c']],\n",
+ " columns = ['col1', 'col2', 'col3'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.Table(data = {\n",
+ " 'col1': [1, 2, 3],\n",
+ " 'col2': [2 , 3, 4],\n",
+ " 'col3': ['a', 'b', 'c']})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### `pykx.KeyedTable`\n",
+ "\n",
+ "[pykx.KeyedTable](../api/pykx-q-data/wrappers.html#pykx.wrappers.KeyedTable)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.Table(data = {'x': [1, 2, 3], 'x1': [2, 3, 4], 'x2': ['a', 'b', 'c']}\n",
+ " ).set_index(['x'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2.6 Other data types\n",
+ "\n",
+ "Below we outline some of the important PyKX data type structures that you will run into throughout the rest of this notebook.\n",
+ "\n",
+ "#### pykx.Lambda\n",
+ "\n",
+ "A `pykx.Lambda` is the most basic kind of function within PyKX. They take between 0 and 8 parameters and are the building blocks for most analytics written by users when interacting with data from PyKX."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pykx_lambda = kx.q('{x+y}')\n",
+ "type(pykx_lambda)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pykx_lambda(1, 2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### pykx.Projection\n",
+ "\n",
+ "Like [functools.partial](https://docs.python.org/3/library/functools.html#functools.partial), functions in PyKX can have some of their parameters set in advance, resulting in a new function, which is called a projection. When you call this projection, the set parameters are no longer required and cannot be provided.\n",
+ "\n",
+ "If the original function had `n` total parameters and `m` provided, the result would be a function (projection) that requires the user to input `n-m` parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "projection = kx.q('{x+y}')(1)\n",
+ "projection"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "projection(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Access and create PyKX objects\n",
+ "\n",
+ "Now that you're familiar with the PyKX object types, let's see how they work in real-world scenarios, such as:\n",
+ "\n",
+ "- 3.1 [Create PyKX objects from Pythonic data types](#31-create-pykx-objects-from-pythonic-data-types)\n",
+ "- 3.2 [Random data generation](#32-random-data-generation)\n",
+ "- 3.3 [Run q code to generate data](#33-run-q-code-to-generate-data)\n",
+ "- 3.4 [Read data from a CSV file](#34-read-data-from-a-csv-file)\n",
+ "- 3.5 [Query external processes via IPC](#35-query-external-processes-via-ipc)\n",
+ "\n",
+ "### 3.1 Create PyKX objects from Pythonic data types\n",
+ "\n",
+ "One of the most common ways to generate PyKX data is by converting equivalent Pythonic data types. PyKX natively supports conversions to and from the following common Python data formats:\n",
+ "\n",
+ "- Python\n",
+ "- Numpy\n",
+ "- Pandas\n",
+ "- PyArrow\n",
+ "\n",
+ "You can generate PyKX objects by using the `kx.toq` PyKX function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pydict = {'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}\n",
+ "kx.toq(pydict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nparray = np.array([1, 2, 3, 4], dtype = np.int32)\n",
+ "kx.toq(nparray)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pdframe = pd.DataFrame(data = {'a':[1, 2, 3], 'b': ['a', 'b', 'c']})\n",
+ "kx.toq(pdframe)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2 Random data generation\n",
+ "\n",
+ "PyKX provides a module to create random data of user-specified PyKX types or their equivalent Python types. The creation of random data helps in prototyping analytics.\n",
+ "\n",
+ "As a first example, generate a list of 1,000,000 random floating-point values between 0 and 1 as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.random(1000000, 1.0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you wish to choose values randomly from a list, use the list as the second argument to your function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.random(5, [kx.LongAtom(1), ['a', 'b', 'c'], np.array([1.1, 1.2, 1.3])])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Random data does not only come in one-dimensional forms. To create multi-dimensional PyKX Lists, turn the first argument into a list. The following examples include a PyKX trick that uses nulls/infinities to generate random data across the full allowable range:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.random([2, 5], kx.GUIDAtom.null)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.random([2, 3, 4], kx.IntAtom.inf)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, to have consistency over the generated objects, set the seed for the random data generation explicitly. You can do this globally or for individual function calls:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.seed(10)\n",
+ "kx.random.random(10, 2.0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.random.random(10, 2.0, seed = 10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.3 Run q code to generate data\n",
+ "\n",
+ "PyKX is an entry point to the vector programming language q. This means that PyKX users can execute q code directly via PyKX within a Python session, by calling `kx.q`.\n",
+ "\n",
+ "For example, to create q data, run the following command:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q('0 1 2 3 4')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q('([idx:desc til 5]col1:til 5;col2:5?1f;col3:5?`2)')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, apply arguments to a user-specified function `x+y`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q('{x+y}', kx.LongAtom(1), kx.LongAtom(2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.4 Read data from a CSV file\n",
+ "\n",
+ "A lot of data that you run into for data analysis tasks comes in the form of CSV files. PyKX, like Pandas, provides a CSV reader called via `kx.q.read.csv`. In the next cell we create a CSV that can be read in PyKX:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import csv\n",
+ "\n",
+ "with open('pykx.csv', 'w', newline='') as file:\n",
+ " writer = csv.writer(file)\n",
+ " field = [\"name\", \"age\", \"height\", \"country\"]\n",
+ " \n",
+ " writer.writerow(field)\n",
+ " writer.writerow([\"Oladele Damilola\", \"40\", \"180.0\", \"Nigeria\"])\n",
+ " writer.writerow([\"Alina Hricko\", \"23\", \"179.2\", \"Ukraine\"])\n",
+ " writer.writerow([\"Isabel Walter\", \"50\", \"179.5\", \"United Kingdom\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.read.csv('pykx.csv', types = {'age': kx.LongAtom, 'country': kx.SymbolAtom})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.remove('pykx.csv')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.5 Query external processes via IPC\n",
+ "\n",
+ "One of the most common usage patterns in organizations with access to data in kdb+/q is to query data from an external server process infrastructure. For the example below you need to [install q](https://kx.com/kdb-insights-personal-edition-license-download/).\n",
+ "\n",
+ "First, set up a q/kdb+ server. Set it on port 5000 and populate it with some data in the form of a table `tab`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import subprocess\n",
+ "import time\n",
+ "\n",
+ "try:\n",
+ " with kx.PyKXReimport():\n",
+ " proc = subprocess.Popen(\n",
+ " ('q', '-p', '5000')\n",
+ " )\n",
+ " time.sleep(2)\n",
+ "except:\n",
+ " raise kx.QError('Unable to create q process on port 5000')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once a q process is available, connect to it for synchronous query execution:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "conn = kx.SyncQConnection(port = 5000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can now run q commands against the q server:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "conn('tab:([]col1:100?`a`b`c;col2:100?1f;col3:100?0Ng)')\n",
+ "conn('select from tab where col1=`a')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively, use the PyKX query API:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "conn.qsql.select('tab', where=['col1=`a', 'col2<0.3'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or use PyKX's context interface to run SQL server side if you have access to it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "conn('\\l s.k_')\n",
+ "conn.sql('SELECT * FROM tab where col2>=0.5')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, shut down the q server used for this demonstration:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "proc.kill()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Run analytics on PyKX objects\n",
+ "\n",
+ "Like many Python libraries (including Numpy and Pandas), PyKX provides many ways to use its data with analytics that you generated and defined within the library. Let's explore the following:\n",
+ "\n",
+ "- 4.1 [Use in-built methods on PyKX Vectors](#41-use-in-built-methods-on-pykx-vectors)\n",
+ "- 4.2 [Use in-built methods on PyKX Tables](#42-use-in-built-methods-on-pykx-tables)\n",
+ "- 4.3 [Use PyKX/q native functions](#43-use-pykxq-native-functions)\n",
+ "\n",
+ "\n",
+ "### 4.1 Use in-built methods on PyKX Vectors\n",
+ "\n",
+ "When you interact with PyKX Vectors, you may wish to gain insights into these objects through the application of basic analytics such as calculation of the `mean`/`median`/`mode` of the vector:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "q_vector = kx.random.random(1000, 10.0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "q_vector.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "q_vector.max()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The above is useful for basic analysis. For bespoke analytics on these vectors, use the `apply` method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def bespoke_function(x, y):\n",
+ " return x*y\n",
+ "\n",
+ "q_vector.apply(bespoke_function, 5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.2 Use in-built methods on PyKX Tables\n",
+ "\n",
+ "In addition to the vector processing capabilities of PyKX, it's important to have the ability to manage tables. Highlighted in depth within the Pandas-Like API documentation [here](../user-guide/advanced/Pandas_API.ipynb), these methods allow you to apply functions and gain insights into your data in a familiar way.\n",
+ "\n",
+ "The example below uses combinations of the most used elements of this Table API operating on the following table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "N = 1000000\n",
+ "example_table = kx.Table(data = {\n",
+ " 'sym' : kx.random.random(N, ['a', 'b', 'c']),\n",
+ " 'col1' : kx.random.random(N, 10.0),\n",
+ " 'col2' : kx.random.random(N, 20)\n",
+ " }\n",
+ ")\n",
+ "example_table"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can search for and filter data within your tables using `loc` similarly to how this is achieved by Pandas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_table.loc[example_table['sym'] == 'a']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This also happens when retrieving data from a table through the `__getitem__` method (square-bracket indexing):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_table[example_table['sym'] == 'b']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, you can set the index columns of a table. In PyKX, this means converting the table from a `pykx.Table` object to a `pykx.KeyedTable` object:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_table.set_index('sym')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or you can apply basic data manipulation operations such as `mean` and `median`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print('mean:')\n",
+ "display(example_table.mean(numeric_only = True))\n",
+ "\n",
+ "print('median:')\n",
+ "display(example_table.median(numeric_only = True))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, use the `groupby` method to group PyKX tabular data so you can use it for analytic purposes.\n",
+ "\n",
+ "In the first example, let's start by grouping the dataset based on the `sym` column and calculate the `mean` for each column based on their `sym`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_table.groupby('sym').mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To extend the above `groupby`, consider a more complex example which uses `numpy` to run calculations on the PyKX data. You will notice later that you can simplify this specific use-case further."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def apply_func(x):\n",
+ " nparray = x.np()\n",
+ " return np.sqrt(nparray).mean()\n",
+ "\n",
+ "example_table.groupby('sym').apply(apply_func)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For time-series specific joining of data, use `merge_asof` joins. In this example, you have two tables with temporal information, namely a `trades` and a `quotes` table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trades = kx.Table(data={\n",
+ " \"time\": [\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n",
+ " ],\n",
+ " \"ticker\": [\n",
+ " \"GOOG\",\n",
+ " \"MSFT\",\n",
+ " \"MSFT\",\n",
+ " \"MSFT\",\n",
+ " \"GOOG\",\n",
+ " \"AAPL\",\n",
+ " \"GOOG\",\n",
+ " \"MSFT\"\n",
+ " ],\n",
+ " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n",
+ " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n",
+ "})\n",
+ "quotes = kx.Table(data={\n",
+ " \"time\": [\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n",
+ " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n",
+ " ],\n",
+ " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n",
+ " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n",
+ " \"quantity\": [75, 155, 100, 100, 100]\n",
+ "})\n",
+ "\n",
+ "print('trades:')\n",
+ "display(trades)\n",
+ "print('quotes:')\n",
+ "display(quotes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When applying the `asof` join, you can additionally use named arguments to make a distinction between the tables that the columns originate from. In this case, suffix with `_trades` and `_quotes`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trades.merge_asof(quotes, on='time', suffixes=('_trades', '_quotes'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.3 Use PyKX/q native functions\n",
+ "\n",
+ "While the Pandas-like API and the methods available on PyKX Vectors provide an effective way of applying analytics to PyKX data, the most efficient and performant way to run analytics on your data is by using PyKX/q primitives available through the `kx.q` module.\n",
+ "\n",
+ "These include functionality for calculating moving averages, asof/window joins, column reversal etc. Now let's see a few examples with how you can use these functions, grouped into the following sections:\n",
+ "\n",
+ "- 4.3.1 [Mathematical functions](#431-mathematical-functions)\n",
+ "- 4.3.2 [Iteration functions](#432-iteration-functions)\n",
+ "- 4.3.3 [Table functions](#433-table-functions)\n",
+ "\n",
+ "#### 4.3.1 Mathematical functions\n",
+ "\n",
+ "##### mavg\n",
+ "\n",
+ "Calculate a series of average values across a list using a rolling window:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.mavg(10, kx.random.random(10000, 2.0))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### cor\n",
+ "\n",
+ "Calculate the correlation between two lists:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.cor([1, 2, 3], [2, 3, 4])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.cor(kx.random.random(100, 1.0), kx.random.random(100, 1.0))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### prds\n",
+ "\n",
+ "Calculate the cumulative product across a supplied list:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.prds([1, 2, 3, 4, 5])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 4.3.2 Iteration functions\n",
+ "\n",
+ "##### each\n",
+ "\n",
+ "Supplied both as a standalone primitive and as a method for PyKX Lambdas, `each` allows you to pass individual elements of a PyKX object to a function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.each(kx.q('{prd x}'), kx.random.random([5, 5], 10.0, seed=10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q('{prd x}').each(kx.random.random([5, 5], 10.0, seed=10))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### 4.3.3 Table functions\n",
+ "\n",
+ "##### meta\n",
+ "\n",
+ "Retrieve metadata information about a table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "qtab = kx.Table(data = {\n",
+ " 'x' : kx.random.random(1000, ['a', 'b', 'c']).grouped(),\n",
+ " 'y' : kx.random.random(1000, 1.0),\n",
+ " 'z' : kx.random.random(1000, kx.TimestampAtom.inf)\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.meta(qtab)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### xasc\n",
+ "\n",
+ "Sort a table in ascending order by the contents of a specified column:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.q.xasc('z', qtab)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can find the full list of the functions and some examples of their usage [here](../api/pykx-execution/q.md).\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "file_extension": ".py()",
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ },
+ "mimetype": "text/x-python",
+ "name": "python",
+ "npconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": 3
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/examples/ipc/README.md b/docs/examples/ipc/README.md
index 4477213..d9b6f5c 100644
--- a/docs/examples/ipc/README.md
+++ b/docs/examples/ipc/README.md
@@ -1,22 +1,33 @@
+---
+title: PyKX IPC interface
+description: IPC interface example
+date: October 2024
+author: KX Systems, Inc.
+tags: compression, encryption, PyKX
+---
+
# IPC interface example
-The purpose of this example is to provide a quickstart for interfacing with external q processes using PyKX. The example presented should operate in the presence or absence of a `k4.lic`, and as such is intended to show the flexibility of this interface for users who had previously used both PyKX and those who are familiar with qPython.
+_This example provides a quickstart for interfacing with external q processes using PyKX._
+
+This example should work whether or not a `#!python k4.lic` file is present. The purpose is to show the flexibility of this interface for users who had previously used PyKX or are familiar with qPython.
-To follow along with this example please feel free to download this zip archive that contains a copy of the python script and this writeup.
+To follow along, feel free to download this zip archive that contains a copy of the Python script and this writeup.
## Quickstart
-This example shows a basic tickerplant configured as follows
+This example shows a basic tickerplant configured as follows:
![tick](./imgs/tickerplant.png)
Here we have:
-1. A q data feed publishing trade messages to a tick process
-2. A q process running a modified tick.q
-3. A Python process subscribing to the tick process, running a Python analytic on the trade data and pushing the results to another process
-4. A q process to which the results of the Python analytic can be pushed
-For more information about the differences between the licensed and unlicensed version of this example please consult `readwrite.py` this has a breakdown of the steps taken in the presence/absence of a licensed shared object.
+1. A q data feed publishing trade messages to a tick process.
+2. A q process running a modified `#!python tick.q`.
+3. A Python process subscribing to the tick process, running a Python analytic on the trade data and pushing the results to another process.
+4. A q process to which the results of the Python analytic can be pushed.
+
+!!! tip "For more information about the differences between the licensed and unlicensed versions of this example consult `#!python readwrite.py` for a breakdown of the steps taken in the presence/absence of a licensed shared object."
### Start the required q processes
@@ -35,6 +46,7 @@ q)
```
### Start the pykx subscriber/publisher
+
```bash
// When running with a valid k4.lic in $QHOME
$ python readwrite.py
@@ -47,7 +59,7 @@ Running example in absence of licensed q
### Outcome
-What should be observed on invocation of the above is that the process running on 5130 should begin to receive summaries of the average size/price of the individual tick symbols being published. The licensed and unlicensed versions are not the same in this regard.
+On invocation of the above, the process running on 5130 should begin to receive summaries of the average size/price of the individual tick symbols being published. The licensed and unlicensed versions are not the same in this regard.
1. The licensed version will return the average over the entire trade table that it is subscribed to
2. The unlicensed version will display the the average over the most recent batch of data received
diff --git a/docs/examples/jupyter-integration.ipynb b/docs/examples/jupyter-integration.ipynb
new file mode 100644
index 0000000..c908059
--- /dev/null
+++ b/docs/examples/jupyter-integration.ipynb
@@ -0,0 +1,638 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "5c1e9b66",
+ "metadata": {},
+ "source": [
+ "# Jupyter Notebooks\n",
+ "\n",
+ "_This notebook demonstrates how to use the q Magic command in a Jupyter notebook._\n",
+ "\n",
+ "\n",
+ "The Jupyter q magic command in PyKX allows you to execute q code within a Jupyter notebook. It provides seamless integration with the q programming language.\n",
+ "\n",
+ "This example Notebook has the following sections:\n",
+ "\n",
+ "1. [Import PyKX](#1-import-pykx)\n",
+ "1. [Create the external q process](#2-create-the-external-q-process)\n",
+ "1. [Execute against Embedded q](#3-execute-against-embedded-q)\n",
+ "1. [SQL interface](#4-sql-interface)\n",
+ "1. [q namespaces](#5-q-namespaces)\n",
+ "1. [(Advanced) q over IPC](#6-advanced-q-over-ipc)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2748405",
+ "metadata": {
+ "tags": [
+ "hide_code"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['PYKX_IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
+ "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "688b9ed0",
+ "metadata": {},
+ "source": [
+ "## 1. Import PyKX\n",
+ "\n",
+ "To run this example, first import the PyKX library:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4d3694e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pykx as kx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c520c21",
+ "metadata": {},
+ "source": [
+ "## 2. Create the external q process\n",
+ "\n",
+ "You can run an external q process by using the following Python code:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57e66aca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import subprocess\n",
+ "import time\n",
+ "\n",
+ "try:\n",
+ " with kx.PyKXReimport():\n",
+ " proc = subprocess.Popen(\n",
+ " ('q', '-p', '5000')\n",
+ " )\n",
+ " time.sleep(2)\n",
+ "except:\n",
+ " raise kx.QError('Unable to create q process on port 5000')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b318ba2",
+ "metadata": {},
+ "source": [
+ "\n",
+ "Or execute this command in a terminal:\n",
+ "\n",
+ "```sh\n",
+ "$ q -p 5000\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc7219fb",
+ "metadata": {},
+ "source": [
+ "## 3. Execute against Embedded q\n",
+ "\n",
+ "To execute q code within PyKX's `EmbeddedQ` module, type `%%q` at the beginning of the cell:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ff309a5a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ "til 10"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89ec26e4",
+ "metadata": {},
+ "source": [
+ "After `%%q` you can further add two execution options:\n",
+ "\n",
+ "| **Execution option** | **Description** |\n",
+ "|---------------|----------------------------------------------------|\n",
+ "| --debug | Prints the q backtrace before raising a QError if the cell gives an error.|\n",
+ "| --display | Calls display rather than the default print on returned objects.|"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c168914",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ "([] a: 1 2 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c5c2f85",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --display\n",
+ "([] a: 1 2 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cda0d38a",
+ "metadata": {},
+ "source": [
+ "#### Executing against an external q process over IPC\n",
+ "\n",
+ "Connection information can also be included after the `%%q` to connect to a remote `q` process over\n",
+ "IPC.\n",
+ "\n",
+ "Here is the list of currently supported connection parameters.\n",
+ "If a parameter specifies a type, a value of that type is expected to follow it.\n",
+ "If no type is specified, the parameter can be used as a standalone flag.\n",
+ "\n",
+ "```\n",
+ "--host: A string object denoting the host to connect to\n",
+ "--port: An int object denoting the port to connect over\n",
+ "--user: A str object denoting the username to use when connecting\n",
+ "--password: A str object denoting the password to use when connecting\n",
+ "--timeout: A float object denoting the time in seconds before the query\n",
+ " times out, defaults to no timeout\n",
+ "--nolarge: Disable messages over 2GB being sent / received\n",
+ "--tls: Use a tls connection\n",
+ "--unix: Use a unix connection\n",
+ "--reconnection_attempts: An int object denoting how many\n",
+ " reconnection attempts to make\n",
+ "--noctx: Disable the context interface\n",
+ "```\n",
+ "\n",
+ "Connect to a q server running on `localhost` at port `5000` as `user` using password `password`\n",
+ "and disable the context interface."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1faca1e1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --host localhost --port 5000 --user user --pass password --noctx\n",
+ "til 10"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f046ebb6",
+ "metadata": {},
+ "source": [
+ "All connection arguments are optional with the exception of the `--port` argument. If `--host` is not provided `localhost` will be used as the default host."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "615d7d2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --port 5000\n",
+ "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d756f342",
+ "metadata": {},
+ "source": [
+ "It is possible to execute `q` code spanning multiple lines."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c739a80a",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%q --port 5000\n",
+ "afunc: {[x; y]\n",
+ " x + y \n",
+ " };\n",
+ "afunc[0; 1]\n",
+ "afunc[2; 3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2905895e",
+ "metadata": {},
+ "source": [
+ "## 4. SQL interface\n",
+ "\n",
+ "The `s)` syntax runs SQL queries against local tables within the `q` process.\n",
+ "\n",
+ "Note: To use the SQL interface, first you need to load the `s.k_` library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "56220bb5",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ "\\l s.k_\n",
+ "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);\n",
+ "s) select * from tab where a>500 and b<250.0 limit 5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da906296",
+ "metadata": {},
+ "source": [
+ "## 5. q namespaces\n",
+ "\n",
+ "You can use `q` namespaces, and switch between them with `\\d`.\n",
+ "\n",
+ "Note: The namespace is reset back to the base namespace `.` between cells."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "502af937",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ "\\d .example\n",
+ "f: {[x] til x};"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "58d0c7c9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ "\\d\n",
+ ".example.f[10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52ca850e",
+ "metadata": {},
+ "source": [
+ "## 6. (Advanced) q over IPC\n",
+ "\n",
+ "After `%%q` you can include connection information, if you wish to connect to a remote `q` process over IPC. \n",
+ "\n",
+ "The list of supported connection parameters is below. The rule is:\n",
+ "\n",
+ "- If they have a type, it must be followed by a second value/parameter.\n",
+ "- If there's no type after them, you can use them as a standalone flag.\n",
+ "\n",
+ "| **Parameter** | **Object type and description**|\n",
+ "|-----------------------|-----------------------------------------------|\n",
+ "|--host | (string) The host to connect to. |\n",
+ "|--port | (integer) The port to connect over. |\n",
+ "|--user | (string) The username to use when connecting. |\n",
+ "|--password | (string) The password to use when connecting. |\n",
+ "|--timeout | (float) The time in seconds before the query times out. Defaults to no timeout.|\n",
+ "|--nolarge | Disable messages over 2GB being sent / received. |\n",
+ "|--tls | Use a tls connection. |\n",
+ "|--unix | Use a unix connection. |\n",
+ "|--reconnection_attempts| (integer) How many reconnection attempts to make.|\n",
+ "|--noctx | Disable the context interface. |\n",
+ "\n",
+ "Connect to a q server running on `localhost` at port `5000` as `user` using password `password`\n",
+ "and disable the context interface."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a282e069",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --host localhost --port 5000 --user user --pass password --noctx\n",
+ "til 10"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a1fe3b8e",
+ "metadata": {},
+ "source": [
+ "All connection arguments are optional, except the `--port` argument. If `--host` is not provided `localhost` is the default host."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "18d8416b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --port 5000\n",
+ "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e143c382",
+ "metadata": {},
+ "source": [
+ "Note that it's possible to execute `q` code spanning multiple lines:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ccb197e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --port 5000\n",
+ "afunc: {[x; y]\n",
+ " x + y \n",
+ " };\n",
+ "afunc[0; 1]\n",
+ "afunc[2; 3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c12a7d38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shutdown the q process we were connected to for the IPC demo\n",
+ "proc.kill()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "607997ac-e7d7-4cc9-a06f-aa1cd3d742ce",
+ "metadata": {},
+ "source": [
+ "#### q first mode\n",
+ "q first mode can be enabled by importing PyKX after setting the environment variable `PYKX_JUPYTERQ` to `true`, or at runtime use:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1aaa78ff-d3a7-4c57-8064-57994d0cbd9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kx.util.jupyter_qfirst_enable()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d7c06591-e272-4a7c-9fcb-278687c33598",
+ "metadata": {},
+ "source": [
+ "Once enabled, you can call `q` code without needing to include `%%q` at the beginning of a cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8575f62-a801-42cd-bd45-07b153c513e6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t:3?15t*3\n",
+ "t"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dec08871-a771-4848-8283-f88206f54785",
+ "metadata": {},
+ "source": [
+ "In this state, you can execute Python code as well, but those cells must include `%%python`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99ba8ac4-4c82-4819-846d-0a835feeb869",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%python\n",
+ "for fruit in ['apple', 'orange', 'banana']:\n",
+ " print(fruit)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6236ce20-598b-4216-bd53-ad2ccffc539d",
+ "metadata": {},
+ "source": [
+ "If you wish to exit q first mode, simply run the following code and the notebook will revert back to default, Python first execution. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c989a184-1990-4492-9344-3eeb5d673d36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%python\n",
+ "kx.util.jupyter_qfirst_disable()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2b2ae217-4c0f-47b1-be8c-1e43b1955c52",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for x in range(3):\n",
+ " print(x * 1.5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a8e9850-2f75-462c-adc0-ebaf24c70744",
+ "metadata": {},
+ "source": [
+ "To enable qfirst mode from q, run the following."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d68fe60b-0abc-4171-b6e2-8487d483f28f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q\n",
+ ".pykx.enableJupyter()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea6d07a7-3c7c-4eab-8312-4e7800fca766",
+ "metadata": {},
+ "source": [
+ "And to return to Python first execution run the code below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e5110ccd-d117-45a4-be6a-002ff8627372",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ ".pykx.disableJupyter()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "080dd085-e54f-478c-a305-eac9f23db020",
+ "metadata": {},
+ "source": [
+ "#### Saving code blocks\n",
+ "The `--save` feature allows users to save code in a cell as a q file.\n",
+ "\n",
+ "To use this feature, include `--save` followed by the `path` of the file.\n",
+ "\n",
+ "*Note:* If the `q` script errors the file will not be saved.\n",
+ "\n",
+ "*Note:* Using `--save` on an IPC connection cell will save the file on the remote host."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f69436e6-8ce2-4583-befb-69310be60bae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --save ../../new_file.q\n",
+ "vals:til 10\n",
+ "vals * 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5f96985a-235b-4d01-bd5d-38ed4122d28e",
+ "metadata": {},
+ "source": [
+ "If the user wants to save a code block without executing it first, they can include `--execute False` at the beginning of a cell.\n",
+ "\n",
+ "*Note:* Nothing is output when the code below is run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db295305-2006-494d-8f2d-3e81cac0226f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --save ../../new_file.q --execute False\n",
+ "new_val:3 6 9\n",
+ "new_val"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca9cd0ae-e726-48a8-bdc7-9c2df40d95d7",
+ "metadata": {},
+ "source": [
+ "File paths that end in `.q_` will automatically be created as locked files without the need for any additional input."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0251b387-ed38-45d0-acaa-e9bcaaebc3fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%q --save ../../new_secretfile.q_\n",
+ "pub_vals:til 10\n",
+ "secret_func:{x+7}\n",
+ "secret_func pub_vals"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3047a416",
+ "metadata": {
+ "tags": [
+ "hide_code"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "os.remove('../../new_file.q')\n",
+ "os.remove('../../new_secretfile.q_')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/server/server.md b/docs/examples/server/server.md
index 989e63c..4e34eb8 100644
--- a/docs/examples/server/server.md
+++ b/docs/examples/server/server.md
@@ -1,35 +1,42 @@
-# Using PyKX as a `q` Server
+---
+title: PyKX as q server
+description: PyKX as q server example
+date: October 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, server
+---
-The purpose of this example is to provide a quick start for setting up PyKX as a `q` server that other
-`q` and PyKX sessions can connect to.
+# Use PyKX as a `#!python q` Server
-To follow along with this example please feel free to download this zip archive that contains a copy of the python script and this writeup.
+_This example provides a quick start for setting up PyKX as a `#!python q` server that other `#!python q` and PyKX sessions can connect to._
+
+To follow along, feel free to download this zip archive that contains a copy of the python script and this writeup.
## Quick start
-To run this example simply run the `server.py` script and it will launch a `PyKX` server on port 5000 or
-you can run `server_async.py` to run an asyncronous version of the server.
-The server will print out any queries it receives as well as the result of executing the query before replying.
+To run this example, run the `#!python server.py` script to launch a `#!python PyKX` server on port 5000. Alternatively, run `#!python server_async.py` to run an asynchronous version of the server.
+
+The server prints out any queries it receives as well as the result of executing the query before replying.
```bash
python server.py
// or
python server_async.py
```
+## Extra configuration options
-## Extra Configuration Options
+### User validation
-### User Validation
+You can add a function to validate users when they try to connect to the server. You can do so by overriding the `#!python .z.pw` function. By default all connection attempts will be accepted.
-It is possible to add a function to validate users when they try to connect to the server. This can
-be done by overriding the `.z.pw` function. By default all connection attempts will be accepted.
+The function receives 2 arguments when a user connects:
-The function will be passed 2 arguments when a user connects, the first will be the username, and the
-second will be the password (if no password is provided `None`/`::` will be passed in place of a password).
+ - username
+ - password (if no password is provided `#!python None`/`#!python ::` will be passed in place of a password).
-Note: The function needs to be overridden using `EmbeddedQ` not on the q connection.
+!!! note "Important! You need to override the function using `#!python EmbeddedQ` not on the q connection."
-Here is an example of overriding it using a python function as a validation function.
+Here is an example of overriding it using a Python function as a validation function:
```python
def validate(user, password):
@@ -40,21 +47,21 @@ def validate(user, password):
kx.q.z.pw = validate
```
-Here is an example of overriding it using a q function as a validation function.
+Here is an example of overriding it using a q function as a validation function:
```q
kx.q.z.pw = kx.q('{[user; password] $[password=`password; 1b; 0b]}')
```
-### Message Handler
+### Message handler
-The message handler can be overridden to apply custom logic to incoming queries. By default it just returns
-the result of calling `kx.q.value()` on the incoming query. This function will be passed a `CharVector`
+You can override the message handler to apply custom logic to incoming queries. By default, it returns
+the result of calling `#!python kx.q.value()` on the incoming query. This function will be passed a `#!python CharVector`
containing the incoming query.
-Note: The function needs to be overridden using `EmbeddedQ` not on the q connection.
+!!! note "Important! You need to override the function using `#!python EmbeddedQ` not on the q connection."
-Here is an example of overriding it using a python function as a message handler.
+Here is an example of overriding it using a Python function as a message handler:
```python
def qval(query):
@@ -65,18 +72,19 @@ def qval(query):
kx.q.z.pg = qval
```
-Here is an example of overriding it using a q function as a message handler.
+Here is an example of overriding it using a q function as a message handler:
```q
kx.q.z.pg = kx.q('{[x] show x; show y: value x; y}')
```
-For async messages `kx.q.z.ps` can be managed in the same fashion.
+For async messages, manage `#!python kx.q.z.ps` in the same fashion.
-### Connection Garbage Collection Frequency
+### Connection garbage collection frequency
-One of the keyword arguments you can use when creating a server is `conn_gc_time` this argument takes
+One of the keyword arguments to use when creating a server is `#!python conn_gc_time`. This argument takes
a float as input and the value denotes how often the server will attempt to clear old closed connections.
-By default the value is 0.0 and this will cause the list of connections to be cleaned on every call
-to `poll_recv`, with lots of incoming connections this can cause performance to deteriorate. If you
-set the `conn_gc_time` to `10.0` then this clean-up will happen at most every 10 seconds.
+
+By default the value is `#!python 0.0` and this will cause the list of connections to be cleaned on every call
+to `#!python poll_recv`. With lots of incoming connections, this can degrade performance. If you
+set the `#!python conn_gc_time` to `#!python 10.0` then this clean-up happens at most once every 10 seconds.
diff --git a/docs/examples/streaming/Evolving System.ipynb b/docs/examples/streaming/Evolving System.ipynb
new file mode 100644
index 0000000..1770554
--- /dev/null
+++ b/docs/examples/streaming/Evolving System.ipynb
@@ -0,0 +1,659 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9ef8ef6d",
+ "metadata": {},
+ "source": [
+ "## Example Summary\n",
+ "\n",
+ "The following example shows an end-to-end real-time data ingest and persistence framework orchestrated from Python which once running will allow:\n",
+ "\n",
+ "1. Ingestion of trade and quote data in real-time from a data feed\n",
+ "2. Derivation of analytic insights into this data\n",
+ "3. Persistence of the raw and derived data at end-of-day\n",
+ "4. Querying across multiple real-time and historical processes via a password protected gateway\n",
+ "5. Subscription to raw datasets for users requiring access to the data in real-time\n",
+ " \n",
+ "The full infrastructure we will build will look as follows:\n",
+ "\n",
+ "\n",
+ "\n",
+ "For a full breakdown on streaming within PyKX see our documentation [here](https://code.kx.com/pykx/user-guide/advanced/streaming/index.html) to start your journey."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8345fa7e",
+ "metadata": {},
+ "source": [
+ "#### Initialise PyKX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2171df2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pykx as kx\n",
+ "import subprocess"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92e87fee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['QHOME'] = '/usr/local/anaconda3/envs/qenv/q'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dd189d64",
+ "metadata": {},
+ "source": [
+ "#### Create a Historical Database\n",
+ "\n",
+ "To test queries across multiple processes and database types (in-memory vs on-disk) you can generate a Historical Database in the below cell by calling the Python script `generate_hdb.py` available within your zip file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b1c75f13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with kx.PyKXReimport():\n",
+ " db = subprocess.Popen(\n",
+ " ['python', 'generate_hdb.py',\n",
+ " '--datapoints', '100000',\n",
+ " '--days', '5',\n",
+ " '--name', 'db'],\n",
+ " stdin=subprocess.PIPE,\n",
+ " stdout=None,\n",
+ " stderr=None,\n",
+ " )\n",
+ "\n",
+ "rc = db.wait()\n",
+ "if rc !=0:\n",
+ " db.stdin.close()\n",
+ " db.kill()\n",
+ " raise Exception('Generating HDB failed')\n",
+ "else:\n",
+ " db.stdin.close()\n",
+ " db.kill()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b1475ac",
+ "metadata": {},
+ "source": [
+ "#### Define Required Schemas\n",
+ "\n",
+ "The data published to the real-time system comes in the form of a `trade` and `quote` table with derived analytics stored in an `aggregate` table.\n",
+ "\n",
+ "These schemas are defined using `kx.schema.builder`, see [here](https://code.kx.com/pykx/api/schema.html#builder) for the full API definition."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "454f21b8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trade = kx.schema.builder({\n",
+ " 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,\n",
+ " 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,\n",
+ " 'px': kx.FloatAtom})\n",
+ "\n",
+ "quote = kx.schema.builder({\n",
+ " 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,\n",
+ " 'exchange': kx.SymbolAtom, 'bid': kx.FloatAtom,\n",
+ " 'ask': kx.FloatAtom , 'bidsz': kx.LongAtom,\n",
+ " 'asksz': kx.LongAtom})\n",
+ "\n",
+ "aggregate = kx.schema.builder({\n",
+ " 'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,\n",
+ " 'trdvol': kx.FloatAtom , 'maxpx': kx.FloatAtom,\n",
+ " 'minpx': kx.FloatAtom , 'maxbpx': kx.FloatAtom,\n",
+ " 'minapx': kx.FloatAtom , 'baspread': kx.FloatAtom})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6d0994d1",
+ "metadata": {},
+ "source": [
+ "#### Core Ingest framework\n",
+ "\n",
+ "The central pillar of the ingestion framework is composed of three connected processes: a Tickerplant, a Real-Time Database (RDB) and a Historical Database (HDB). For this example each of these processes is configured using a single function call to the class [`kx.tick.BASIC`](https://code.kx.com/pykx/api/tick.html#pykx.tick.BASIC). A full breakdown of these processes and how they interact can be found [here](https://code.kx.com/pykx/user-guide/advanced/streaming/basic.html). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46d81811",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "The library functions called in this cell are as follows:\n",
+ "\n",
+ "- [kx.tick.BASIC](https://code.kx.com/pykx/api/tick.html#pykx.tick.BASIC)\n",
+ "- [simple.start](https://code.kx.com/pykx/api/tick.html#pykx.tick.BASIC.start)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3e96731e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "simple = kx.tick.BASIC(\n",
+ " tables = {'trade': trade, 'quote': quote, 'aggregate': aggregate},\n",
+ " ports={'tickerplant': 5010, 'rdb': 5012, 'hdb': 5011},\n",
+ " log_directory = 'log',\n",
+ " database = 'db'\n",
+ ")\n",
+ "simple.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c839f02d",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "581ea672",
+ "metadata": {},
+ "source": [
+ "### Add Data Feed and Python Subscriber\n",
+ "\n",
+ "The following section adds a data-feed which publishes data to the trade and quote tables and a subscriber which validates that the data is available to subscribers.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Firstly let's generate a data feed which publishes trade and quote messages to the Tickerplant on port 5010."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5b1ee89",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with kx.PyKXReimport():\n",
+ " feed = subprocess.Popen(\n",
+ " ['python', 'feed.py'],\n",
+ " stdin=subprocess.PIPE,\n",
+ " stdout=None,\n",
+ " stderr=None,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d810ed9c",
+ "metadata": {},
+ "source": [
+ "Now that data is being published to our system you can generate a subscribing process to get access to the latest trade information printing the number of datapoints which have been processed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5f4fce8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with kx.PyKXReimport():\n",
+ " subscriber = subprocess.Popen(\n",
+ " ['python', 'subscriber.py'],\n",
+ " stdin=subprocess.PIPE,\n",
+ " stdout=None,\n",
+ " stderr=None,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3341000b",
+ "metadata": {},
+ "source": [
+ "The above cell provides real-time information about the number of messages that have been processed. Because this output would add unwanted noise to later cells, you can stop the subscriber by running the following cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cca35b9b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "subscriber.stdin.close()\n",
+ "subscriber.kill()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc6a7953",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9ed4a97",
+ "metadata": {},
+ "source": [
+ "### Add Chained Tickerplant and Real-Time Event Processor\n",
+ "\n",
+ "As highlighted [here](https://code.kx.com/pykx/user-guide/advanced/streaming/rta.html) the application of real-time analytics on your data can result in issues with slow subscribers which can potentially lead to data corruption and loss.\n",
+ "\n",
+ "A common usage pattern to avoid this is the addition of a [`chained tickerplant`](https://code.kx.com/pykx/user-guide/advanced/streaming/rta.html#protecting-data-ingest). In the below cells we will build a chained tickerplant which subscribes to the primary data ingestion pipeline and to which a real-time analytic process subscribes creating analytics which join information from the trade and quote table to derive analytic insights.\n",
+ "\n",
+ "\n",
+ "\n",
+ "The library functions called in this cell are as follows:\n",
+ "\n",
+ "- [kx.tick.TICK](https://code.kx.com/pykx/api/tick.html#pykx.tick.TICK)\n",
+ "- [chained_tp.start](https://code.kx.com/pykx/api/tick.html#pykx.tick.TICK.start)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6c2a1503",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chained_tp = kx.tick.TICK(port=5013, chained=True)\n",
+ "chained_tp.start({'tickerplant': 'localhost:5010'})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40bb7526",
+ "metadata": {},
+ "source": [
+ "Now that your chained tickerplant is started we can initialize a real-time processor which subscribes to `trade` and `quote` data\n",
+ "\n",
+ "The library functions called in this cell are as follows:\n",
+ "\n",
+ "- [kx.tick.RTP](https://code.kx.com/pykx/api/tick.html#pykx.tick.RTP)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b73aa78",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rte = kx.tick.RTP(port=5014, subscriptions = ['trade', 'quote'], vanilla=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75588b4d",
+ "metadata": {},
+ "source": [
+ "In our real-time processor we are looking to achieve three things:\n",
+ "\n",
+ "1. Filter out any messages from tables other than `trade`/`quote` if received\n",
+ "2. Apply a post-processing function which derives aggregate information about all `trade`/`quote` data seen in the current day to keep up-to-date information about the market.\n",
+ "3. Publish the aggregate data back to the primary tickerplant on port 5010 to ensure that the information is persisted. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43bdeea2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def pre_processor(table, message):\n",
+ " if table in ['trade', 'quote']:\n",
+ " return message\n",
+ " return None\n",
+ "\n",
+ "# Define a Python post-processing function which publishes back to\n",
+ "# the tickerplant\n",
+ "def post_processor(table, message):\n",
+ " tradeagg = kx.q.qsql.select('trade',\n",
+ " columns={'trdvol': 'sum px*sz',\n",
+ " 'maxpx': 'max px',\n",
+ " 'minpx': 'min px'},\n",
+ " by='sym')\n",
+ " quoteagg = kx.q.qsql.select('quote',\n",
+ " columns={'maxbpx': 'max bid',\n",
+ " 'minapx': 'min ask',\n",
+ " 'baspread': 'max[bid]-min[ask]'},\n",
+ " by='sym')\n",
+ " tab = tradeagg.merge(quoteagg, how='left', q_join=True).reset_index()\n",
+ " tab['time'] = kx.TimespanAtom('now')\n",
+ " aggregate = kx.q.xcols(['time', 'sym'], tab)\n",
+ " kx.q['aggregate'] = aggregate\n",
+ " with kx.SyncQConnection(port=5010, wait=False, no_ctx=True) as q:\n",
+ " q('.u.upd', 'aggregate', aggregate._values)\n",
+ " return None"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd611d83",
+ "metadata": {},
+ "source": [
+ "Now that the functions to be used are defined you can do the following:\n",
+ "\n",
+ "1. Specify that the process requires the Python libraries `pykx` to be available as `kx`\n",
+ "2. Register the pre and post processing functions\n",
+ "\n",
+ "The library functions called in this cell are as follows:\n",
+ "\n",
+ "- [rte.libraries](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.libraries)\n",
+ "- [rte.pre_processor](https://code.kx.com/pykx/api/tick.html#pykx.tick.RTP.pre_processor)\n",
+ "- [rte.post_processor](https://code.kx.com/pykx/api/tick.html#pykx.tick.RTP.post_processor)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6b1f5d07",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rte.libraries({'kx': 'pykx'})\n",
+ "rte.pre_processor(pre_processor)\n",
+ "rte.post_processor(post_processor)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8d19367e",
+ "metadata": {},
+ "source": [
+ "Finally we can start the real-time processor listening for messages from the chained-tickerplant on port 5013\n",
+ "\n",
+ "The library functions called in this cell are as follows:\n",
+ "\n",
+ "- [rte.start](https://code.kx.com/pykx/api/tick.html#pykx.tick.RTP.start)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8991f86b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rte.start({'tickerplant': 'localhost:5013'})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "416146aa",
+ "metadata": {},
+ "source": [
+ "While the above steps allow this processing to be possible it can all be configured in two steps when setting up your real-time processor. This is outlined [here](https://code.kx.com/pykx/user-guide/advanced/streaming/rta.html#running-all-setup-at-once)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b7856bb",
+ "metadata": {},
+ "source": [
+ "#### Add a Query API across real-time and historical data\n",
+ "\n",
+ "The following section adds query APIs to the existing real-time processor and historical database processes which in each case queries the trade table to calculate the number of trades for a specific symbol. How this is done varies slightly for each process type:\n",
+ "\n",
+ "- Real-Time Processor: Query the in-memory table using SQL\n",
+ "- Historical Database: Query using QSQL the on-disk database limiting the search to N-Days in the past"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ceb4b05f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def RTE_query(sym):\n",
+ "    return kx.q.sql('select count(sym) from trade where sym=$1', sym)\n",
+ "# HDB query: count trades for sym over every date from today-n to today (inclusive)\n",
+ "def HDB_query(sym, n):\n",
+ "    today = kx.DateAtom('today')\n",
+ "    return kx.q.qsql.select('trade', {'sym':'count sym'}, where = [f'date within ({today-n};{today})', f'sym like \"{sym}\"'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c69919f",
+ "metadata": {},
+ "source": [
+ "The library functions called in the following cells are:\n",
+ "\n",
+ "- [rte.register_api](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.register_api)\n",
+ "- [simple.hdb.libraries](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.libraries)\n",
+ "- [simple.hdb.register_api](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.register_api)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82071757",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rte.register_api('custom_rte', RTE_query)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "998906fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "simple.hdb.libraries({'kx': 'pykx'})\n",
+ "simple.hdb.register_api('custom_hdb', HDB_query)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2875f05",
+ "metadata": {},
+ "source": [
+ "### Add a gateway to allow querying across the real-time and historical datasets\n",
+ "\n",
+ "As a final step we will add a gateway process to which all users querying the system will connect and will allow data to be queried from both the real-time and historical datasets\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52664d1a",
+ "metadata": {},
+ "source": [
+ "The following cell defines a function which calls the `custom_rte` and `custom_hdb` APIs on the processes whose connections are named at initialization of the gateway below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de4e6d52",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def gateway_function(sym, n_days=0):\n",
+ " rte_data = gateway.call_port('rte', 'custom_rte', sym)\n",
+ " if n_days>0:\n",
+ " hdb_data = gateway.call_port('hdb', 'custom_hdb', sym, n_days)\n",
+ " else:\n",
+ " hdb_data = kx.Table(data={'sym': [0]})\n",
+ " return rte_data + hdb_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "658af4c7",
+ "metadata": {},
+ "source": [
+ "Additionally we can define a function which specifies the username/password information required by users querying the gateway."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b299983",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_validation(username, password):\n",
+ " if username == 'test_user':\n",
+ " return True\n",
+ " return False"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0c85449b",
+ "metadata": {},
+ "source": [
+ "The library functions called in the following cells are:\n",
+ "\n",
+ "- [kx.tick.GATEWAY](https://code.kx.com/pykx/api/tick.html#pykx.tick.GATEWAY)\n",
+ "- [gateway.start](https://code.kx.com/pykx/api/tick.html#pykx.tick.GATEWAY.start)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59f76ae4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gateway = kx.tick.GATEWAY(\n",
+ " port=5015,\n",
+ " libraries = {'kx': 'pykx'},\n",
+ " apis = {'gateway_function': gateway_function},\n",
+ " connections={'hdb': 'localhost:5011', 'rte': 'localhost:5014'},\n",
+ " connection_validator = user_validation\n",
+ ")\n",
+ "gateway.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e291f3b0",
+ "metadata": {},
+ "source": [
+ "We can now emulate a user querying the gateway as follows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "797dc756",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with kx.SyncQConnection(port=5015, no_ctx=True, username='test_user') as q:\n",
+ " data = q('gateway_function', 'AAPL', 0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bba8cce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06667df5",
+ "metadata": {},
+ "source": [
+ "### Infrastructure shutdown\n",
+ "\n",
+ "To finish this notebook we can finally stop each of the specified processes and the data feed established."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69c4c1e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "feed.stdin.close()\n",
+ "feed.kill()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "daa51d16",
+ "metadata": {},
+ "source": [
+ "The library functions called in the following cells are:\n",
+ "\n",
+ "- [rte.stop](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.stop)\n",
+ "- [chained_tp.stop](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.stop)\n",
+ "- [gateway.stop](https://code.kx.com/pykx/api/tick.html#pykx.tick.STREAMING.stop)\n",
+ "- [simple.stop](https://code.kx.com/pykx/api/tick.html#pykx.tick.BASIC.stop)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5725a693",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rte.stop()\n",
+ "chained_tp.stop()\n",
+ "simple.stop()\n",
+ "gateway.stop()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/streaming/feed.py b/docs/examples/streaming/feed.py
new file mode 100644
index 0000000..cdb8af0
--- /dev/null
+++ b/docs/examples/streaming/feed.py
@@ -0,0 +1,68 @@
+import pykx as kx
+
+import argparse
+import time
+
+parser=argparse.ArgumentParser()
+
+parser.add_argument(
+ "--datapoints",
+ help="Number of datapoints published per message",
+ default=1,
+ type=int)
+
+parser.add_argument(
+ "--timer",
+ help="timer delay between published messages in seconds",
+ default=1,
+ type=float)
+
+variables = vars(parser.parse_args())
+datapoints = variables['datapoints']
+timer = variables['timer']
+
+print('Starting Feedhandler ...')
+print(f'Publishing {datapoints} datapoint(s) every {timer} second(s)')
+# Set to True by main() once the first message(s) have been published
+init = False
+
+
+def main():
+    """Publish random trade batches (and occasional quote batches) to port 5010 forever."""
+    global init
+    symlist = ['AAPL', 'JPM', 'GOOG', 'BRK', 'WPO', 'IBM']
+    exlist = ['NYSE', 'LON', 'CHI', 'HK']
+    while True:
+        sz = 10*kx.random.random(datapoints, 100)
+        px = 20+kx.random.random(datapoints, 100.0)
+        ask = kx.random.random(datapoints, 100.0)
+        asksz = 10*kx.random.random(datapoints, 100)
+        bd = ask - kx.random.random(datapoints, ask)
+        bdsz = asksz - kx.random.random(datapoints, asksz)
+        trade = [kx.random.random(datapoints, symlist),
+                 kx.random.random(datapoints, exlist),
+                 sz, px]
+        quote = [kx.random.random(datapoints, symlist),
+                 kx.random.random(datapoints, exlist),
+                 bd, ask,
+                 bdsz, asksz]
+        # wait is True only until the first publish has succeeded; this is
+        # intended to raise an initial error if the first message fails
+        with kx.SyncQConnection(port=5010, wait=not init, no_ctx=True) as q:
+            q('.u.upd', 'trade', trade)
+            # Quotes are only published when the random draw equals 0
+            if 0 == kx.random.random(1, 3)[0]:
+                q('.u.upd', 'quote', quote)
+        if not init:
+            print('First message(s) sent, data-feed publishing ...')
+            init = True
+        # Check the parsed `timer` delay (not the `time` module) before sleeping
+        if timer > 0:
+            time.sleep(timer)
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ except KeyboardInterrupt:
+ print('Data feed stopped')
diff --git a/docs/examples/streaming/feed.q b/docs/examples/streaming/feed.q
new file mode 100644
index 0000000..c84e9a6
--- /dev/null
+++ b/docs/examples/streaming/feed.q
@@ -0,0 +1,29 @@
+config:.Q.def[`port`points!5030 1] .Q.opt .z.x  // command line options with defaults port=5030, points=1; NOTE(review): feed.py publishes to 5010 - confirm default
+h:neg hopen config`port  // negated handle: messages sent through h are asynchronous
+points:config`points
+
+// No-op .u.upd defined in this process; NOTE(review): hopen above signals an error on failure, so confirm when this fallback is actually used
+.u.upd: {[x;y]};
+
+// Timer callback: builds `points` random trade and quote rows on every tick
+/ and publishes them by calling .u.upd through the handle h (via upd_vals below)
+/ NOTE(review): no protected evaluation is visible in this file - confirm how failed sends should be handled
+.z.ts: {
+ symlist:`AAPL`JPM`GOOG`BRK`WPO`IBM;
+ exlist:`NYSE`LON`CHI`HK;
+ sz:10*points?til 100;px:20+points?100f;
+ ask:points?100f;asksz:10*points?til 100;
+ bd:ask-points?ask;bdsz:asksz-points?asksz;
+ trade_vals:(points?symlist;points?exlist;sz;px);
+ quote_vals:(points?symlist;points?exlist;bd;ask;bdsz;asksz);
+ upd_vals[`trade;trade_vals]; // publish trades on every timer tick
+ if[0=(1?3)0;upd_vals[`quote;quote_vals]]; // publish quotes only when a random draw from 0 1 2 is 0, to reduce the number of quotes
+ }
+
+upd_vals:{h(".u.upd";x;y)}
+
+// Default the feedhandler to a 1 second (1000ms) publishing interval if the timer was not set at startup
+if[not system"t";
+ -1"\nTimer was not set, messages are now being set to send at 1 second intervals\n";
+ system"t 1000"
+ ];
diff --git a/docs/examples/streaming/generate_hdb.py b/docs/examples/streaming/generate_hdb.py
new file mode 100644
index 0000000..d8bba9a
--- /dev/null
+++ b/docs/examples/streaming/generate_hdb.py
@@ -0,0 +1,104 @@
+"""Generate a date-partitioned example database of random trade and quote data.
+
+One partition per table is written for each of `--days` consecutive dates,
+the most recent of which lies `--date` days before today.
+"""
+import argparse
+import os
+# Set before pykx is imported so the variable is in place when pykx loads
+os.environ['PYKX_BETA_FEATURES'] = "true"
+
+import pykx as kx
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--datapoints",
+    help="Number of datapoints to be used when populating "
+         "each day of the database. Default = 10000",
+    default=10000,
+    type=int)
+
+parser.add_argument(
+    "--date",
+    help="The day prior to today's date which will be used "
+         "as the first date in the Database. Default = 1 -> Yesterday",
+    default=1,
+    type=int)
+
+parser.add_argument(
+    "--days",
+    help="The number of days prior to '--date' which will be generated. Default = 1",
+    default=1,
+    type=int)
+
+parser.add_argument(
+    "--name",
+    help="The name to be given to the database. Default = 'db'",
+    default='db',
+    type=str)
+
+# Define Schemas
+trade = kx.schema.builder({
+    'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,
+    'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+    'px': kx.FloatAtom})
+
+quote = kx.schema.builder({
+    'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,
+    'exchange': kx.SymbolAtom, 'bid': kx.FloatAtom,
+    'ask': kx.FloatAtom, 'bidsz': kx.LongAtom,
+    'asksz': kx.LongAtom})
+
+variables = vars(parser.parse_args())
+datapoints = variables['datapoints']
+date = variables['date']
+days = variables['days']
+db_name = variables['name']
+
+symlist = ['AAPL', 'JPM', 'GOOG', 'BRK', 'WPO', 'IBM']
+exlist = ['NYSE', 'LON', 'CHI', 'HK']
+
+# Roughly one quote is generated for every three trades
+num_trades = datapoints
+num_quotes = round(datapoints/3)
+
+if days <= 0:
+    raise ValueError('--days supplied as a value <=0')
+if date < 0:
+    raise ValueError('--date supplied as a value < 0')
+
+db = kx.DB(path=db_name)
+today = kx.DateAtom('today')
+
+# Offsets count days before today and run oldest first, from
+# `date + days - 1` down to `date`, so that `--date` is honoured. With the
+# default `--date` of 1 this covers `days` days ago through yesterday.
+for offset in range(date + days - 1, date - 1, -1):
+    partition = today - offset
+
+    # Generate random trade data
+    trade_data = [
+        kx.q.asc(kx.random.random(num_trades, kx.q('1D00:00:00.000'))),
+        kx.random.random(num_trades, symlist),
+        kx.random.random(num_trades, exlist),
+        10*kx.random.random(num_trades, 100),
+        20+kx.random.random(num_trades, 100.0)]
+
+    # Generate random quote data
+    ask = kx.random.random(num_quotes, 100.0)
+    asksz = 10*kx.random.random(num_quotes, 100)
+    bd = ask - kx.random.random(num_quotes, ask)
+    bdsz = asksz - kx.random.random(num_quotes, asksz)
+    quote_data = [
+        kx.q.asc(kx.random.random(num_quotes, kx.q('1D'))),
+        kx.random.random(num_quotes, symlist),
+        kx.random.random(num_quotes, exlist),
+        bd,
+        ask,
+        bdsz,
+        asksz]
+
+    # Generate trade and quote database partitions
+    db.create(trade.insert(trade_data, inplace=False), 'trade', partition)
+    db.create(quote.insert(quote_data, inplace=False), 'quote', partition)
diff --git a/docs/examples/streaming/images/evolving-system/analytic-addition.png b/docs/examples/streaming/images/evolving-system/analytic-addition.png
new file mode 100644
index 0000000..a0b5abf
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/analytic-addition.png differ
diff --git a/docs/examples/streaming/images/evolving-system/chained-no-analytics.png b/docs/examples/streaming/images/evolving-system/chained-no-analytics.png
new file mode 100644
index 0000000..c756f3e
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/chained-no-analytics.png differ
diff --git a/docs/examples/streaming/images/evolving-system/dash.png b/docs/examples/streaming/images/evolving-system/dash.png
new file mode 100644
index 0000000..4c52602
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/dash.png differ
diff --git a/docs/examples/streaming/images/evolving-system/feed-sub.png b/docs/examples/streaming/images/evolving-system/feed-sub.png
new file mode 100644
index 0000000..d318978
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/feed-sub.png differ
diff --git a/docs/examples/streaming/images/evolving-system/full-infra.png b/docs/examples/streaming/images/evolving-system/full-infra.png
new file mode 100644
index 0000000..2b240b0
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/full-infra.png differ
diff --git a/docs/examples/streaming/images/evolving-system/gateway.png b/docs/examples/streaming/images/evolving-system/gateway.png
new file mode 100644
index 0000000..8f1995c
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/gateway.png differ
diff --git a/docs/examples/streaming/images/evolving-system/simple-no-feed.png b/docs/examples/streaming/images/evolving-system/simple-no-feed.png
new file mode 100644
index 0000000..1ff9f7c
Binary files /dev/null and b/docs/examples/streaming/images/evolving-system/simple-no-feed.png differ
diff --git a/docs/examples/streaming/index.md b/docs/examples/streaming/index.md
new file mode 100644
index 0000000..83331c4
--- /dev/null
+++ b/docs/examples/streaming/index.md
@@ -0,0 +1,38 @@
+---
+title: "Example: Real-Time Streaming"
+description: The development of a basic streaming workflow using PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, basic
+---
+
+# Example: Real-Time Streaming using PyKX
+
+_This page outlines the steps taken and functionality shown in demonstrating your first PyKX streaming application_
+
+To run this example please download the [zip](./real-time-pykx.zip) file containing the notebook or visit our GitHub repository [here](https://github.com/KxSystems/pykx/tree/main/docs/examples/streaming) to view the code directly.
+
+In this example we will generate a real-time and historical analysis system which completes the following actions:
+
+1. Allows ingestion of high-volume trade and quote financial data.
+2. Persists this data at end of day to a historical database.
+3. Develops a real-time analytic which combines data from two independent real-time tables.
+4. Develops a number of query analytics on the historical database and real-time database which provide the count of the number of trades/quotes for a specified ticker symbol.
+5. Generates a username/password protected gateway process which a user can query to combine the results from the real-time and historical data view into one value.
+
+Each of the analytics provided in steps 3, 4 and 5 are Python analytics operating on data in kdb+/PyKX format.
+
+## Want more information?
+
+The documentation surrounding real-time streaming with PyKX is extensively outlined [here](../../user-guide/advanced/streaming/index.md). For information on specific parts of the infrastructures that can be generated you might find the following links useful:
+
+| Title | Description |
+|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| [Start basic ingest](../../user-guide/advanced/streaming/basic.md) | Learn how to start the core components of a basic streaming ingest framework. |
+| [Publish data](../../user-guide/advanced/streaming/publish.md) | Learn how to publish data to your real-time capture system using Python, q and C. |
+| [Subscribe to data](../../user-guide/advanced/streaming/subscribe.md) | How do you subscribe to new updates being received in your system? |
+| [Real-Time Analytics](../../user-guide/advanced/streaming/rta.md) | Generate insights into your real-time data and account for common problems. |
+| [Custom query APIs](../../user-guide/advanced/streaming/custom_apis.md) | Learn how to query historical and real-time data using custom Python APIs. |
+| [Query access gateways](../../user-guide/advanced/streaming/gateways.md) | Learn how to create a query API which traverses multiple processes and can limit user access to only information they need to know. |
+
+If you want to read through the API documentation for this functionality it is contained in its entirety [here](../../api/tick.md).
\ No newline at end of file
diff --git a/docs/examples/streaming/real-time-pykx.zip b/docs/examples/streaming/real-time-pykx.zip
new file mode 100644
index 0000000..449eeb4
Binary files /dev/null and b/docs/examples/streaming/real-time-pykx.zip differ
diff --git a/docs/examples/streaming/subscriber.py b/docs/examples/streaming/subscriber.py
new file mode 100644
index 0000000..eb98a3f
--- /dev/null
+++ b/docs/examples/streaming/subscriber.py
@@ -0,0 +1,58 @@
+"""Example subscriber keeping local `trade` and `quote` tables up to date.
+
+Subscribes through `.u.sub` on the process listening on port 5010 and upserts
+every polled update into the matching local table.
+"""
+import asyncio
+import sys
+
+import pykx as kx
+
+# Local tables that received updates are upserted into
+trade = kx.schema.builder({
+    'time': kx.TimespanAtom,
+    'sym': kx.SymbolAtom,
+    'exchange': kx.SymbolAtom,
+    'sz': kx.LongAtom,
+    'px': kx.FloatAtom,
+})
+
+quote = kx.schema.builder({
+    'time': kx.TimespanAtom,
+    'sym': kx.SymbolAtom,
+    'exchange': kx.SymbolAtom,
+    'bid': kx.FloatAtom,
+    'ask': kx.FloatAtom,
+    'bidsz': kx.LongAtom,
+    'asksz': kx.LongAtom,
+})
+
+
+async def main_loop(q, trade, quote):
+    """Poll `q` forever, upserting each received update into its table.
+
+    The trade count is rewritten to stdout after every received message.
+    """
+    while True:
+        await asyncio.sleep(0.005)
+        message = q.poll_recv()
+        if message is not None:
+            target = message[1]
+            if target == 'trade':
+                trade.upsert(message[2], inplace=True)
+            elif target == 'quote':
+                quote.upsert(message[2], inplace=True)
+            sys.stdout.write(f"Trade count: {len(trade)}\r")
+            sys.stdout.flush()
+
+
+async def main():
+    """Subscribe to the trade and quote tables, then hand over to `main_loop`."""
+    async with kx.RawQConnection(port=5010) as q:
+        for table_name in ('trade', 'quote'):
+            await q('.u.sub', table_name, '')
+        await main_loop(q, trade, quote)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/examples/subscriber/readme.md b/docs/examples/subscriber/readme.md
index b7f31dd..292ed81 100644
--- a/docs/examples/subscriber/readme.md
+++ b/docs/examples/subscriber/readme.md
@@ -1,91 +1,201 @@
-# PyKX Subscribing to a `q` Process
-
-The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` to subscribe to a running q process.
-
-To follow along with this example please feel free to download this zip archive that contains a copy of the python script and this writeup.
-
-## Quickstart
-
-This example creates a python subscriber to a q process, that appends data received to the end of a table.
-
-Here we have:
-
-1. A q process running on port 5001
-2. A Python process subscribing to the q process
-
-### Start the required q processes
-
-```q
-// run q
-$ q -p 5001
-q)
-```
-
-### Start the pykx subscriber
-
-```bash
-// run the subscriber which will automatically connect
-$ python subscriber.py
-// you can also run the asnychronous example with
-$ python subscriber_async.py
-```
-
-### Outcome
-
-What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are received.
-
-```q
-// run q
-$ q -p 5001
-q)
-```
-
-q process is started.
-
-```bash
-// run the subscriber which will automatically connect
-$ python subscriber.py
-===== Initial Table =====
-a b
---
-4 8
-9 1
-2 9
-7 5
-0 4
-1 6
-9 6
-2 1
-1 8
-8 5
-===== Initial Table =====
-
-```
-
-Python process is started with a table, and it connects to the q server and sets the `py_server` variable.
-
-```q
-q)py_server[1 2]
-
-```
+title: PyKX Subscriber Example
+description: Subscriber Examples
+date: October 2024
+author: KX Systems, Inc.,
+tags: subscriber, synchronous, asynchronous, PyKX
+---
+# PyKX Subscribing to a `q` Process
-Send a new table row (1, 2) to the python process from q.
+_This example demonstrates using `PyKX` to set up a Python process as a subscriber to data messages published from a q process._
+
+## Pre-requisites
+
+A kdb+ license is required to complete this example. [Sign-up for a license](https://code.kx.com/q/learn/licensing/).
-```python
-Recieved new table row from q: 1 2
-a b
----
-4 8
-9 1
-2 9
-7 5
-0 4
-1 6
-9 6
-2 1
-1 8
-8 5
-1 2
-```
-
-The new row has been appended to the table.
+The following python libraries are required to run this example:
+
+1. pykx
+1. asyncio
+
+The source code for this example is available in the examples directory here:
+
+1. [Synchronous subscriber](https://github.com/KxSystems/pykx/blob/main/examples/subscriber/subscriber.py)
+1. [Asynchronous subscriber](https://github.com/KxSystems/pykx/blob/main/examples/subscriber/subscriber_async.py)
+
+## Summary of steps
+
+Both example scripts for setting up a subscriber follow the same steps:
+
+1. Start a q process running with some open port (5001 is used for the example, but you may choose any open port).
+1. Run the python subscriber by executing the script from the github repository.
+
+### Run the subscriber example
+
+1. Begin by running a q process with an open port:
+
+ ```q
+ // run q
+ $ q -p 5001
+ q)
+ ```
+1. In a separate terminal start a python process running the subscriber script:
+
+ ```bash
+ // run the subscriber, which connects automatically
+ $ python subscriber.py
+ ```
+ The python process opens an IPC connection to the q process and sets a new global variable on the q process as part of the main function:
+
+ ```python
+ async def main():
+ global table
+ async with kx.RawQConnection(port=5001) as q:
+ print('===== Initial Table =====')
+ print(table)
+ print('===== Initial Table =====')
+ await q('py_server:neg .z.w')
+ await main_loop(q)
+ ```
+ The q process now has the variable `py_server` set to the handle of the python process once the python process connects.
+
+1. Once this variable is set, you can send rows of the table to the python process and they are appended as they are received.
+
+ ```bash
+ // run the subscriber, which automatically connects
+ $ python subscriber.py
+ ===== Initial Table =====
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ ===== Initial Table =====
+
+ ```
+
+1. As the Python process is initiated, it connects to the q server and sets the `py_server` variable and creates the initial table.
+
+ ```q
+ q)py_server[1 2]
+
+ ```
+
+1. Send a new table row (1, 2) to the python process from q.
+
+ ```python
+ Received new table row from q: 1 2
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ 1 2
+ ```
+
+ The new row has been appended to the table.
+
+### Run the asynchronous subscriber example
+
+1. Begin by running a q process with an open port:
+
+ ```q
+ // run q
+ $ q -p 5001
+ q)
+ ```
+1. In a separate terminal start a python process running the asynchronous subscriber script:
+
+ ```bash
+ // run the asynchronous subscriber which automatically connects
+ $ python subscriber_async.py
+ ```
+ The python process opens an IPC connection to the q process and sets a new global variable on the q process as part of the main function:
+
+ ```python
+ async def main():
+ global table
+ async with kx.RawQConnection(port=5001) as q:
+ print('===== Initial Table =====')
+ print(table)
+ print('===== Initial Table =====')
+ await q('py_server:neg .z.w')
+ await main_loop(q)
+ ```
+ The q process now has the variable `py_server` set to the handle of the python process once the python process connects.
+
+1. Once this variable is set, you can send rows of the table to the python process and they are appended as they are received.
+
+ ```bash
+ // run the subscriber, which automatically connects
+ $ python subscriber_async.py
+ ===== Initial Table =====
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ ===== Initial Table =====
+
+ ```
+
+1. As the Python process is initiated, it connects to the q server and sets the `py_server` variable and creates the initial table.
+
+ ```q
+ q)py_server[1 2]
+
+ ```
+
+1. Send a new table row (1, 2) to the python process from q.
+
+ ```python
+ Received new table row from q: 1 2
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ 1 2
+ ```
+
+ The new row has been appended to the table.
+
+
+## Summary
+
+This example has demonstrated how to initiate a q process, subscribe to an existing table, and append rows to it either synchronously or asynchronously.
+
+## Next steps
+
+Check out more examples such as:
+
+- [Real-Time Streaming](../streaming/index.md)
+- [Compression and Encryption]
diff --git a/docs/examples/subscriber/subscriber.py b/docs/examples/subscriber/subscriber.py
index e187d34..99d6164 100644
--- a/docs/examples/subscriber/subscriber.py
+++ b/docs/examples/subscriber/subscriber.py
@@ -18,7 +18,7 @@ async def main_loop(q):
await asyncio.sleep(0.5) # allows other async tasks to run along side
result = q.poll_recv() # this will return None if no message is available to be read
if assert_result(result):
- print(f'Recieved new table row from q: {result}')
+ print(f'Received new table row from q: {result}')
table = kx.q.upsert(table, result)
print(table)
result = None
diff --git a/docs/examples/subscriber/subscriber_async.py b/docs/examples/subscriber/subscriber_async.py
index 30b628d..616264d 100644
--- a/docs/examples/subscriber/subscriber_async.py
+++ b/docs/examples/subscriber/subscriber_async.py
@@ -16,7 +16,7 @@ async def main_loop(q):
while True:
result = await q.poll_recv_async()
if assert_result(result):
- print(f'Recieved new table row from q: {result}')
+ print(f'Received new table row from q: {result}')
table = kx.q.upsert(table, result)
print(table)
result = None
diff --git a/docs/examples/threaded_execution/threading.md b/docs/examples/threaded_execution/threading.md
index 14f051c..24e75af 100644
--- a/docs/examples/threaded_execution/threading.md
+++ b/docs/examples/threaded_execution/threading.md
@@ -1,22 +1,25 @@
+---
+title: Multithreaded Execution Example
+description: Example of PyKX Calling into q from multiple threads
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, threading, python, asyncio, multithreaded
+---
+
# PyKX Calling into q from multiple threads
-The purpose of this example is to provide a quickstart for setting up a python process using `PyKX`
-to call into `EmbeddedQ` from multiple threads.
+_This example provides a quick start for setting up a Python process using `#!python PyKX` to call into `#!python EmbeddedQ` from multiple threads._
-To follow along with this example please feel free to download this
-zip archive that contains a copy of the python scripts and this
-writeup.
+To follow along, feel free to download this zip archive that contains a copy of the python scripts and this writeup.
## Quickstart
-This example creates a python process that creates multiple tasks/threads that subscribe to a `q`
-process over IPC and upon receiving a new row upsert it to a local table. There are 2 scripts
-included: `asyncio_threading.py` and `threads.py`, the first uses asyncio tasks running on
-seperate threads and the second example uses the python `threading` library directly to spawn
-threads.
+This example creates a Python process that spawns multiple tasks or threads to subscribe to a `#!python q` process over IPC. Upon receiving a new row, it upserts the row to a local table. There are two scripts included:
+- `#!python asyncio_threading.py`, which uses asyncio tasks running on separate threads.
+- `#!python threads.py`, which uses the Python threading library to spawn threads directly.
-### Running the example
+### Run the example
```bash
$ python asyncio_threading.py
@@ -26,8 +29,7 @@ $ python threads.py
### Outcome
-The inital table will be printed upon starting the program, once all the threads/tasks have
-upserted all of the rows they have received to the table the final table will be printed.
+This command prints the initial table at startup. Once all the threads or tasks have upserted their received rows to the table, it prints the final table:
```
$ python asyncio_threading.py
@@ -70,12 +72,12 @@ a b
..
```
-### Important Note on usage
+### Important note on usage
-Since using `PYKX_THREADING` creates a background thread to run the calls into `q`, the
+Since using `#!python PYKX_THREADING` creates a background thread to run the calls into `#!python q`, the
background thread must be shutdown when finished. The easiest way to ensure this is done is by using
-a `try` - `finally` block around the entrypoint to your script. This will ensure that even in the
-event of an error the background thread will still be shutdown correctly so python can exit.
+a `#!python try` - `#!python finally` block around the entrypoint to your script. This ensures that even in the
+event of an error, the background thread shuts down correctly so Python can exit.
```
import os
diff --git a/docs/extras/glossary.md b/docs/extras/glossary.md
new file mode 100644
index 0000000..ff5a28c
--- /dev/null
+++ b/docs/extras/glossary.md
@@ -0,0 +1,89 @@
+---
+title: PyKX Glossary
+description: Common terms explained for PyKX
+date: June 2024
+author: KX Systems, Inc.,
+tags: glossary
+---
+
+# Glossary
+
+_This page contains descriptions of commonly used terms in PyKX._
+
+## Attributes
+PyKX attributes are characteristics and features of PyKX objects that define their behavior and interaction within the Python environment.
+
+## Grouped attribute
+The `#!python grouped` attribute ensures that all items in the `#!python Vector`/`#!python Table` column are stored in a different format to help reduce memory usage.
+
+## HDB
+An HDB is a mount for historical data within a database. It’s the ultimate repository for interval data. Learn how [HDB](https://code.kx.com/insights/1.11/enterprise/database/configuration/assembly/database.html) works in database configuration.
+
+## IDB
+An IDB serves as a mount to store interval data in a database. It collects data from a real-time database (RDB), retains it for a specified duration, such as 10 minutes, and then transfers the data to a historical database (HDB). Learn how [IDB](https://code.kx.com/insights/1.9/enterprise/database/configuration/assembly/database.html) works in database configuration.
+
+## IPC
+Interprocess Communication (IPC) forms a central mechanism by which you can connect to and query existing kdb+/q infrastructures. Read more about [communicating via IPC](../user-guide/advanced/ipc.md).
+
+## kdb+
+kdb+ is a powerful, ultra-fast column-based relational time series database (TSDB) with in-memory (IMDB) capabilities. Operating with temporal data types as a first class entity, the use of q and its query language qSQL against this database creates a highly performant time-series analysis tool. Learn more about [kdb+](https://code.kx.com/q/).
+
+## Mount
+In databases, mounting refers to making a set of databases available online. A mounted database is ready for use. A database may have three types of mounts: real-time (RDB), interval (IDB), and/or historic (HDB). Learn more about [mounts](https://code.kx.com/insights/1.9/enterprise/database/configuration/assembly/database.html#mounts) in database configuration.
+
+## Multithreading
+Multithreading means running multiple threads concurrently within a single process. It allows better resource utilization and improved responsiveness. When a program has multiple threads, the operating system switches between them rapidly, giving the illusion of parallel execution. Learn about [multithreading in PyKX](../user-guide/advanced/threading.md) and run an [example](../examples/threaded_execution/threading.md).
+
+## Objects
+PyKX objects are Python representations of kdb+ data structures that allow developers to interact with kdb+ databases, perform complex queries, and manipulate data efficiently.
+
+## Object storage
+Object storage is a data storage architecture designed to handle large amounts of unstructured data. A data storage system that manages data as objects is distinct from file hierarchy or block-based storage architectures. Object storage is ideal for unstructured data because it overcomes the scaling limitations of traditional file storage systems. The capacity for limitless scaling is why object storage is the backbone of cloud storage; major players like Amazon, Google, and Microsoft utilize object storage as their primary data storage solution. Learn [how to interact with PyKX objects](../user-guide/fundamentals/creating.md) or [how to index PyKX objects](../user-guide/fundamentals/indexing.md).
+
+## Parallelization
+Parallelization involves distributing computational tasks across multiple threads to improve performance and efficiency.
+
+## Parted attribute
+The `#!python parted` attribute is similar to the `#!python grouped` attribute with the additional requirement that each unique value must be adjacent to its other copies, where the grouped attribute allows them to be dispersed throughout the `#!python Vector`/`#!python Table`.
+
+## Partitioned database
+A partitioned database is a database that is divided into smaller, more manageable units, improving scalability while maintaining security. These partitions can be created for various reasons, such as manageability, performance optimization, availability, or load balancing. Learn more about [creating and maintaining partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for in-depth information about partitioned databases in kdb+.
+
+## Partitioning
+When writing a data table to a database, it must be partitioned to ensure compatibility with a kdb+ time series database. In PyKX, partitioning is managed through a `#!python Timestamp` column defined in the schema, and each table is required to have a `#!python Timestamp` column. Learn more about [partitioning tables across directories](https://code.kx.com/q/kb/partition/).
+
+## Persisted database
+A persisted database (or on-disk database) stores data on non-volatile storage like a hard drive or SSD, ensuring the data remains intact even after the application is closed or the system is restarted. In contrast, in-memory databases store data in RAM and lose all data when the system is powered down. Persisted databases are crucial for applications needing long-term data storage and reliability, such as financial systems, customer databases, and many web applications.
+
+## Python byte object
+A Python byte object is an immutable sequence of bytes, used to handle binary data. Each byte is an integer between 0 and 255. Byte objects are essential for tasks like file I/O and network communication.
+
+## q
+q is a versatile vector programming language mainly used to query a kdb+ database. q is known both for its speed and expressiveness. [Learn more about q](https://code.kx.com/q/learn/) including [q terminology](https://code.kx.com/q/basics/glossary/).
+
+## q/SQL
+q/SQL is a collection of SQL-like functions for interacting with a kdb+ database. Learn more about [q/SQL](https://code.kx.com/q4m3/9_Queries_q-sql/).
+
+## RDB
+Real-time event data is stored on an RDB mount of the database, before being moved to the interval database (IDB). Learn how [RDB](https://code.kx.com/insights/1.9/enterprise/database/configuration/assembly/database.html) works in database configuration.
+
+## Schema
+A database schema is a fundamental concept in database management. It describes how data is structured within a relational database. It serves as a blueprint for the database’s architecture, outlining the relationships between different entities, such as tables, columns, data types, views, stored procedures, primary keys, and foreign keys. Learn more about [API schema generation in PyKX](../api/schema.md) or [schema configuration in kdb Insights Enterprise](https://code.kx.com/insights/1.9/enterprise/database/configuration/assembly/schema.html).
+
+## Sorted attribute
+The `#!python sorted` attribute ensures that all items in the `#!python Vector`/`#!python Table` column are sorted in ascending order.
+
+## Time-series analysis
+Time-series analysis is a specific way of analyzing a sequence of data points collected over an interval of time. Unlike sporadic or random data collection, time-series analysis involves recording data points at consistent intervals within a set period. Use cases include finance, Internet of Things (IoT), and other domains where data evolves over time. PyKX time-series analysis uses q’s query language (qSQL) against kdb+. Learn more about [time-series models](https://code.kx.com/insights/1.9/api/machine-learning/q/analytics/api/variadic/timeseries.html).
+
+## Thread
+A thread is an independent sequence of instructions within a program that can be executed independently of other code. Threads share the same memory space as the process they belong to, allowing them to communicate and share data efficiently. In Python, the threading module provides an intuitive API for working with threads. Learn about [PyKX calling into q from multiple threads](../examples/threaded_execution/threading.md).
+
+## Unique attribute
+The `#!python unique` attribute ensures that all items in the `#!python Vector`/`#!python Table` column are unique (there are no duplicated values).
+
+## Upsert
+In the context of databases, upsert is an operation that combines both updating and inserting data into a table. When you perform an upsert, the database checks whether a record with a specific key already exists in the table. If a record with that key exists, the database updates the existing record with new values. If no record with that key exists, the database inserts a new record with the provided data. Learn more about [upsert](../api/pykx-execution/q.md#upsert).
+
+## Vector
+A vector is a mathematical concept used to represent quantities that have both magnitude (size) and direction. In other words, vectors are arrays of numerical values that represent points in multidimensional space. A vector is typically represented as an arrow in space, pointing from one point to another. Learn more about [using in-built methods on PyKX vectors](../examples/interface-overview.ipynb#using-in-built-methods-on-pykx-vectors) and [adding values to PyKX vectors/lists](../user-guide/fundamentals/indexing.md#assigning-and-adding-values-to-vectorslists).
diff --git a/docs/getting-started/installing.md b/docs/getting-started/installing.md
index 021927f..adb021b 100644
--- a/docs/getting-started/installing.md
+++ b/docs/getting-started/installing.md
@@ -7,14 +7,14 @@ tags: PyKX, setup, install,
---
# PyKX installation guide
-_This section explains how to install PyKX on your machine._
+_This page explains how to install PyKX on your machine._
## Pre-requisites
Before you start, make sure you have:
-- **Python** (versions 3.8-3.12)
-- **pip**
+- [**Python**](https://www.python.org/downloads/) (versions 3.8-3.12)
+- [**pip**](https://pypi.org/project/pip/)
Recommended: a virtual environment with packages such as [venv](https://docs.python.org/3/library/venv.html) from the standard library.
@@ -26,81 +26,71 @@ KX only supports versions of PyKX built by KX (installed from wheel files) for:
- **macOS** (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.12
- **Windows** (`win_amd64`) with CPython 3.8-3.12
-??? Note "Special instructions for Windows users."
+We provide assistance to user-built installations of PyKX only on a best-effort basis.
- To run q or PyKX on Windows, you have two options:
+## 1. Install PyKX
- - **Install** `#!bash msvcr100.dll`, included in the [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999).
+You can install PyKX from three sources:
- - **Or Execute** `#!bash w64_install.ps1` supplied at the root of the PyKX GitHub [here](https://github.com/KxSystems/pykx) as follows, using PowerShell:
+=== "Install PyKX from PyPI"
+
+ Ensure you have a recent version of `#!bash pip`:
+
+ ```sh
+ pip install --upgrade pip
- ```PowerShell
- git clone https://github.com/kxsystems/pykx
- cd pykx
- .\w64_install.ps1
```
-We provide assistance to user-built installations of PyKX only on a best-effort basis.
+ Then install the latest version of PyKX with the following command:
-## 1. Install PyKX
+ ```sh
+ pip install pykx
-You can install PyKX from three sources:
+ ```
-!!! Note ""
+=== "Install PyKX from Anaconda"
- === "Install PyKX from PyPI"
+ For Linux x86 and arm-based architectures, you can install PyKX from the `#!bash kx` channel on Anaconda as follows:
- Ensure you have a recent version of `#!bash pip`:
+ ```sh
+ conda install -c kx pykx
- ```
- pip install --upgrade pip
- ```
- Then install the latest version of PyKX with the following command:
+ ```
+ Type `#!bash y` when prompted to accept the installation.
- ```
- pip install pykx
- ```
- === "Install PyKX from Anaconda"
-
- For Linux x86 and arm-based architectures, you can install PyKX from the `#!bash kx` channel on Anaconda as follows:
+=== "Install PyKX from GitHub"
- ```
- conda install -c kx pykx
- ```
- Type `#!bash y` when prompted to accept the installation.
+ Clone the PyKX repository:
+ ```sh
+ git clone https://github.com/kxsystems/pykx
- === "Install PyKX from GitHub"
-
- Clone the PyKX repository:
+ ```
- ```
- git clone https://github.com/kxsystems/pykx
- ```
+ Enter the cloned repository and install PyKX using `#!bash pip`:
- Enter the cloned repository and install PyKX using `#!bash pip`:
+ ```sh
+ cd pykx
+ pip install .
- ```
- cd pykx
- pip install .
- ```
+ ```
-At this point you have [partial access to PyKX](../user-guide/advanced/modes.md#operating-in-the-absence-of-a-kx-license). To gain access to all PyKX features, follow the steps in the next section, otherwise go straight to [3. Verify PyKX Installation](#3-verify-pykx-installation).
+At this point you have [partial access to PyKX](../user-guide/advanced/modes.md#operating-in-the-absence-of-a-kx-license). To gain access to all PyKX features, follow the steps in the next section, otherwise go straight to [3. Verify PyKX Installation](#3-verify-pykx-installation).
-## 2. Install a KDB Insights license
+## 2. Install a kdb Insights license
-To use all PyKX functionalities, you need to download and install a KDB Insights license.
+To use all PyKX functionalities, you need to download and install a kdb Insights license.
!!! Warning "Legacy kdb+/q licenses do not support all PyKX features."
-There are two types of KDB Insights licenses for PyKX: personal and commercial. For either of them, you have two installation options:
+There are two types of kdb Insights licenses for PyKX: personal and commercial. For either of them, you have two installation options:
- a) from Python
- b) using environment variables
### 2.a Install license in Python
-Follow the steps below to install a KDB Insights license for PyKX from Python:
+Follow the steps below to install a kdb Insights license for PyKX from Python:
1. Start your Python session:
@@ -108,7 +98,7 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
$ python
```
-2. Import the PyKX library. When prompted to accept the installation, type `Y` or press `Enter`:
+2. Import the PyKX library. When prompted to accept the installation, type `#!python Y` or press `#!python Enter`:
```python
>>> import pykx as kx
@@ -116,10 +106,16 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
Thank you for installing PyKX!
We have been unable to locate your license for PyKX. Running PyKX in unlicensed mode has reduced functionality.
- Would you like to continue with license installation? [Y/n]:
+ Would you like to install a license? [Y/n]:
```
-3. Choose whether you wish to install a personal or commercial license, type `Y` or press `Enter` to choose a personal license
+3. Indicate whether you have access to an existing PyKX-enabled license. Type `#!python N` or press `#!python Enter` to continue with obtaining a new license:
+
+ ```python
+ Do you have access to an existing license for PyKX that you would like to use? [N/y]:
+ ```
+
+4. Choose whether you wish to install a personal or commercial license, type `#!python Y` or press `#!python Enter` to choose a personal license
```python
Is the intended use of this software for:
@@ -128,7 +124,7 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
Enter your choice here [1/2]:
```
-4. When asked if you would like to apply for a license, type `Y` or press `Enter`:
+5. When asked if you would like to apply for a license, type `#!python Y` or press `#!python Enter`:
=== "Personal license"
@@ -142,30 +138,30 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
```bash
To apply for your PyKX license, contact your KX sales representative or sales@kx.com.
- Alternately apply through https://kx.com/book-demo.
+ Alternately apply through https://kx.com/book-demo.
Would you like to open this page? [Y/n]:
```
-5. For personal use, complete the form to receive your welcome email. For commercial use, the license will be provided over email after the commercial evaluation process has been followed with the support of your sales representative.
+6. For personal use, complete the form to receive your welcome email. For commercial use, the license will be provided over email after the commercial evaluation process has been followed with the support of your sales representative.
-6. Choose the desired method to activate your license by typing `1`, `2`, or `3` as appropriate:
+7. Choose the desired method to activate your license by typing `#!python 1`, `#!python 2`, or `#!python 3` as appropriate:
```bash
Select the method you wish to use to activate your license:
[1] Download the license file provided in your welcome email and input the file path (Default)
[2] Input the activation key (base64 encoded string) provided in your welcome email
[3] Proceed with unlicensed mode
- Enter your choice here [1/2/3]:
+ Enter your choice here [1/2/3]:
```
-7. Depending on your choice (`1`, `2`, or `3`), complete the installation by following the final step as below:
+8. Depending on your choice (`#!python 1`, `#!python 2`, or `#!python 3`), complete the installation by following the final step as below:
=== "1"
=== "Personal license"
```bash
- Provide the download location of your license (for example, ~/path/to/kc.lic):
+ Provide the download location of your license (for example, ~/path/to/kc.lic):
```
=== "Commercial license"
@@ -177,7 +173,7 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
=== "2"
```bash
- Provide your activation key (base64 encoded string) provided with your welcome email:
+ Provide your activation key (base64 encoded string) provided with your welcome email:
```
=== "3"
@@ -185,7 +181,7 @@ Follow the steps below to install a KDB Insights license for PyKX from Python:
No further actions needed.
```
-8. Validate the correct installation of your license:
+9. Validate the correct installation of your license:
```python
>>> kx.q.til(10)
@@ -227,16 +223,18 @@ $ python
pykx.LongVector(pykx.q('0 1 2 3 4'))
```
-As you approach the expiry date for your license you can have PyKX automatically update your license by updating the environment variable `KDB_LICENSE_B64` or `KDB_K4LICENSE_B64` with your new license information. Once PyKX is initialised with your expired license it will attempt to overwrite your license with the newly supplied value. This is outlined as follows:
+!!! Tip "Tip: automatic license renewal setup"
-```python
-$python
->>> import pykx as kx
-Initialisation failed with error: exp
-Your license has been updated using the following information:
- Environment variable: 'KDB_K4LICENSE_B64'
- License write location: /user/path/to/license/k4.lic
-```
+ When your license nears its expiry date, you can set PyKX to automatically renew it. To do this, modify the environment variable `#!bash KDB_LICENSE_B64` or `#!bash KDB_K4LICENSE_B64` with your new license information. When PyKX initializes with the expired license, it will attempt to overwrite it with the new value:
+
+ ```shell
+ $python
+ >>> import pykx as kx
+ Initialisation failed with error: exp
+ Your license has been updated using the following information:
+ Environment variable: 'KDB_K4LICENSE_B64'
+ License write location: /user/path/to/license/k4.lic
+ ```
## 3. Verify PyKX installation
@@ -256,22 +254,24 @@ This command should display the installed version of PyKX.
PyKX depends on the following third-party Python packages:
- - `numpy~=1.20, <2.0; python_version=='3.7'`
- - `numpy~=1.22, <2.0; python_version<'3.11', python_version>'3.7'`
+ - `numpy~=1.22, <2.0; python_version<'3.11'`
- `numpy~=1.23, <2.0; python_version=='3.11'`
- `numpy~=1.26, <2.0; python_version=='3.12'`
- - `pandas>=1.2, < 2.2.0`
+ - `pandas>=1.2, < 2.0; python_version=='3.8'`
+ - `pandas>=1.2, < 2.2.0; python_version>'3.8'`
- `pytz>=2022.1`
- `toml~=0.10.2`
+ - `dill>=0.2.0`
**Note**: All are installed automatically by `#!bash pip` when you install PyKX.
Here's a breakdown of how PyKX uses these libraries:
- [NumPy](https://pypi.org/project/numpy): converts data from PyKX objects to NumPy equivalent Array/Recarray style objects; direct calls to NumPy functions such as `numpy.max` with PyKX objects relies on the NumPy Python API.
- - [Pandas](https://pypi.org/project/pandas): converts PyKX data to Pandas Series/DataFrame equivalent objects or to PyArrow data formats. Pandas is used as an intermendiary data format.
+ - [Pandas](https://pypi.org/project/pandas): converts PyKX data to Pandas Series/DataFrame equivalent objects or to PyArrow data formats. Pandas is used as an intermediary data format.
- [pytz](https://pypi.org/project/pytz/): converts data with timezone information to PyKX objects to ensure that the offsets are accurately applied.
- [toml](https://pypi.org/project/toml/): for configuration parsing and management, with `.pykx-config` as outlined [here](../user-guide/configuration.md).
+ - [dill](https://pypi.org/project/dill): used in the serialization and deserialization of Python objects when interfacing between kdb+ and Python processes using [remote functions](../user-guide/advanced/remote-functions.md) or [real-time capture](../user-guide/advanced/streaming/index.md) functionality.
=== "Optional"
@@ -281,14 +281,17 @@ This command should display the installed version of PyKX.
- **`pyarrow >=3.0.0`**: install `pyarrow` extra, for example `pip install pykx[pyarrow]`.
- **`find-libpython ~=0.2`**: install `debug` extra, for example `pip install pykx[debug]`.
- **`ast2json ~=0.3`**: install with `dashboards` extra, for example `pip install pykx[dashboards]`
- - **`dill >=0.2`**: install via pip, with`beta` extra, for example `pip install pykx[beta]`
+ - **`dill >=0.2`**: install via pip, with `remote` extra, for example `pip install pykx[remote]`
+ - **`beautifulsoup4 >=4.10.0`**: install with `help` extra, for example `pip install pykx[help]`
+ - **`markdown2 >=2.5.0`**: install with `help` extra, for example `pip install pykx[help]`
+ - **`psutil >=5.0.0`**: install via pip, with `streaming` extra, for example `pip install pykx[streaming]`
- Here's a breakdown of how PyKX uses these libraries:
+ Here's a breakdown of how PyKX uses these libraries:
- - [PyArrow](https://pypi.org/project/pyarrow): converts PyKX objects to and from their PyArrow equivalent table/array objects.
+ - [PyArrow](https://pypi.org/project/pyarrow): converts PyKX objects to and from their PyArrow equivalent table/array objects.
- [find-libpython](https://pypi.org/project/find-libpython): provides the `libpython.{so|dll|dylib}` file required by [PyKX under q](../pykx-under-q/intro.md).
- [ast2json](https://pypi.org/project/ast2json/): required for KX Dashboards Direct integration.
- - [dill](https://pypi.org/project/dill/): required for the Beta feature `Remote Functions`.
+ - [psutil](https://pypi.org/project/psutil/): facilitates the stopping and killing of a q process on a specified port allowing for orphaned q processes to be stopped, functionality defined [here](../api/util.md#pykxutilkill_q_process).
**Optional non-Python dependencies:**
@@ -299,14 +302,87 @@ This command should display the installed version of PyKX.
If you encounter any issues during the installation process, refer to the following sources for assistance:
- - Visit our [troubleshooting](../troubleshooting.md) guide.
+ - Visit our [troubleshooting](../help/troubleshooting.md) guide.
- Ask a question on the KX community at [learninghub.kx.com](https://learninghub.kx.com/forums/forum/pykx/).
- Use Stack Overflow and tag [`pykx`](https://stackoverflow.com/questions/tagged/pykx) or [`kdb`](https://stackoverflow.com/questions/tagged/kdb) depending on the subject.
- - Go to [support](../support.md).
+ - Go to [support](../help/support.md).
+
+## Optional: Installing a q executable
+
+The following section is optional and primarily required if you are looking to make use of the [Real-Time Capture](../user-guide/advanced/streaming/index.md) functionality provided by PyKX.
+
+### Do I need a q executable?
+
+For the majority of functionality provided by PyKX you do not explicitly need access to a q executable. Users within a Python process who do not have a q executable will be able to complete tasks such as the following:
+
+- Convert data to/from Python types
+- Run analytics on in-memory and on-disk databases
+- Create databases
+- Query remote q/kdb+ processes via IPC
+- Execute numpy functions with PyKX data
+
+If however you need to make use of the [Real-Time Capture](../user-guide/advanced/streaming/index.md) functionality you will need access to a q executable. Fundamentally the capture and persistence of real-time data and the application of analytics on this streaming data is supported via deployment of code on q processes.
+
+### Configuring PyKX to use an existing executable
+
+By default, when starting a q process for use within the Real-Time Capture workflows, PyKX attempts to call `q` directly. However, this method is not fully reliable when using the Python `subprocess` module. As such, you can complete the following setup to point more explicitly at your executable.
+
+If you already have a q executable, PyKX can use it when initializing the Real-Time Capture APIs if you set the following in your [configuration file](../user-guide/configuration.md#configuration-file) or as [environment variables](../user-guide/configuration.md#environment-variables):
+
+| Variable | Explanation |
+| :------------------ | :--------------------------------------------------------------------------------------------------------------- |
+| `PYKX_Q_EXECUTABLE` | Specifies the location of the q executable which should be called. Typically this will be `QHOME/[lmw]64/q[.exe]`|
+| `QHOME` | The directory to which q was installed |
+
+### Installing an executable
+
+#### Installing using PyKX
+
+For users who do not have access to a q executable, PyKX provides a utility function `kx.util.install_q` to allow users to access q locally.
+
+The following default information is used when installing the q executable:
+
+| Parameter | Default | Explanation |
+| :--------------- | :------------------ | :--------------------------------------------------------------------------------------------------------------------- |
+| location | `'~/q'` or `'C:\q'` | The location to which q will be installed if not otherwise specified. |
+| date | `'2024.07.08'` | The dated version of kdb+ 4.0 which is to be installed. |
+
+The following examples show how to use the installation functionality under various conditions.
+
+- Installing to default location
+
+ ```python
+ >>> kx.util.install_q()
+ ```
+
+- Installing to a specified location
+
+ ```python
+ >>> kx.util.install_q('~/custom')
+ ```
+
+Installation of q via this method will update the configuration file `.pykx-config` at either `~/q` or `C:\q` to include the location of `QHOME` and `PYKX_Q_EXECUTABLE` to be used.
+
+#### Installing without PyKX
+
+You do not have to install the q executable via PyKX. If you wish to install q following the traditional approach, you can follow the install instructions outlined [here](https://code.kx.com/q/learn/install/) or sign up for a free trial [here](https://kx.com/download-kdb/).
+
+### Verify PyKX can use the executable
+
+Verifying that PyKX has access to the executable can be done through execution of the function `#!python kx.util.start_q_subprocess` and requires either your configuration file or environment variables to include `PYKX_Q_EXECUTABLE`. This is outlined [here](#configuring-pykx-to-use-an-existing-executable).
+
+```python
+>>> import pykx as kx
+>>> server = kx.util.start_q_subprocess(5052)
+>>> conn = kx.SyncQConnection(port=5052) # Connect to subprocess
+>>> conn('1+1')
+pykx.LongAtom(pykx.q('2'))
+>>> server.kill()
+```
## Next steps
That's it! You can now start using PyKX in your Python projects:
- [Quickstart guide](quickstart.md)
-- [User guide introduction](../user-guide/index.md)
+- [Updating/Upgrading your license](../user-guide/advanced/license.md)
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index 00aa53c..3fd2757 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -1,125 +1,130 @@
+---
+title: Quickstart for PyKX
+description: Quickstart guide for setting up PyKX
+date: June 2024
+author: KX Systems, Inc.,
+tags: PyKX, quickstart, import PyKX, use PyKX objects
+---
+
# Quickstart
-This quickstart guide provides first time users with instructions for installing this library and make use of the functionality it contains for the first time.
+_This quickstart guide provides first-time users with essential instructions for using the PyKX library._
## Prerequisites
-To complete the quickstart guide below you will need to have completed the following:
+Before you start, make sure to:
-- [Install the PyKX library and a license](installing.md).
+- [Install the PyKX library](installing.md#1-install-pykx).
+- [Have a kdb Insights license](installing.md#2-install-a-kdb-insights-license).
-## How to import PyKX
+## 1. Import PyKX
-To access PyKX and it's functionality import it within your Python code using the following syntax
+To access PyKX, import it within your Python code using the following syntax:
```python
>>> import pykx as kx
```
+!!! Info "The use of the shortened name `#!python kx` is optional and provides a terse convention for interacting with methods and objects from the PyKX library."
-The use of the shortened name `kx` is intended to provide a terse convention for interacting with methods and objects from this library.
+## 2. Generate PyKX objects
-## How to generate PyKX objects
+You can generate PyKX objects in three ways. Click on the tabs below to follow the instructions:
-The generation of PyKX objects is supported principally in two ways
+=== "Use PyKX functions"
-1. Execution of q code to create these entities
-2. Conversion of Python objects to analogous PyKX objects
+ Generate PyKX objects using `#!python pykx` helper functions:
-### Creation of PyKX objects using inbuilt PyKX functions
+ ```python
+ >>> kx.random.random([3, 4], 10.0)
+ pykx.List(pykx.q('
+ 4.976492 4.087545 4.49731 0.1392076
+ 7.148779 1.946509 0.9059026 6.203014
+ 9.326316 2.747066 0.5752516 2.560658
+ '))
-Generation of PyKX objects using `pykx` helper functions
+ >>> kx.Table(data = {'x': kx.random.random(10, 10.0), 'x1': kx.random.random(10, ['a', 'b', 'c'])})
+ pykx.Table(pykx.q('
+ x x1
+ ------------
+ 0.8123546 a
+ 9.367503 a
+ 2.782122 c
+ 2.392341 a
+ 1.508133 b
+ '))
+ ```
-```python
->>> kx.random.random([3, 4], 10.0)
-pykx.List(pykx.q('
-4.976492 4.087545 4.49731 0.1392076
-7.148779 1.946509 0.9059026 6.203014
-9.326316 2.747066 0.5752516 2.560658
-'))
-
->>> kx.Table(data = {'x': kx.random.random(10, 10.0), 'x1': kx.random.random(10, ['a', 'b', 'c'])})
-pykx.Table(pykx.q('
-x x1
-------------
-0.8123546 a
-9.367503 a
-2.782122 c
-2.392341 a
-1.508133 b
-'))
-```
+=== "From Python data types"
-### Creation of PyKX objects from Python data types
+ Generate PyKX objects from Python, NumPy, Pandas and PyArrow objects by using the `#!python kx.toq` method:
-Generation of PyKX objects from Python, NumPy, Pandas and PyArrow objects can be completed as follows using the `kx.toq` method.
+ ```python
+ >>> pylist = [10, 20, 30]
+ >>> qlist = kx.toq(pylist)
+ >>> qlist
+ pykx.LongVector(pykx.q('10 20 30'))
-```python
->>> pylist = [10, 20, 30]
->>> qlist = kx.toq(pylist)
->>> qlist
-pykx.LongVector(pykx.q('10 20 30'))
-
->>> import numpy as np
->>> nplist = np.arange(0, 10, 2)
->>> qlist = kx.toq(nplist)
->>> qlist
-pykx.LongVector(pykx.q('0 2 4 6 8'))
-
->>> import pandas as pd
->>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
->>> df
- col1 col2
-0 1 3
-1 2 4
->>> qtable = kx.toq(df)
-pykx.Table(pykx.q('
-col1 col2
----------
-1 3
-2 4
-'))
-
->>> import pyarrow as pa
->>> patab = pa.Table.from_pandas(df)
->>> patab
-pyarrow.Table
-col1: int64
-col2: int64
->>> qtable = kx.toq(patab)
->>> qtable
-pykx.Table(pykx.q('
-col1 col2
----------
-1 3
-2 4
-'))
-```
+ >>> import numpy as np
+ >>> nplist = np.arange(0, 10, 2)
+ >>> qlist = kx.toq(nplist)
+ >>> qlist
+ pykx.LongVector(pykx.q('0 2 4 6 8'))
+
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+ >>> df
+ col1 col2
+ 0 1 3
+ 1 2 4
+ >>> qtable = kx.toq(df)
+ pykx.Table(pykx.q('
+ col1 col2
+ ---------
+ 1 3
+ 2 4
+ '))
-### Creation of PyKX objects using q
+ >>> import pyarrow as pa
+ >>> patab = pa.Table.from_pandas(df)
+ >>> patab
+ pyarrow.Table
+ col1: int64
+ col2: int64
+ >>> qtable = kx.toq(patab)
+ >>> qtable
+ pykx.Table(pykx.q('
+ col1 col2
+ ---------
+ 1 3
+ 2 4
+ '))
+ ```
-Generation of PyKX objects using q can be completed through calling `kx.q`
+=== "Execute q code"
-```python
->>> kx.q('10 20 30')
-pykx.LongVector(pykx.q('10 20 30'))
-
->>> kx.q('([]5?1f;5?`4;5?0Ng)')
-pykx.Table(pykx.q('
-x x1 x2
----------------------------------------------------
-0.439081 ncej 8c6b8b64-6815-6084-0a3e-178401251b68
-0.5759051 jogn 5ae7962d-49f2-404d-5aec-f7c8abbae288
-0.5919004 ciha 5a580fb6-656b-5e69-d445-417ebfe71994
-0.8481567 hkpb ddb87915-b672-2c32-a6cf-296061671e9d
-0.389056 aeaj 580d8c87-e557-0db1-3a19-cb3a44d623b1
-'))
-```
+ Generate PyKX objects using q by calling `#!python kx.q`:
-## Interacting with PyKX Objects
+ ```python
+ >>> kx.q('10 20 30')
+ pykx.LongVector(pykx.q('10 20 30'))
-PyKX objects can be interacted with in a variety of ways, through indexing using Pythonic syntax, passing PyKX objects to q/NumPy functions, querying via SQL/qSQL syntax or through the use of q functionality via the context interface. Each of these is described in more depth throughout this documentation but examples of each are provided here.
+ >>> kx.q('([]5?1f;5?`4;5?0Ng)')
+ pykx.Table(pykx.q('
+ x x1 x2
+ ---------------------------------------------------
+ 0.439081 ncej 8c6b8b64-6815-6084-0a3e-178401251b68
+ 0.5759051 jogn 5ae7962d-49f2-404d-5aec-f7c8abbae288
+ 0.5919004 ciha 5a580fb6-656b-5e69-d445-417ebfe71994
+ 0.8481567 hkpb ddb87915-b672-2c32-a6cf-296061671e9d
+ 0.389056 aeaj 580d8c87-e557-0db1-3a19-cb3a44d623b1
+ '))
+ ```
-* Create a PyKX list and interact with the list using indexing and slices.
+## 3. Interact with PyKX objects
+
+You can interact with PyKX objects in a variety of ways, for example, through [indexing using Pythonic syntax](../user-guide/fundamentals/indexing.md), passing [PyKX objects to q/NumPy](../user-guide/fundamentals/creating.md#converting-pykx-objects-to-pythonic-types) functions, [querying via SQL/qSQL](https://code.kx.com/pykx/user-guide/fundamentals/querying.html) syntax or by [using the q functionality](https://code.kx.com/pykx/user-guide/advanced/context_interface.html) via the context interface. Each way is described in more depth under the User guide > Fundamentals section. For now, we recommend a few examples:
+
+* Create a PyKX list and interact with it using indexing and slices:
```python
>>> qarray = kx.random.random(10, 1.0)
@@ -131,7 +136,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.FloatVector(pykx.q('0.08123546 0.9367503 0.2782122'))
```
-* Assign objects to PyKX lists
+* Assign objects to PyKX lists:
```python
>>> qarray = kx.random.random(3, 10.0, seed=10)
@@ -141,7 +146,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.FloatVector(pykx.q('0.891041 0.1 3.621949'))
```
-* Create a PyKX table and manipulate using Pythonic syntax
+* Create a PyKX table and manipulate using Pythonic syntax:
```python
>>> N = 100
@@ -156,26 +161,26 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.Table(pykx.q('
x x1 x2
-----------------------
- 0.3550381 1.185644 c
- 0.3615143 2.835405 a
- 0.9089531 2.134588 b
- 0.2062569 3.852387 a
- 0.481821 0.07970141 a
- 0.2065625 1.786519 a
- 0.5229178 0.1273692 c
- 0.3338806 3.440445 c
- 0.414621 3.188777 c
- 0.9725813 0.1922818 b
- 0.5422726 4.486179 b
- 0.6116582 3.967756 a
- 0.3414991 1.018642 b
- 0.9516746 3.878809 c
- 0.1169475 0.3469163 c
- 0.8158957 2.050957 a
- 0.6091539 1.168774 a
- 0.9830794 3.562923 b
- 0.7543122 0.6961287 a
- 0.3813679 1.350938 b
+ 0.3550381 1.185644 c
+ 0.3615143 2.835405 a
+ 0.9089531 2.134588 b
+ 0.2062569 3.852387 a
+ 0.481821 0.07970141 a
+ 0.2065625 1.786519 a
+ 0.5229178 0.1273692 c
+ 0.3338806 3.440445 c
+ 0.414621 3.188777 c
+ 0.9725813 0.1922818 b
+ 0.5422726 4.486179 b
+ 0.6116582 3.967756 a
+ 0.3414991 1.018642 b
+ 0.9516746 3.878809 c
+ 0.1169475 0.3469163 c
+ 0.8158957 2.050957 a
+ 0.6091539 1.168774 a
+ 0.9830794 3.562923 b
+ 0.7543122 0.6961287 a
+ 0.3813679 1.350938 b
..
'))
>>> qtable[['x', 'x1']]
@@ -187,15 +192,15 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.Table(pykx.q('
x x1 x2
-----------------------
- 0.3550381 1.185644 c
- 0.3615143 2.835405 a
- 0.9089531 2.134588 b
- 0.2062569 3.852387 a
- 0.481821 0.07970141 a
+ 0.3550381 1.185644 c
+ 0.3615143 2.835405 a
+ 0.9089531 2.134588 b
+ 0.2062569 3.852387 a
+ 0.481821 0.07970141 a
'))
```
-* Pass a PyKX object to q function
+* Pass a PyKX object to a q function:
```python
>>> qfunction = kx.q('{x+til 10}')
@@ -203,7 +208,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.FloatVector(pykx.q('0.3992327 1.726329 2.488636 3.653597 4.028107 5.444905 6.542917 7.00628 8.152..'))
```
-* Apply a Python function on a PyKX Vector
+* Apply a Python function on a PyKX Vector:
```python
>>> qvec = kx.random.random(10, 10, seed=42)
@@ -213,7 +218,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.LongVector(pykx.q('5 8 3 3 10 5 3 1 9 1'))
```
-* Pass a PyKX array objects to a Numpy functions
+* Pass PyKX arrays of objects to Numpy functions:
```python
>>> qarray1 = kx.random.random(10, 1.0)
@@ -231,7 +236,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.FloatVector(pykx.q('0.8297059 1.609535 1.093438 1.341447 0.9670269 1.167818 0.9770923 0.3773123 1..'))
```
-* Query using SQL/qSQL
+* Query using SQL/qSQL:
```python
>>> N = 100
@@ -244,23 +249,23 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
)
>>> qtable[0:5]
pykx.Table(pykx.q('
- x x1 x2
+ x x1 x2
----------------------
- a 0.8236115 0.7306473
- a 0.3865843 1.01605
- c 0.9931491 1.155324
- c 0.9362009 1.569154
+ a 0.8236115 0.7306473
+ a 0.3865843 1.01605
+ c 0.9931491 1.155324
+ c 0.9362009 1.569154
c 0.4849499 0.09870703
'))
>>> kx.q.sql("SELECT * FROM $1 WHERE x='a'", qtable)
pykx.Table(pykx.q('
- x x1 x2
+ x x1 x2
---------------------
a 0.8236115 0.7306473
- a 0.3865843 1.01605
- a 0.259265 2.805719
- a 0.6140826 1.730398
- a 0.6212161 3.97236
+ a 0.3865843 1.01605
+ a 0.259265 2.805719
+ a 0.6140826 1.730398
+ a 0.6212161 3.97236
..
'))
>>> kx.q.qsql.select(qtable, where = 'x=`a')
@@ -276,7 +281,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
'))
```
-* Applying q keyword functions
+* Apply q keyword functions:
```python
>>> qvec = kx.q.til(10)
@@ -286,11 +291,38 @@ PyKX objects can be interacted with in a variety of ways, through indexing using
pykx.FloatVector(pykx.q('0 0.5 1 2 3 4 5 6 7 8'))
```
-## Converting PyKX objects to common Python types
+* Get help on q keyword functions (see [the installation docs](../getting-started/installing.md#dependencies)):
+
+ ```Python
+ >>> help(kx.q.max)
+ Help on UnaryPrimitive in pykx:
+
+ pykx.UnaryPrimitive = pykx.UnaryPrimitive(pykx.q('max'))
+ • max
+
+ Maximum.
+
+ >>> pykx.q.max([0, 7, 2, 4 , 1, 3])
+ pykx.LongAtom(q('7'))
+
+
+ >>> help(kx.q.abs)
+ Help on UnaryPrimitive in pykx:
+
+ pykx.UnaryPrimitive = pykx.UnaryPrimitive(pykx.q('abs'))
+ • abs
+
+ Where x is a numeric or temporal, returns the absolute value of x. Null is returned if x is null.
+
+ >>> pykx.q.abs(-5)
+ pykx.LongAtom(q('5'))
+ ```
+
+## 4. Convert PyKX objects to Python types
-Objects generated via the PyKX library can be converted where reasonable to `Python`, `Numpy`, `Pandas` and `PyArrow` types which are analogous to their underlying q representation. For example q tables are converted to Pandas Dataframes and PyArrow tables respectively. This is facilitated in each case through the use of the `py`, `np`, `pd` and `pa` methods.
+To convert the objects generated via the PyKX library to the corresponding `#!python Python`, `#!python Numpy`, `#!python Pandas`, and `#!python PyArrow` types, use `#!python py`, `#!python np`, `#!python pd`, and `#!python pa` methods. Click on the tabs below to go through the examples:
-* Convert PyKX objects to Python
+=== "Convert to Python"
```python
>>> qdictionary = kx.toq({'a': 5, 'b': range(10), 'c': np.random.uniform(low=0.0, high=1.0, size=(5,))})
@@ -308,7 +340,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
[0, 2, 4, 1, 2, 1, 0, 1, 0, 1]
```
-* Convert PyKX objects to Numpy
+=== "Convert to Numpy"
```python
>>> import numpy as np
@@ -322,19 +354,19 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
pykx.Table(pykx.q('
x x1
------------
- 0.8247812 4
- 0.2149847 0
- 0.1007832 2
- 0.4520411 4
- 0.0196153 0
+ 0.8247812 4
+ 0.2149847 0
+ 0.1007832 2
+ 0.4520411 4
+ 0.0196153 0
'))
>>> qtab.np()
rec.array([(0.82478116, 4), (0.21498466, 0), (0.10078323, 2),
- (0.45204113, 4), (0.0196153 , 0)],
- dtype=[('x', '>> qvec = kx.toq(np.random.randint(5, size=10))
@@ -353,7 +385,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
>>> df = pd.DataFrame(data={'x': [random() for _ in range(5)], 'x1': [randint(0, 4) for _ in range(5)]})
>>> qtab = kx.toq(df)
>>> qtab.pd()
- x x1
+ x x1
0 0.824781 4
1 0.214985 0
2 0.100783 2
@@ -361,7 +393,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
4 0.019615 0
```
- If using `pandas>=2.0` it is possible to also use the `as_arrow` keyword argument to convert to
+ If using `#!python pandas>=2.0` it is possible to also use the `#!python as_arrow` keyword argument to convert to
pandas types using pyarrow as the backend instead of the default numpy backed pandas objects.
```python
@@ -381,7 +413,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
>>> df = pd.DataFrame(data={'x': [random() for _ in range(5)], 'x1': [randint(0, 4) for _ in range(5)]})
>>> qtab = kx.toq(df)
>>> qtab.pd(as_arrow=True)
- x x1
+ x x1
0 0.541059 3
1 0.886690 1
2 0.674300 4
@@ -393,23 +425,23 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
dtype: object
```
-* Convert PyKX objects to PyArrow
+=== "Convert to PyArrow"
```python
>>> qvec = kx.random.random(10, 5)
>>> qvec.pa()
[
- 0,
- 2,
- 4,
- 1,
- 2,
- 1,
- 0,
- 1,
- 0,
- 1
+ 0,
+ 2,
+ 4,
+ 1,
+ 2,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1
]
>>> df = pd.DataFrame(data={'x': [random() for _ in range(5)], 'x1': [randint(0, 4) for _ in range(5)]})
>>> qtab = kx.toq(df)
@@ -424,5 +456,4 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt
## Next steps
-- [Interface Overview Notebook](PyKX%20Introduction%20Notebook.ipynb#ipc-communication)
-- [PyKX User Guide](../user-guide/index.md)
+- [Introduction Notebook](../examples/interface-overview.ipynb#ipc-communication)
diff --git a/docs/getting-started/what_is_pykx.md b/docs/getting-started/what_is_pykx.md
index c487b6c..f2bce8c 100644
--- a/docs/getting-started/what_is_pykx.md
+++ b/docs/getting-started/what_is_pykx.md
@@ -1,36 +1,78 @@
+---
+title: What is PyKX?
+description: Overview of PyKX
+date: June 2024
+author: KX Systems, Inc.,
+tags: about PyKX, q/kdb+, use cases,
+---
+
# What is PyKX?
+_This page briefly describes PyKX, its use cases, and its connection with q/kdb+._
+
## Introduction
-PyKX is a Python first interface to the world's fastest time-series database kdb+ and its underlying vector programming language, q. PyKX takes a Python first approach to integrating q/kdb+ with Python following 10+ years of integrations between these two languages. Fundamentally it provides users with the ability to efficiently query and analyze huge amounts of in-memory and on-disk time-series data.
+**PyKX** is a Python-first interface to **kdb+** (the world's fastest time-series database) and **q** (kdb+'s underlying vector programming language). PyKX is the result of 10+ years of integrations between two languages: Python and q. Its aim is to help users query and analyze huge amounts of in-memory and on-disk time-series data, significantly faster than other libraries.
+
+## Use cases
+
+PyKX supports three main use cases, allowing Python data engineers and data scientists to:
+
+1. Store, query, manipulate and use q objects within a Python process.
+2. Query external q processes via an [Inter-Process Communication (IPC)](../user-guide/advanced/ipc.md) interface.
+3. Embed Python functionality within a native q session using its [under q](../pykx-under-q/intro.md) functionality.
+
+??? Note "Expand to learn more about q/kdb+"
+
+ Used throughout the financial sector for 25+ years, q and kdb+ have been a cornerstone of modern financial markets. This technology provides a storage mechanism for historical market data and performant tooling to analyze this vast streaming, real-time and historical data.
+
+ - **Kdb+** is a high-performance column-oriented database designed to process and store large amounts of data. Commonly accessed data is available in RAM, which makes it faster to access than disk-stored data. Because temporal data types are treated as first-class entities, using q and its query language qsql against this database provides a highly performant time-series analysis tool.
+
+ - **q** is the vector programming language which is used for all interactions with kdb+ databases, known both for its speed and expressiveness. PyKX exposes q as a domain-specific language (DSL) embedded within Python. The assumption is that q is mainly used for data processing and database management.
+
+ This approach benefits users in two ways:
+
+ - Helps users familiar with q to make the most of its advanced analytics and database management.
+ - Empowers kdb+/q users who lack q expertise to get up and running quickly.
-This interface exposes q as a domain-specific language (DSL) embedded within Python, taking the approach that q should principally be used for data processing and management of databases. This approach does not diminish the ability for users familiar with q, or those wishing to learn more about it, from making the most of its advanced analytics and database management functionality. Rather it empowers those who want to make use of the power of kdb+/q who lack this expertise to get up and running quickly.
+ For more information on using q/kdb+ and getting started, see the following links:
-PyKX supports three principal use cases:
+ - [An introduction to q/kdb+](https://code.kx.com/q/learn/tour/)
+ - [Tutorial videos introducing kdb+/q](https://code.kx.com/q/learn/q-for-all/)
-1. It allows users to store, query, manipulate and use q objects within a Python process.
-2. It allows users to query external q processes via an IPC interface.
-3. It allows users to embed Python functionality within a native q session using it's [under q](../pykx-under-q/intro.md) functionality.
+## PyKX vs. Python/q interfaces
-Users wishing to install the library can do so following the instructions [here](installing.md).
+There are three historical interfaces which allow interoperability between Python and q/kdb+:
-Once you have the library installed you can get up and running with PyKX following the quickstart guide [here](quickstart.md).
+1. [Embedpy](https://code.kx.com/q/ml/embedpy)
+2. [PyQ](https://github.com/KxSystems/pyq)
+3. [qPython](https://github.com/exxeleron/qPython)
-## What is q/kdb+?
+How does PyKX compare to other q interfaces for Python? Here’s a TL;DR comparison table highlighting the key differences between EmbedPy, PyQ, qPython, and PyKX:
-Mentioned throughout the documentation q and kdb+ are respectively a highly efficient vector programming language and highly optimised time-series database used to analyse streaming, real-time and historical data. Used throughout the financial sector for 25+ years this technology has been a cornerstone of modern financial markets providing a storage mechanism for historical market data and tooling to make the analysis of this vast data performant.
+| **Feature** | **EmbedPy** | **PyQ** | **qPython** | **PyKX** |
+| ---------------- | ------------------------------------------------ | ------------------------------------- | ----------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
+| Interoperability | Python from q | Python & q in same process | Python-first, over IPC | Python-first, in-process & IPC |
+| Execution | Designed for q developers, run in q session | Requires PyQ binary or start from q | Processing completed on q session via IPC, deserialization using Python | Run from Python session, class-based type system |
+| Use Case | Leverage Python functionality not available in q | Operate on same data across languages | Common use case, expensive in processing & memory | Store, query, manipulate q objects within Python, via IPC or Python in q session |
+| Flexibility | \- | Locked into using PyQ binary | \- | Pythonic interface, context interface for q scripts, q first mode, IPC available |
+| Data Conversion | q to/from Numpy/Python only | q to/from Numpy/Pandas only | Data converted directly from socket | Leverages q memory space embedded within Python, supports NumPy, Pandas, PyArrow |
-Kdb+ is a high-performance column-oriented database designed to process and store large amounts of data. Commonly accessed data is available in RAM which makes it faster to access than disk stored data. Operating with temporal data types as a first class entity the use of q and it's query language qsql against this database creates a highly performant time-series analysis tool available.
+??? Note "Expand to learn more about EmbedPy, PyQ, qPython, and PyKX"
-q is the vector programming language which is used for all interactions with kdb+ databases and which is known both for its speed and expressiveness.
+ To give you a clear understanding of how each interface operates and their suitability for different use cases, here are some additional details:
+
+ - **EmbedPy** allows using Python from q but does not interface with q from Python. It’s mainly for q developers to access Python functionalities like machine learning, statistical methods, and plotting.
+ - **PyQ** integrates Python and q interpreters in the same process, but it requires executing a special PyQ binary or starting from q, which is not ideal for Python use cases that require a standard Python binary.
+ - **qPython** takes a Python-first approach but works entirely over IPC (Inter-Process Communication), meaning Python objects sent to q and q objects returned are serialized, sent over a socket, and then deserialized, which can be resource-intensive.
+ - **PyKX** supports storing, querying, manipulating, and using q objects within a Python process and querying external q processes via IPC. PyKX provides a more Pythonic approach with a class-based hierarchical type system and a context interface for interacting with q scripts in a Pythonic manner.
-For more information on using q/kdb+ and getting started with see the following links:
+!!! tip "EmbedPy, PyQ, qPython: Interface support"
-- [An introduction to q/kdb+](https://code.kx.com/q/learn/tour/)
-- [Tutorial videos introducing kdb+/q](https://code.kx.com/q/learn/q-for-all/)
+ KX maintains both embedPy and PyQ on a best-efforts basis under the [Fusion](https://code.kx.com/q/interfaces) initiative. qPython is in maintenance mode, not supported by KX. If you're using EmbedPy, PyQ, or qPython, we recommend switching to PyKX to pick up the latest updates from KX.
## Next steps
-- [Installation guide](installing.md)
-- [Quickstart guide](quickstart.md)
-- [User guide introduction](../user-guide/index.md)
+- Follow the [Installation guide](installing.md)
+- Get up and running with the PyKX [Quickstart guide](quickstart.md)
+- Get to know more functionalities with the [PyKX Introduction Notebook](../examples/interface-overview.ipynb)
diff --git a/docs/help/faq.md b/docs/help/faq.md
new file mode 100644
index 0000000..cd481a8
--- /dev/null
+++ b/docs/help/faq.md
@@ -0,0 +1,56 @@
+---
+title: FAQ
+description: Frequently asked questions for PyKX
+maintained by: KX Systems, Inc.
+date: Aug 2024
+tags: PyKX, FAQ
+---
+# FAQ
+
+## How do I prevent the `#!python 'cores` licensing error when I run `#!python import pykx`?
+
+```python
+>>> import pykx as kx
+<frozen importlib._bootstrap>:228: PyKXWarning: Failed to initialize embedded q; falling back to unlicensed mode, which has limited functionality. Refer to https://code.kx.com/pykx/user-guide/advanced/modes.html for more information. Captured output from initialization attempt:
+ '2022.09.15T10:32:13.419 license error: cores
+```
+
+This error indicates PyKX tried to use more cores than your license allows. You can fix this by limiting the number of cores used by the python process.
+
+- On Linux you can use `#!bash taskset` to limit the number of cores used by a process:
+
+```bash
+# Example: limit python to the first 4 cores of an 8-core CPU
+$ taskset -c 0-3 python
+```
+
+- You can also do this in python before importing PyKX (Linux only):
+
+```python
+>>> import os
+>>> os.sched_setaffinity(0, [0, 1, 2, 3])
+>>> import pykx as kx
+>>> kx.q('til 10')
+pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9'))
+```
+
+- On Windows you can use the `#!bat start` command with its `#!bat /affinity` argument (see: `#!bat > help start`):
+
+```bat
+> start /affinity f python
+```
+
+(above, `#!bat 0xf = 00001111b`, so the python process will only use the four cores for which the mask bits are equal to 1)
+
+## How does PyKX determine the license that is used?
+
+The following steps are run by PyKX to find the license when you execute `#!python import pykx`:
+
+1. Search for **kx.lic**, **kc.lic** and **k4.lic** license files in this order within the following locations:
+ 1. Current working directory
+ 1. Location defined by environment variable `#!bash QLIC` if set
+ 1. Location defined by environment variable `#!bash QHOME` if set
+2. If a license is not found PyKX will use the following environment variables (if they are set) to install and make use of a license:
+ 1. `#!bash KDB_LICENSE_B64` containing a base64 encoded version of a **kc.lic** license
+ 1. `#!bash KDB_K4LICENSE_B64` containing a base64 encoded version of a **k4.lic** license
+3. If a license has not been located you will be guided to install a license following a prompt based license installation.
diff --git a/docs/help/issues.md b/docs/help/issues.md
new file mode 100644
index 0000000..54da94b
--- /dev/null
+++ b/docs/help/issues.md
@@ -0,0 +1,122 @@
+---
+title: Issues and Limitations
+description: Known issues that occur when using python and limitations when using q embedded in python
+maintained by: KX Systems, Inc.
+date: Aug 2024
+tags: PyKX, issues, embedded
+---
+
+# Issues and Limitations
+
+_This page details known issues and functional limitations when using PyKX either as q embedded in a Python process via the `#!python import pykx` command, or as a Python process embedded in q via `#!q \l pykx.q`._
+
+## PyKX
+### Known issues
+* Enabling the NEP-49 NumPy allocators will often segfault when running in a multiprocess setting.
+* The timeout value is always set to 0 when using PYKX_Q_LOCK.
+* Enabling PYKX_ALLOCATOR and using PyArrow tables can cause segfaults.
+* `#!python kurl` functions require their `#!python options` dictionary to have mixed type values. Add a `#!python None` value to bypass: `#!python {'': None, ...}`
+* `#!python None` and `#!python pykx.Identity(pykx.q('::'))` do not pass through to single argument Python functions set under q as outlined in this example:
+```python
+>>> def func(n=2):
+... return n
+...
+>>> kx.q('func', None)
+pykx.LongAtom(pykx.q('2'))
+>>> kx.q('func', kx.q('::'))
+pykx.LongAtom(pykx.q('2'))
+```
+
+### Limitations
+Embedding q in a Python process imposes some restrictions on functionality. The embedded q process does not run the main loop that it would when running natively, hence it is limited in usage of q IPC and q timers.
+
+#### IPC
+The embedded q process cannot be used to respond to q IPC requests as a server. Callback functions such as .z.pg defined within a Python process will not operate as expected. Here is an example demonstrating this:
+
+In a Python process, start a q IPC server:
+```python
+>>> import pykx as kx
+>>> kx.q('\\p 5001')
+pykx.Identity(pykx.q('::'))
+```
+
+Now, in a Python or q process attempt to connect to the above embedded q server.
+```python
+>>> import pykx as kx
+>>> q = kx.QConnection(port=5001) # Attempt to create a q connection to a PyKX embedded q instance
+# This process is now hung indefinitely as the embedded q server cannot respond
+```
+
+```q
+q)h:hopen`::5001 /Attempting to create an IPC connection to a PyKX embedded q instance
+/This process is now hung indefinitely as the embedded q server cannot respond
+```
+
+#### Timers
+Timers in q rely on the main loop of the standalone executable so they will not work on the q process embedded in python.
+```python
+>>> import pykx as kx
+>>> kx.q('.z.ts:{0N!x}') # Set callback function which should be run on a timer
+>>> kx.q('\t 1000') # Set timer to tick every 1000ms
+pykx.Identity(pykx.q('::')) # No output follows because the timer never ticks
+```
+Attempting to use the timer callback function directly using PyKX will raise an AttributeError:
+```python
+>>> kx.q.z.ts
+AttributeError: ts: .z.ts is not exposed through the context interface because there is no main loop in the embedded q process
+```
+
+## Python embedded in a q process
+
+### Limitations
+Controlling object return and conversion between a q process and its embedded python instance requires the use of several special characters. In order to use these characters as parameters of functions, as opposed to operations on objects, there are specific steps to be followed.
+
+#### Return characters: `#!q <`, `#!q >`, and `#!q *`
+During function definition you must specify a return type in order to use the return characters as parameters for the function.
+```q
+q)f:.pykx.eval["lambda x: x";<] /Specify < to return output as a q object. *, < and > can now be used as parameters to function f
+q)f[*]
+*
+```
+
+#### Conversion characters: ``#!q ` `` and ``#!q `. ``
+During function definition either define the return type (as above) or use the `#!q .pykx.tok` function:
+```q
+q).pykx.eval["lambda x: x"][`]` /throws error
+'Provided foreign object is not a Python object
+q).pykx.eval["lambda x: x";<][`] /defining the return type using < allows use of ` as a parameter
+`
+q).pykx.eval["lambda x: x"][.pykx.tok[`]]` /wrapping input in function tok allows use of ` as a parameter
+`
+```
+
+#### q default parameter `#!q ::`
+When you execute a q function that has no user defined parameters the accepted q style is to use `#!q []` (e.g. `#!q f:{1+1};f[] /outputs 2`). During execution q will use the generic null as the value passed:
+```q
+q)(::)~{x}[] /x parameter received by lambda is the generic null ::
+1b
+```
+
+Using `#!q ::` as an argument to PyKX functions presents some difficulties:
+```q
+q)f:.pykx.eval["lambda x: x";<]
+q)f[::] /the Python process cannot tell the difference between f[] and f[::] so throws an error
+'TypeError("<lambda>() missing 1 required positional argument: 'x'")
+ [0] f[::]
+```
+
+You can avoid this by wrapping the input in `#!q .pykx.tok`:
+```q
+q)(::)~f[.pykx.tok[::]]
+1b
+```
+
+Python functions defined with 0 parameters will run without issues as they will ignore the automatically added `#!q ::`:
+```q
+p)def noparam():return 7
+q)f:.pykx.get[`noparam;<]
+q)f[]
+7
+q)f[::] /equivalent
+7
+```
diff --git a/docs/help/support.md b/docs/help/support.md
new file mode 100644
index 0000000..4ba8b27
--- /dev/null
+++ b/docs/help/support.md
@@ -0,0 +1,22 @@
+---
+title: Help and Support
+description: Summary of help and support for PyKX
+maintained by: KX Systems, Inc.
+date: Aug 2024
+tags: support, PyKX
+---
+# Support
+
+_This page provides links users can follow to receive help and support for the PyKX library_
+
+## Community Help
+
+If you have any issues or questions you can post them to the following locations, each of which is monitored by the PyKX development team:
+
+- Ask a question to the KX community at [community.kx.com](https://community.kx.com/t5/PyKX/bd-p/PyKX).
+- Use Stack Overflow with the tags [pykx](https://stackoverflow.com/questions/tagged/pykx) or [kdb](https://stackoverflow.com/questions/tagged/kdb) depending on the subject.
+
+## Customer Support
+
+* Inquiries or feedback: [`pykx@kx.com`](mailto:pykx@kx.com)
+* Support for Licensed Subscribers: [support.kx.com](https://support.kx.com/support/home)
diff --git a/docs/help/troubleshooting.md b/docs/help/troubleshooting.md
new file mode 100644
index 0000000..5679479
--- /dev/null
+++ b/docs/help/troubleshooting.md
@@ -0,0 +1,168 @@
+# Troubleshooting
+
+## License issues
+
+The following section outlines practical information useful when dealing with getting access to and managing licenses for PyKX.
+
+### Accessing a license valid for PyKX
+
+A number of trial and enterprise type licenses exist for q/kdb+. However, not all licenses for q/kdb+ are valid for PyKX. In particular, users require access to a license which contains the feature flags **pykx** and **embedq**, which provide access to the PyKX functionality. The following locations can be used for the retrieval of evaluation/personal licenses:
+
+- For non-commercial personal users you can access a 12 month kdb+ license with PyKX enabled [here](https://kx.com/kdb-insights-personal-edition-license-download).
+- For commercial evaluation, contact your KX sales representative or sales@kx.com requesting a PyKX trial license. Alternatively, apply through https://kx.com/book-demo.
+
+For non-personal or non-commercial usage please contact sales@kx.com.
+
+Once you have access to your license you can install the license following the steps provided [here](../getting-started/installing.md) or through installation using the function `#!python kx.license.install` as follows
+
+```python
+>>> import pykx as kx
+>>> kx.license.install('/path/to/downloaded/kc.lic')
+```
+
+### Initialization failing with a `#!python embedq` error
+
+Failure to initialize PyKX while raising an error `#!python embedq` indicates that the license you are attempting to use for PyKX in [licensed modality](../user-guide/advanced/modes.md) does not have the feature flags necessary to run PyKX. To access a license which does allow for running PyKX in this modality, please follow the instructions [here](#accessing-a-license-valid-for-pykx) to get a new license with appropriate feature flags.
+
+### Initialization failing with a `#!python kc.lic` error
+
+If after initially completing the installation guide for PyKX [here](../getting-started/installing.md) you receive the following error:
+
+```python
+pykx.exceptions.PyKXException: Failed to initialize embedded q. Captured output from initialization attempt:
+ '2023.09.02T21:28:45.699 licence error: kc.lic
+```
+
+It usually indicates that your license was not correctly written to disk or that a license could not be found. To check that the installed license matches the license you expect, use `#!python kx.license.check` as shown in the tabs below:
+
+=== "License file based checking"
+
+ The following shows a successful check being completed:
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.check('/path/to/downloaded/kc.lic')
+ True
+ ```
+
+ The following shows an example of a failed check:
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.check('/path/to/incorrect/license.txt')
+ Supplied license information does not match.
+ Please consider reinstalling your license using pykx.util.install_license
+
+ On disk license:
+ b'Atc/wy/gMjZgIdn1KlT3JVWfVmPk55dtb0YJVes5V4ed9Zxt9UVr8G/A1Q3aWiQEkfjGbwvlJU3GXpUergObvzxGN1iyYG\nZasG5s8vevfAI2ttndt//Y2th\nrryoQRm9Dy+DIIcmSufwomL+\nPMJkZacYc9DM6ipnQsL0KvLwLXLrQC1fBLV2pZHCdYC/nX/KM6uslgip4EoTxZTcx1pQPyTx56QKD4K4JBNimO929w/0+v4Hy2x+DIS3n89vpGmtVvjjFRQtsF6Sjnd+6RnFGk13hRL/DlqHTv2XbZgVv++YOCIc7G55KL6PVJY\npB\n66lq9OiZCEdq2GFJLCn2T\nNWGJPT2s1YDAKsAPI5W3PqJkC2UeV17gPG4gxlCSHr0kfacINbEJ0kSTm/UsuEBZ5B/jvR/jU7rFErcd9PECeQA1kXB19fa4hgvbd+SxWTPxMUKbiHThHk6X0Bi3T7WAQ+sZWsEWwkMncd+mOGS\n3D+bRav2nfOpKckj8rCdvYum3U8PDv6IHP=S+\nLaCnJM0yqNjW9xGyog5ml\nbX2k3mBRyBjbJH/1OWTcIg7uDYxxoMtDOCJjeBdSqI=aK+5FVTVarfowvudv7QsMGeohGaJMyczNWVPPjsbyvsxbAwdXvJUuP0jcFCFVeF\n'
+
+ Supplied string content:
+ b'8n\nD+HkcJ93xW4oOEtH\nIZxeWkA1glv5wJ5wE2Fsmbc4lg2ntT9JpsclE1hFeG/Ox/jM4=6GjXD2VNpiCAJ80DNVcXuDB+IPEnP22DMGvBIolJt2pdy9kooGZNQpr6svIkRWX/0m/SbydbQOQUVvfNTxsDjZvvsCiGkdQtygs3sDEJbxsT+KfjqJ7Sd6RQ/47HJHG4JyIWdhmvEBVGSLBa5mdAaCLWdCrga3hHZbW3F4e/l3K4nOQvU91WEiMd6PT061r66AOYmjGACCXqmQ9kSsJfMTXPRi9M2i93Oyv895kFVKdZCLCdKdaow790RcjwnKjFFOERGcge=lZdRtp2BL\nA+JbixvTIKTObmfqr7uPYsGQLfXSFnQCq7jbt3yxv1ZPjvjYLPTx7YKIvgo+ITG6vyY\ne+cfwaW1g0tlvFTcVSVb/sxUvvLCLiWMdxGjt5JUxV3GaSm9ysHVk5MrTDpp/5qqXes1\n/BOXsD\n2DmS/QSZr/Mt+Vc2baKuxPw1w5YnGVuY6vHxHffABzkn+WPcguabr86JcmIAcC0zc2TLkbufBPJewYka9PIt1Ng2\n83NKe13huPU\nohnryYVIMPyjrTWpDid+yC5kSGVeP0/5+r\nJvLmFZUB/n0RUjgMZU5V++GPU1QnCBa+\n"
+ False
+ ```
+
+=== "Encoded string based checking"
+
+ The following shows a successful check being completed:
+
+ ```python
+ >>> import pykx as kx
+ >>> license_string = 'Atc/wy/gMjZgIdn1KlT3JVWfVmPk55dtb0YJVes5V4ed9Zxt9UVr8G/A1Q3aWiQEkfjGbwvlJU3GXpUergObvzxGN1iyYG\nZasG5s8vevfAI2ttndt//Y2th\nrryoQRm9Dy+DIIcmSufwomL+\nPMJkZacYc9DM6ipnQsL0KvLwLXLrQC1fBLV2pZHCdYC/nX/KM6uslgip4EoTxZTcx1pQPyTx56QKD4K4JBNimO929w/0+v4Hy2x+DIS3n89vpGmtVvjjFRQtsF6Sjnd+6RnFGk13hRL/DlqHTv2XbZgVv++YOCIc7G55KL6PVJY\npB\n66lq9OiZCEdq2GFJLCn2T\nNWGJPT2s1YDAKsAPI5W3PqJkC2UeV17gPG4gxlCSHr0kfacINbEJ0kSTm/UsuEBZ5B/jvR/jU7rFErcd9PECeQA1kXB19fa4hgvbd+SxWTPxMUKbiHThHk6X0Bi3T7WAQ+sZWsEWwkMncd+mOGS\n3D+bRav2nfOpKckj8rCdvYum3U8PDv6IHP=S+\nLaCnJM0yqNjW9xGyog5ml\nbX2k3mBRyBjbJH/1OWTcIg7uDYxxoMtDOCJjeBdSqI=aK+5FVTVarfowvudv7QsMGeohGaJMyczNWVPPjsbyvsxbAwdXvJUuP0jcFCFVeF\n'
+ >>> kx.license.check(license_string, format = 'STRING')
+ True
+ ```
+
+ The following shows an example of a failed check:
+
+ ```python
+ >>> import pykx as kx
+ >>> license_string = '8n\nD+HkcJ93xW4oOEtH\nIZxeWkA1glv5wJ5wE2Fsmbc4lg2ntT9JpsclE1hFeG/Ox/jM4=6GjXD2VNpiCAJ80DNVcXuDB+IPEnP22DMGvBIolJt2pdy9kooGZNQpr6svIkRWX/0m/SbydbQOQUVvfNTxsDjZvvsCiGkdQtygs3sDEJbxsT+KfjqJ7Sd6RQ/47HJHG4JyIWdhmvEBVGSLBa5mdAaCLWdCrga3hHZbW3F4e/l3K4nOQvU91WEiMd6PT061r66AOYmjGACCXqmQ9kSsJfMTXPRi9M2i93Oyv895kFVKdZCLCdKdaow790RcjwnKjFFOERGcge=lZdRtp2BL\nA+JbixvTIKTObmfqr7uPYsGQLfXSFnQCq7jbt3yxv1ZPjvjYLPTx7YKIvgo+ITG6vyY\ne+cfwaW1g0tlvFTcVSVb/sxUvvLCLiWMdxGjt5JUxV3GaSm9ysHVk5MrTDpp/5qqXes1\n/BOXsD\n2DmS/QSZr/Mt+Vc2baKuxPw1w5YnGVuY6vHxHffABzkn+WPcguabr86JcmIAcC0zc2TLkbufBPJewYka9PIt1Ng2\n83NKe13huPU\nohnryYVIMPyjrTWpDid+yC5kSGVeP0/5+r\nJvLmFZUB/n0RUjgMZU5V++GPU1QnCBa+\n'
+ >>> kx.license.check(license_string, format = 'STRING')
+ Supplied license information does not match.
+ Please consider reinstalling your license using pykx.util.install_license
+
+ On disk license:
+ b'Atc/wy/gMjZgIdn1KlT3JVWfVmPk55dtb0YJVes5V4ed9Zxt9UVr8G/A1Q3aWiQEkfjGbwvlJU3GXpUergObvzxGN1iyYG\nZasG5s8vevfAI2ttndt//Y2th\nrryoQRm9Dy+DIIcmSufwomL+\nPMJkZacYc9DM6ipnQsL0KvLwLXLrQC1fBLV2pZHCdYC/nX/KM6uslgip4EoTxZTcx1pQPyTx56QKD4K4JBNimO929w/0+v4Hy2x+DIS3n89vpGmtVvjjFRQtsF6Sjnd+6RnFGk13hRL/DlqHTv2XbZgVv++YOCIc7G55KL6PVJY\npB\n66lq9OiZCEdq2GFJLCn2T\nNWGJPT2s1YDAKsAPI5W3PqJkC2UeV17gPG4gxlCSHr0kfacINbEJ0kSTm/UsuEBZ5B/jvR/jU7rFErcd9PECeQA1kXB19fa4hgvbd+SxWTPxMUKbiHThHk6X0Bi3T7WAQ+sZWsEWwkMncd+mOGS\n3D+bRav2nfOpKckj8rCdvYum3U8PDv6IHP=S+\nLaCnJM0yqNjW9xGyog5ml\nbX2k3mBRyBjbJH/1OWTcIg7uDYxxoMtDOCJjeBdSqI=aK+5FVTVarfowvudv7QsMGeohGaJMyczNWVPPjsbyvsxbAwdXvJUuP0jcFCFVeF\n'
+
+ Supplied string content:
+ b'8n\nD+HkcJ93xW4oOEtH\nIZxeWkA1glv5wJ5wE2Fsmbc4lg2ntT9JpsclE1hFeG/Ox/jM4=6GjXD2VNpiCAJ80DNVcXuDB+IPEnP22DMGvBIolJt2pdy9kooGZNQpr6svIkRWX/0m/SbydbQOQUVvfNTxsDjZvvsCiGkdQtygs3sDEJbxsT+KfjqJ7Sd6RQ/47HJHG4JyIWdhmvEBVGSLBa5mdAaCLWdCrga3hHZbW3F4e/l3K4nOQvU91WEiMd6PT061r66AOYmjGACCXqmQ9kSsJfMTXPRi9M2i93Oyv895kFVKdZCLCdKdaow790RcjwnKjFFOERGcge=lZdRtp2BL\nA+JbixvTIKTObmfqr7uPYsGQLfXSFnQCq7jbt3yxv1ZPjvjYLPTx7YKIvgo+ITG6vyY\ne+cfwaW1g0tlvFTcVSVb/sxUvvLCLiWMdxGjt5JUxV3GaSm9ysHVk5MrTDpp/5qqXes1\n/BOXsD\n2DmS/QSZr/Mt+Vc2baKuxPw1w5YnGVuY6vHxHffABzkn+WPcguabr86JcmIAcC0zc2TLkbufBPJewYka9PIt1Ng2\n83NKe13huPU\nohnryYVIMPyjrTWpDid+yC5kSGVeP0/5+r\nJvLmFZUB/n0RUjgMZU5V++GPU1QnCBa+\n'
+ False
+ ```
+
+## Environment issues
+
+### Using PyKX under q is raising a `'libpython` error
+
+If you are getting a `'libpython` error when starting PyKX within a q session, this may indicate that PyKX has been unable to source the Python shared libraries that are required to run Python within an embedded setting. To fix this issue users can either
+
+- Find the absolute path to the appropriate shared object and set the environment variable `PYKX_PYTHON_LIB_PATH` with this location.
+- Set the environment variable `PYKX_USE_FIND_LIBPYTHON` to `"true"`, this will use the Python library [`find-libpython`](https://pypi.org/project/find-libpython/) to locate the `libpython` shared library and automatically set `PYKX_PYTHON_LIB_PATH` to the returned location.
+
+### Getting more information about your environment
+
+The following section outlines how a user can get access to a verbose set of environment configuration associated with PyKX. This information is helpful when debugging your environment and should be provided if possible with support requests.
+
+```python
+>>> import pykx as kx
+>>> kx.util.debug_environment() # see below for output
+```
+
+??? output
+
+ ```python
+ >>> kx.util.debug_environment()
+ missing q binary at '/usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib/m64/q'
+ **** PyKX information ****
+ pykx.args: ()
+ pykx.qhome: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib
+ pykx.qlic: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib
+ pykx.licensed: True
+ pykx.__version__: 1.5.3rc2.dev525+g41f008ad
+ pykx.file: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py
+
+ **** Python information ****
+ sys.version: 3.8.3 (default, Jul 2 2020, 11:26:31)
+ [Clang 10.0.0 ]
+ pandas: 1.5.3
+ numpy: 1.24.4
+ pytz: 2022.7.1
+ which python: /usr/local/anaconda3/bin/python
+ which python3: /usr/local/anaconda3/bin/python3
+
+ **** Platform information ****
+ platform.platform: macOS-10.16-x86_64-i386-64bit
+
+ **** PyKX Configuration Variables ****
+ PYKX_IGNORE_QHOME: False
+ PYKX_KEEP_LOCAL_TIMES: False
+ PYKX_ALLOCATOR: False
+ PYKX_GC: False
+ PYKX_LOAD_PYARROW_UNSAFE: False
+ PYKX_MAX_ERROR_LENGTH: 256
+ PYKX_NOQCE: False
+ PYKX_RELEASE_GIL: False
+ PYKX_Q_LIB_LOCATION: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib
+ PYKX_Q_LOCK: False
+ PYKX_SKIP_UNDERQ: False
+ PYKX_Q_EXECUTABLE: /usr/local/anaconda3/envs/qenv/q/m64/q
+ PYKX_THREADING: False
+ PYKX_4_1_ENABLED: False
+ PYKX_QDEBUG: False
+ PYKX_DEBUG_INSIGHTS_LIBRARIES: False
+ PYKX_DEFAULT_CONVERSION:
+ PYKX_EXECUTABLE: /usr/local/anaconda3/lib/python3.8/bin/python3.8
+ PYKX_PYTHON_LIB_PATH:
+ PYKX_PYTHON_BASE_PATH:
+ PYKX_PYTHON_HOME_PATH:
+ PYKX_DIR: /usr/local/anaconda3/lib/python3.8/site-packages/pykx
+
+ **** License information ****
+ pykx.qlic directory: True
+ pykx.lic writable: True
+ pykx.qhome lics: ['kc.lic']
+ pykx.qlic lics: ['kc.lic']
+
+ **** q information ****
+ which q: /usr/local/anaconda3/bin/q
+ q info:
+ ```
diff --git a/docs/index.md b/docs/index.md
index 5bdace8..751c225 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -2,32 +2,40 @@
## About
-PyKX is a Python first interface to the world's fastest time-series database kdb+ and its underlying vector programming language, q.
+Welcome to [PyKX](./getting-started/what_is_pykx.md)! PyKX is a Python first interface to the world's fastest time-series database kdb+ and its underlying vector programming language, q.
For Python developers, PyKX unlocks the speed and power of kdb+ for data processing and storage from within your Python environment. It enables anyone with Python knowledge to apply analytics against vast amounts of data, both in-memory and on-disk, in a fraction of the time, allowing you to focus on getting the best from your data.
For q developers, PyKX brings together Python's data science ecosystem and the power of kdb+'s vector and time-series analytics. This makes them available in both q and Python environments. You can use it to run q code within a Python environment or embed Python analytics within your q session.
-To begin your journey with PyKX follow the sections below.
+To begin your journey with PyKX, follow the sections below:
-## Documentation Breakdown
+## Documentation overview
-### [Getting Started](getting-started/what_is_pykx.md)
+!!! home-page "[Get Started](./getting-started/installing.md)"
-Documentation for users new to PyKX! Contains installation instructions alongside quickstart guides and sample getting started notebooks.
+ This section is for new users. It includes everything you need to begin your journey with PyKX: [installation instructions](./getting-started/installing.md) and a [quickstart guide](./getting-started/quickstart.md).
-### [User Guide](user-guide/index.md)
+!!! home-page "[Learn](./getting-started/what_is_pykx.md)"
-Useful information allowing users to understand the key concepts behind PyKX. Including how the library is intended to be used and examples of this functionality.
+ Explore the core concepts of our technology. This section provides you with detailed explanations and access to [KX Academy](https://learninghub.kx.com/courses/introduction-to-pykx/) materials.
-### [API](api/pykx-execution/q.md)
+!!! home-page "[How To](./user-guide/configuration.md)"
-Detailed descriptions of the functions, modules and objects managed by PyKX. Using the API reference assumes you have an understanding of how PyKX is intended to be used through the getting started and user guide.
+ Follow step-by-step guides to take you from initial setup to advanced production systems. You’ll find practical advice and best practices here.
-### [Release Notes](release-notes/changelog.md)
+!!! home-page "[Reference](./api/pykx-execution/q.md)"
-The latest additions and fixes for PyKX alongside historical changes.
+ Find detailed technical references for methods, functions, and APIs. This is your go-to source for all technical definitions and usage examples.
-### [Roadmap](roadmap.md)
+!!! home-page "[Integrations](./user-guide/advanced/numpy.md)"
-What to look out for in the next weeks, months and years from the PyKX team.
+ Learn how to integrate PyKX with other tools and platforms. This section includes guides and compatibility information to ensure seamless interoperability.
+
+!!! home-page "[Releases](./release-notes/changelog.md)"
+
+ Stay updated with the latest release notes and roadmap details. You’ll find information on the latest releases/fixes, previous versions, and upcoming features.
+
+!!! home-page "[Help and Support](./help/troubleshooting.md)"
+
+ Get assistance with any questions or issues you might have. This section includes [troubleshooting](./help/troubleshooting.md) guides, [FAQs](./help/faq.md), and [support](./help/support.md) contact information.
\ No newline at end of file
diff --git a/docs/learn/objects.md b/docs/learn/objects.md
new file mode 100644
index 0000000..3c39de5
--- /dev/null
+++ b/docs/learn/objects.md
@@ -0,0 +1,47 @@
+---
+title: PyKX Objects and Attributes
+description: Objects explained for PyKX
+date: September 2024
+author: KX Systems, Inc.,
+tags: PyKX, object
+---
+
+# PyKX objects and attributes
+
+_This page explains objects and attributes in PyKX._
+
+## What are PyKX objects?
+
+PyKX objects are Python representations of kdb+ data structures. They allow Python developers to interact with kdb+ databases, perform complex queries, and manipulate data efficiently.
+
+When you call or connect to a q instance, it returns a PyKX object. This object is an instance of the [`#!python pykx.K`](../api/pykx-q-data/wrappers.md#pykx.wrappers.K) class or one of its subclasses, as documented on the [PyKX wrappers API](../api/pykx-q-data/wrappers.md) page.
+
+PyKX objects act as wrappers around objects in q’s memory space within the Python process where PyKX (and your program) runs. These wrappers are efficient to create since they don’t require copying data out of q’s memory space.
+
+PyKX objects support various Python features like iteration, slicing, and calling, so converting them to other types (for example, from [`#!python pykx.Vector`](../api/pykx-q-data/wrappers.md#pykx.wrappers.Vector) to `#!python numpy.ndarray`) is often unnecessary.
+
+Examples of PyKX objects:
+
+- **Atoms**: Single values, such as integers, floats, or symbols.
+- **Vectors**: Arrays of values of the same type.
+- **Dictionaries**: Key-value pairs, where keys and values can be of different types.
+- **Tables**: Collections of columns, where each column is a vector.
+- **Lists**: These can contain elements of different types.
+
+### How to use PyKX objects
+
+To leverage the power of kdb+ within a Python environment, you can perform the following key operations with PyKX objects:
+
+| **Operation** | **Description** |
+|--------------------------------------------------------------|-------------|
+| [Create and convert](../user-guide/fundamentals/creating.md) | Create PyKX objects from various Python objects, such as lists, dictionaries, and NumPy arrays, and convert PyKX objects back to those Python types. |
+| [Use](../user-guide/fundamentals/evaluating.md) | Once created, interact with PyKX objects using familiar Pythonic syntax. For example [querying tables](../user-guide/fundamentals/query/pyquery.md) using Python. |
+| [Index](../user-guide/fundamentals/indexing.md) | Indexing PyKX objects allows you to access and manipulate elements within these objects, similar to how you would with standard Python sequences.|
+
+## What are PyKX attributes?
+
+Attributes are metadata that you attach to lists with special forms. They are also used on table columns to speed up retrieval for certain operations. PyKX can optimize based on the list structure implied by the attribute.
+
+Attributes (except for ``#!python `g#``) are descriptive rather than prescriptive. This means that by applying an attribute, you are asserting that the list has a special form, which PyKX will verify. It does not instruct PyKX to create or remake the list into its special form; that is your responsibility. If a list operation respects the form specified by the attribute, the attribute remains intact (except for ``#!python `p#``). However, if an operation breaks the form, the attribute is removed from the result.
+
+Learn how to [apply attributes](../user-guide/advanced/attributes.md) in PyKX.
diff --git a/docs/pykx-under-q/api.md b/docs/pykx-under-q/api.md
index 2abee19..5ced6a8 100644
--- a/docs/pykx-under-q/api.md
+++ b/docs/pykx-under-q/api.md
@@ -1,5 +1,7 @@
# pykx.q Library Reference Card
+!!! tip "Tip: For the best experience, read [How to use PyKX within q](../pykx-under-q/intro.md) and [Why upgrade from embedPy](../pykx-under-q/upgrade.md) first."
+
This page documents the functions found in the `pykx.q` q library that are available.
This library can be installed by calling a helper function within `PyKX`, this function will move
@@ -49,6 +51,7 @@ q)\l pykx.q
**Data Conversions:**
[setdefault define the default conversion for KX objects to Python](#pykxsetdefault)
[toq convert an (un)wrapped `PyKX` foreign object into a q type](#pykxtoq)
+[toq0 convert an (un)wrapped `PyKX` foreign object into a q type, use 2nd parameter to allow str objects to return as strings](#pykxtoq0)
[tok tag a q object to be indicate conversion to a Pythonic PyKX object when called in Python](#pykxtok)
[topy tag a q object to be indicate conversion to a Python object when called in Python](#pykxtopy)
[tonp tag a q object to be indicate conversion to a Numpy object when called in Python](#pykxtonp)
@@ -127,7 +130,7 @@ q).pykx.console[]
>>> kx.q['table'] = kx.q('([]2?1f;2?0Ng;2?`3)'
>>> quit()
q)table
-x x1 x2
+x x1 x2
--------------------------------------------------
0.439081 49f2404d-5aec-f7c8-abba-e2885a580fb6 mil
0.5759051 656b5e69-d445-417e-bfe7-1994ddb87915 igf
@@ -274,6 +277,68 @@ q)np[`:arange][10]`
0 1 2 3 4 5 6 7 8 9
```
+## `.pykx.listExtensions`
+
+
+_List all q scripts in the extensions directory which can be loaded_
+
+```q
+.pykx.listExtensions[]
+```
+
+**Returns:**
+
+type | description
+-------|------------
+`list` | A list of strings denoting the available extensions in your version of PyKX
+
+**Example:**
+
+```q
+q)\l pykx.q
+q).pykx.listExtensions[]
+"dashboards"
+```
+
+## `.pykx.loadExtension`
+
+
+_Loading of a PyKX extension_
+
+```q
+.pykx.loadExtension[ext]
+```
+
+**Parameters:**
+
+name | type | description
+-------|----------|-------------
+`ext` | `string` | The name of the extension which is to be loaded
+
+**Returns:**
+
+type | description
+-------|------------
+`null` | On successful execution this function will load the extension and return null
+
+**Example:**
+
+```q
+q)\l pykx.q
+q)`dash in key `.pykx
+0b
+// Load the "dashboards" extension (one of the names returned by .pykx.listExtensions[])
+q).pykx.loadExtension["dashboards"]
+q)`dash in key `.pykx
+1b
+```
+
+**Parameter:**
+
+|Name|Type|Description|
+|---|---|---|
+|ext|string|The name of the extension which is to be loaded|
+
## `.pykx.print`
@@ -296,10 +361,6 @@ type | description
-----|------------
`::` | Will print the output to stdout but return null
-!!! Note
-
- For back compatibility with embedPy this function is also supported in the shorthand form `print` which uses the `.q` namespace. To not overwrite `print` in your q session and allow use only of the longhand form `.pykx.print` set the environment variable `UNSET_PYKX_GLOBALS` to any value.
-
```q
// Use a wrapped foreign object
q)a: .pykx.eval"1+1"
@@ -371,7 +432,7 @@ _Convert a Python foreign object to a callable function which returns a Python f
**Parameters:**
-name | type | description
+name | type | description
-------------|-----------|-------------
`pyObject` | `foreign` | A Python object representing an underlying callable function
@@ -575,6 +636,11 @@ _Isolated execution of a q function which relies on importing PyKX_
.pykx.safeReimport[qFunction]
```
+For more information on the reimporter module which this functionality calls see
+ https://code.kx.com/pykx/api/reimporting.html#pykx.reimporter.PyKXReimport
+
+
+
**Parameters:**
name | type | description
@@ -589,12 +655,41 @@ type | description
**Example:**
+Initializing a Python process which imports PyKX
+
```q
q)\l pykx.q
q).pykx.safeReimport[{system"python -c 'import pykx as kx'";til 5}]
0 1 2 3 4
```
+Initializing a q child process which uses pykx.q
+
+```q
+q)\cat child.q
+"\l pykx.q"
+".pykx.print \"Hello World\""
+
+q)\l pykx.q
+q)system"q child.q" // Failing execution
+q)'2024.08.29T12:29:39.967 util.whichPython
+ [5] /usr/local/anaconda3/envs/qenv/q/pykx.q:123:
+ (`os ; util.os);
+ (`whichPython ; util.whichPython)
+ ^
+ )
+ [2] /usr/projects/pykx/child.q:1: \l pykx.q
+ ^
+q).pykx.safeReimport {system"q child.q"}
+"Hello World"
+```
+
+**Parameter:**
+
+|Name|Type|Description|
+|---|---|---|
+|x|||
+
## `.pykx.set`
@@ -740,7 +835,7 @@ type | description |
??? "Supported Options"
- The following outline the supported conversion types and the associated values which can be passed to set these values
+ The following outlines the supported conversion types and the associated values which can be passed to set these values
Conversion Format | Accepted inputs |
---------------------------------------------------------------|------------------------------|
@@ -748,11 +843,10 @@ type | description |
[Pandas](https://pandas.pydata.org/docs/user_guide/index.html) | `"pd", "pandas", "Pandas"` |
[Python](https://docs.python.org/3/library/datatypes.html) | `"py", "python", "Python"` |
[PyArrow](https://arrow.apache.org/docs/python/index.html) | `"pa", "pyarrow", "PyArrow"` |
- [K](../api/pykx-q-data/type_conversions.md) | `"k", "q"` |
+ K | `"k", "q"` |
raw | `"raw"` |
default | `"default"` |
-
```q
// Default value on startup is "default"
q).pykx.util.defaultConv
@@ -790,7 +884,7 @@ type | description
```q
// Denote that a q object once passed to Python should be managed as a default object
-// in this case a q list is converted to numpy
+// in this case a q list is converted to numpy
q).pykx.todefault til 10
enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9]
@@ -982,6 +1076,50 @@ q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topy til 10
```
+## `.pykx.toq0`
+
+
+_Convert an (un)wrapped `PyKX` foreign object into an analogous q type._
+
+```q
+.pykx.toq0[pythonObject;strAsChar]
+```
+
+**Parameters:**
+
+name | type | description |
+---------------|------------------------|-------------|
+`pythonObject` | foreign/composition | A foreign Python object or composition containing a Python foreign to be converted to q
+`strAsChar` | Optional[boolean] | A boolean indicating whether a Python `str` should be returned to q as a string rather than as the default symbol
+
+**Return:**
+
+type | description
+------|------------
+`any` | A q object converted from Python
+
+```q
+// Convert a wrapped PyKX foreign object to q
+q)show a:.pykx.eval["1+1"]
+{[f;x].pykx.util.pykx[f;x]}[foreign]enlist
+q).pykx.toq0 a
+2
+
+// Convert an unwrapped PyKX foreign object to q
+q)show b:a`.
+foreign
+q).pykx.toq0 b
+2
+
+// Convert a Python string to q symbol or string
+
+q).pykx.toq0[.pykx.eval"\"test\""]
+`test
+
+q).pykx.toq0[.pykx.eval"\"test\"";1b]
+"test"
+```
+
## `.pykx.toraw`
@@ -1175,7 +1313,7 @@ name | type | description
type | description
-------------|------------
-`projection` | A projection which when used with a wrapped callable Python
+`projection` | A projection which when used with a wrapped callable Python
**Example:**
@@ -1201,7 +1339,7 @@ pykwargs argDict
!!! Warning
- This function will be set in the root `.q` namespace
+ This function will be set in the root `.q` namespace
**Parameters:**
@@ -1310,3 +1448,102 @@ hello
q).pykx.wrap[.pykx.getattr[a;`y]]`
`hello
```
+
+
+
+## `.pykx.dash.available`
+
+
+_Function to denote if all Python libraries required for dashboards are available_
+
+## `.pykx.dash.runFunction`
+
+
+_Generate and execute a callable Python function using supplied arguments_
+
+```q
+.pykx.dash.runFunction[pycode;args]
+```
+**Parameters:**
+
+name | type | description |
+---------|----------|------------------------------------------------------------------------|
+`pycode` | `string` | The Python code that is to be executed for use as a function |
+`args` | `list` | A mixed/generic list of arguments to be used when calling the function |
+
+**Returns:**
+
+type | description |
+-------|------------------------------------------------------------------------|
+`any` | The result of calling the generated Python function with the supplied `args` |
+
+**Example:**
+
+Single argument function usage:
+
+```q
+q).pykx.dash.runFunction["def func(x):\n\treturn x";enlist ([]5?1f;5?1f)]
+x x1
+-------------------
+0.9945242 0.6298664
+0.7930745 0.5638081
+0.2073435 0.3664924
+0.4677034 0.9240405
+0.4126605 0.5420167
+```
+
+Multiple argument function usage:
+
+```q
+q).pykx.dash.runFunction["def func(x, y):\n\treturn x*y";(2;5)]
+10
+```
+
+Function using Python dependencies:
+
+```q
+q).pykx.dash.runFunction["import numpy as np\n\ndef func(x):\n\treturn np.linspace(0, x.py(), 5)";enlist 10]
+0 2.5 5 7.5 10
+```
+
+**Parameters:**
+
+|Name|Type|Description|
+|---|---|---|
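+|pyCode|string|The Python code that is to be executed for use as a function|
+|args|list|A mixed/generic list of arguments to be used when calling the function|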
+
+
+
+## `.pykx.dash.util.getFunction`
+
+
+_Functionality for the generation of a Python function to be called from code_
+
+```q
+.pykx.dash.util.getFunction[pycode]
+```
+**Parameters:**
+
+name | type | description |
+---------------|----------|--------------------------------------------------------------|
+`pycode` | `string` | The Python code that is to be executed for use as a function |
+
+**Returns:**
+
+type | description |
+--------------|-------------|
+`composition` | A wrapped foreign Python object associated with the specified code
+
+**Example:**
+
+```q
+q).pykx.dash.util.getFunction["def func(x):\n\treturn 1"]
+{[f;x].pykx.util.pykx[f;x]}[foreign]enlist
+```
+
+**Parameter:**
+
+|Name|Type|Description|
+|---|---|---|
+|pyCode|string|The Python code that is to be executed for use as a function|
diff --git a/docs/pykx-under-q/intro.md b/docs/pykx-under-q/intro.md
index 2a6568d..e3e1e17 100644
--- a/docs/pykx-under-q/intro.md
+++ b/docs/pykx-under-q/intro.md
@@ -1,46 +1,56 @@
-# Using PyKX within a q session
+---
+title: PyKX within q
+description: How to use PyKX in a q session
+date: June 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, setup,
+---
+
+# How to use PyKX within q
+
+_This page provides details on how to run PyKX within a q session, including how to evaluate and execute Python code, how to interact with objects, and how to call a function._
+
+!!! tip "Tip: For the best experience, we recommend reading [Why upgrade from embedPy](../pykx-under-q/upgrade.md) first."
## Introduction
-As described in the majority of the documentation associated with PyKX, the principal intended usage of the library is as Python first interface to the programming language q and it's underlying database kdb+. However as described in the limitations section [here](../user-guide/advanced/limitations.md) not all use-cases can be satisfied with this modality. In particular software relying on the use of active subscriptions such as real-time analytic engines or any functionality reliant on timers in q cannot be run from Python directly without reimplementing this logic Pythonically.
+PyKX is a Python-first interface to the programming language q and its underlying database kdb+. To overcome a few [limitations](../help/issues.md), PyKX allows you to run Python within q, similarly to [embedPy](https://github.com/kxsystems/embedpy). The ability to execute and manipulate Python objects within a q session helps two types of users in the following ways:
-As such a modality is distributed with PyKX which allows Python functionality to be run from within a q session. This is achieved through the creation of a domain-specific language (DSL) which allows for the execution and manipulation of Python objects within a q session. Providing this functionality allows users proficient in kdb+/q to build applications which embed machine learning/data science libraries within production q infrastructures and allows users to use plotting libraries to visualise the outcomes of their analyses.
+ - kdb+/q users can build applications which embed machine learning/data science libraries in production q infrastructures.
+ - Users of Python plotting libraries can visualize and explore the outcomes of their analyses.
## Getting started
### Prerequisites
-To make use of PyKX running embedded within a q session a user must have the following set up
-
-1. The user has access to a running `q` environment, follow the q installation guide [here](https://code.kx.com/q/learn/install/) for more information.
-2. The user is permissioned to run PyKX with access to a license containing the feature flags `insights.lib.pykx` and `insights.lib.embedq` For more information see [here](../getting-started/installing.md).
+Before you run PyKX within q, make sure you:
-### Installation
+1. Have access to a running `#!python q` environment. Follow [the q installation guide](https://code.kx.com/q/learn/install/) for more information.
+2. Have [installed](../getting-started/installing.md) the licensed version of PyKX.
-To facilitate the execution of Python code within a q session a user must first install the PyKX library and the q script used to drive this embedded feature into their `$QHOME` location. This can be done as follows.
+### Install
-1. Install the PyKX library following the instructions [here](../getting-started/installing.md).
-2. Run the following command to install the `pykx.q` script:
+Run the following command to install the `#!python pykx.q` script into your `#!python $QHOME` directory:
- ```python
- python -c "import pykx;pykx.install_into_QHOME()"
- ```
+```python
+python -c "import pykx;pykx.install_into_QHOME()"
+```
- If you previously had `embedPy` installed pass:
+If you previously had `#!python embedPy` installed, pass:
- ```python
- python -c "import pykx;pykx.install_into_QHOME(overwrite_embedpy=True)"
- ```
+```python
+python -c "import pykx;pykx.install_into_QHOME(overwrite_embedpy=True)"
+```
- If you cannot edit files in `QHOME` you can copy the files to your local folder and load `pykx.q` from there:
+If you cannot edit the files in `#!python QHOME`, copy them to your local folder and load `#!python pykx.q` from there:
- ```bash
- python -c "import pykx;pykx.install_into_QHOME(to_local_folder=True)"
- ```
+```bash
+python -c "import pykx;pykx.install_into_QHOME(to_local_folder=True)"
+```
-### Initialization
+### Initialize
-Once installation has been completed a user should be in a position to initialise the library as follows
+Initialize the library as follows:
```q
q)\l pykx.q
@@ -56,116 +66,120 @@ import | {[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r}[code]enlist
..
```
-## Using the library
+## How to use the library
-Usage of the functionality provided by this library can range in complexity from the simple execution of Python code through to the generation of streaming applications containing machine learning models. The following documentation section outlines the use of this library under various use-case agnostic scenarios
+Use this library to complete a wide variety of tasks, from the simple execution of Python code through to the generation of streaming applications containing machine learning models. The next sections outline various use-case-agnostic scenarios that you can follow.
-### Evaluating and Executing Python code
+### Evaluate and Execute Python
-#### Executing Python code
-
-This interface allows a user to execute Python code a variety of ways:
-
-1. Executing directly using the `.pykx.pyexec` function
+??? "Differences between evaluation and execution"
- This is incredibly useful if there is a requirement to script execution of Python code within a library
+ Python evaluation (unlike Python execution) does not allow side effects. Any attempt at variable assignment or class definition signals an error. To execute a string with side effects, use `#!python .pykx.pyexec` or `#!python .p.e`.
- ```q
- q).pykx.pyexec"import numpy as np"
- q).pykx.pyexec"array = np.array([0, 1, 2, 3])"
- q).pykx.pyexec"print(array)"
- [0 1 2 3]
- ```
+ [Difference between eval and exec in Python](https://stackoverflow.com/questions/2220699/whats-the-difference-between-eval-exec-and-compile)
-2. Usage of the PyKX console functionality
+??? info "What’s a Python side effect?"
- This is useful when interating within a q session and needing to prototype some functionality in Python
+ A Python function has side effects if it might do more than return a value, for example, modify the state or interact with external entities/systems in a noticeable way. Such effects could manifest as changes to input arguments, modifications to global variables, file operations, or network communications.
- ```q
- q).pykx.console[]
- >>> import numpy as np
- >>> print(np.linspace(0, 10, 5))
- [ 0. 2.5 5. 7.5 10. ]
- >>> quit()
- q)
- ```
+#### Evaluate Python code
-3. Execution through use of a `p)` prompt
+To evaluate Python code with PyKX, pass a string of Python code to a variety of PyKX functions as shown below.
- Provided as a way to embed execution of Python code within a q script, additionally this provides backwards compatibility with PyKX.
+For example, if you want to evaluate and return the result to `#!python q`, use the function `#!python .pykx.qeval`:
- ```q
- q)p)import numpy as np
- q)p)print(np.arange(1, 10, 2))
- [1 3 5 7 9]
- ```
+```q
+q).pykx.qeval"1+2"
+3
+```
+Similarly, to evaluate Python code and return the result as a `#!python foreign` object denoting the underlying Python object, use:
-4. Loading of a `.p` file
+```q
+q)show a:.pykx.pyeval"1+2"
+foreign
+q)print a
+3
+```
+Finally, to return a hybrid representation that you can edit to return the q or Python representation, run the following:
- This is provided as a method of executing the contents of a Python file in bulk.
+```q
+q)show b:.pykx.eval"1+2"
+{[f;x].pykx.util.pykx[f;x]}[foreign]enlist
+q)b` // Convert to a q object
+3
+q)b`. // Convert to a Python foreign
+foreign
+```
- ```q
- $ cat test.p
- def func(x, y):
- return(x+y)
- $ q pykx.q
- q)\l test.p
- q).pykx.get[`func]
- {[f;x].pykx.util.pykx[f;x]}[foreign]enlist
- ```
+#### Execute Python code
-#### Evaluating Python code
+This interface allows you to execute Python code in a variety of ways:
-The evaluation of Python code can be completed using PyKX by passing a string of Python code to a variety of functions.
+a) Execute directly with the `#!python .pykx.pyexec` function
-??? "Differences between evaluation and execution"
+This is incredibly useful if you need to script execution of Python code within a library:
- Python evaluation (unlike Python execution) does not allow side effects. Any attempt at variable assignment or class definition will signal an error. To execute a string performing side effects, use `.pykx.pyexec` or `.p.e`.
+```q
+q).pykx.pyexec"import numpy as np"
+q).pykx.pyexec"array = np.array([0, 1, 2, 3])"
+q).pykx.pyexec"print(array)"
+[0 1 2 3]
+```
- [Difference between eval and exec in Python](https://stackoverflow.com/questions/2220699/whats-the-difference-between-eval-exec-and-compile)
+b) Use the PyKX console functionality
-To evaluate Python code and return the result to `q`, use the function `.pykx.qeval`.
+This is useful when you are working interactively within a q session and need to prototype some functionality in Python:
```q
-q).pykx.qeval"1+2"
-3
+q).pykx.console[]
+>>> import numpy as np
+>>> print(np.linspace(0, 10, 5))
+[ 0. 2.5 5. 7.5 10. ]
+>>> quit()
+q)
```
-Similarly to evaluate Python code and return the result as a `foreign` object denoting the underlying Python object
+c) Use a `#!python p)` prompt
+
+This way of embedding the execution of Python code within a q script also provides backwards compatibility with embedPy:
```q
-q)show a:.pykx.pyeval"1+2"
-foreign
-q)print a
-3
+q)p)import numpy as np
+q)p)print(np.arange(1, 10, 2))
+[1 3 5 7 9]
```
-Finally to return a hybrid representation which can be manipulated to return the q or Python representation you can run the following
+d) Load a `#!python .p` file
+
+This is a method of executing the contents of a Python file in bulk:
```q
-q)show b:.pykx.eval"1+2"
+$ cat test.p
+def func(x, y):
+ return(x+y)
+$ q pykx.q
+q)\l test.p
+q).pykx.get[`func]
{[f;x].pykx.util.pykx[f;x]}[foreign]enlist
-q)b` // Convert to a q object
-3
-q)b`. // Convert to a Python foreign
-foreign
```
-## Interacting with PyKX objects
+### Interact with PyKX objects
-### Foreign objects
+#### Foreign objects
At the lowest level, Python objects are represented in q as foreign objects, which contain pointers to objects in the Python memory space.
-Foreign objects can be stored in variables just like any other q datatype, or as part of lists, dictionaries or tables. They will display as foreign when inspected in the q console or using the string (or .Q.s) representation.
+You can store foreign objects in variables just like any other q datatype, or as part of lists, dictionaries or tables. They will show up as foreign when inspected in the q console or using the string (or .Q.s) representation.
-**Serialization:** Kdb+ cannot serialize foreign objects, nor send them over IPC: they live in the embedded Python memory space. To pass these objects over IPC, first convert them to q.
+??? "Serialization and IPC"
-### PyKX objects
+ Kdb+ cannot serialize foreign objects, nor send them over IPC. Foreign objects live in the embedded Python memory space. To pass them over IPC, first you have to convert them to q.
-Foreign objects cannot be directly operated on in q. Instead, Python objects are typically represented as PyKX objects, which wrap the underlying foreign objects. This provides the ability to get and set attributes, index, call or convert the underlying foreign object to a q object.
+#### Create PyKX objects
-Use `.pykx.wrap` to create a PyKX object from a foreign object.
+q doesn't allow you to operate directly with foreign objects. Instead, Python objects are represented as PyKX objects, which wrap the underlying foreign objects. This helps to get and set attributes, index, call or convert the underlying foreign object to a q object.
+
+Use `#!python .pykx.wrap` to create a PyKX object from a foreign object.
```q
q)x
@@ -175,19 +189,22 @@ q)p /how a PyKX object looks
{[f;x].pykx.util.pykx[f;x]}[foreign]enlist
```
-More commonly, PyKX objects are retrieved directly from Python using one of the following functions:
+To retrieve PyKX objects directly from Python, choose between the following functions:
-function | argument | example
+**Function** | **Argument** | **Example**
---------------|--------------------------------------------------|-----------------------
`.pykx.import` | symbol: name of a Python module or package, optional second argument is the name of an object within the module or package | ``np:.pykx.import`numpy``
`.pykx.get` | symbol: name of a Python variable in `__main__` | ``v:.pykx.get`varName``
`.pykx.eval` | string: Python code to evaluate | `x:.pykx.eval"1+1"`
-**Side effects:** As with other Python evaluation functions and noted previously, `.pykx.eval` does not permit side effects.
-### Converting data
+!!! warning "Side effects"
+
+ As with other Python evaluation functions, `#!python .pykx.eval` does not allow side effects.
-Given `obj`, a PyKX object representing Python data, we can get the underlying data (as foreign or q) using
+#### Convert data
+
+Given `#!python obj`, a PyKX object representing Python data, you can get the underlying data (as a foreign object or as q data) by using:
```q
obj`. / get data as foreign
@@ -206,28 +223,30 @@ q)x`
1 2 3
```
-### `None` and identity
+#### `#!python None` and identity
+
+Python `#!python None` maps to the q identity function `#!python ::` when converting from Python to q (and vice versa).
-Python `None` maps to the q identity function `::` when converting from Python to q (and vice versa).
+!!! warning "Exception!"
-There is one important exception to this. When calling Python functions, methods or classes with a single q data argument, passing `::` will result in the Python object being called with _no_ arguments, rather than a single argument of `None`. See the section below on _Zero-argument calls_ for how to explicitly call a Python callable with a single `None` argument.
+ When calling Python functions, methods or classes with a single q data argument, passing `::` results in the Python object being called with _no arguments_, rather than a single argument of `None`. See the [Zero-argument calls](#zero-argument-calls) section for how to call a Python object with a single `None` argument.
-### Getting attributes and properties
+#### Get attributes and properties
-Given `obj`, a PyKX object representing a Python object, we can get an attribute or property directly using
+Given `#!python obj`, a PyKX object representing a Python object, you can get an attribute or property by using:
```q
obj`:attr / equivalent to obj.attr in Python
obj`:attr1.attr2 / equivalent to obj.attr1.attr2 in Python
```
-These expressions return PyKX objects, allowing users to chain operations together.
+These expressions return PyKX objects, allowing you to chain operations together:
```q
obj[`:attr1]`:attr2 / equivalent to obj.attr1.attr2 in Python
```
-e.g.
+For example:
```bash
$ cat class.p
@@ -246,15 +265,15 @@ q)obj[`:y]`
3
```
-### Setting attributes and properties
+#### Set attributes and properties
-Given `obj`, a PyKX object representing a Python object, we can set an attribute or property directly using
+Given `#!python obj`, a PyKX object representing a Python object, you can set an attribute or property by using:
```q
obj[:;`:attr;val] / equivalent to obj.attr=val in Python
```
-e.g.
+For example:
```q
q)obj[`:x]`
@@ -269,21 +288,21 @@ q)obj[`:y]`
20
```
-### Indexing
+#### How to index
-Given `lst`, a PyKX object representing an indexable container object in Python, we can access the element at index `i` using
+Given `#!python lst`, a PyKX object representing an indexable container object in Python, you can access the element at index `#!python i` by using:
```q
lst[@;i] / equivalent to lst[i] in Python
```
-We can set the element at index `i` (to object `x`) using
+Set the element at index `#!python i` (to object `#!python x`) with this command:
```q
lst[=;i;x] / equivalent to lst[i]=x in Python
```
-These expressions return PyKX objects, e.g.
+These expressions return PyKX objects, for instance:
```q
q)lst:.pykx.eval"[True,2,3.0,'four']"
@@ -305,17 +324,15 @@ q)lst`
`last
```
-### Getting methods
+#### Get methods
-Given `obj`, a PyKX object representing a Python object, we can access a method directly using
+Given `#!python obj`, a PyKX object representing a Python object, you can access a method by using:
```q
obj`:method / equivalent to obj.method in Python
```
-Presently the calling of PyKX objects representing Python methods is only supported in such a manner that the return of evaluation is a PyKX object.
-
-For example
+When calling PyKX objects representing Python methods, the return of evaluation is a PyKX object. For example:
```q
q)np:.pykx.import`numpy
@@ -328,13 +345,16 @@ q)arange[12]`
0 1 2 3 4 5 6 7 8 9 10 11
```
-### PyKX function API
+#### PyKX function API
-Using the function API, PyKX objects can be called directly (returning PyKX objects) or declared callable returning q or `foreign` data.
+Use the function API to achieve the following:
-Users explicitly specify the return type as q or foreign, the default is as a PyKX object.
+- Call PyKX objects (to get PyKX objects).
+- Declare PyKX objects callable (to get q or `#!python foreign` data).
-Given `func`, a `PyKX` object representing a callable Python function or method, we can carry out the following operations:
+By default, calls return a PyKX object. To return q or foreign data instead, specify the return type explicitly.
+
+Given `#!python func`, a `#!python PyKX` object representing a callable Python function or method, you can carry out the following operations:
```q
func / func is callable by default (returning PyKX)
@@ -347,193 +367,195 @@ func[>]arg / call func(arg) (returning foreign)
func[>;arg] / equivalent
```
-**Chaining operations** Returning another PyKX object from a function or method call, allows users to chain together sequences of operations. We can also chain these operations together with calls to `.pykx.import`, `.pykx.get` and `.pykx.eval`.
-
-
-### PyKX examples
-
-Some examples
+!!! info "How to chain operations?"
+
+ To chain together sequences of operations, return another PyKX object from a function or method call. You can also chain these operations together with calls to `.pykx.import`, `.pykx.get` and `.pykx.eval`.
-```bash
-$ cat test.p # used for tests
-class obj:
- def __init__(self,x=0,y=0):
- self.x = x # attribute
- self.y = y # property (incrementing on get)
- @property
- def y(self):
- a=self.__y
- self.__y+=1
- return a
- @y.setter
- def y(self, y):
- self.__y = y
- def total(self):
- return self.x + self.y
-```
-```q
-q)\l test.p
-q)obj:.pykx.get`obj / obj is the *class* not an instance of the class
-q)o:obj[] / call obj with no arguments to get an instance
-q)o[`:x]`
-0
-q)o[;`]each 5#`:x
-0 0 0 0 0
-q)o[:;`:x;10]
-q)o[`:x]`
-10
-q)o[`:y]`
-1
-q)o[;`]each 5#`:y
-3 5 7 9 11
-q)o[:;`:y;10]
-q)o[;`]each 5#`:y
-10 13 15 17 19
-q)tot:o[`:total;<]
-q)tot[]
-30
-q)tot[]
-31
-```
+#### PyKX examples
-```q
-q)np:.pykx.import`numpy
-q)v:np[`:arange;12]
-q)v`
-0 1 2 3 4 5 6 7 8 9 10 11
-q)v[`:mean;<][]
-5.5
-q)rs:v[`:reshape;<]
-q)rs[3;4]
-0 1 2 3
-4 5 6 7
-8 9 10 11
-q)rs[2;6]
-0 1 2 3 4 5
-6 7 8 9 10 11
-q)np[`:arange;12][`:reshape;3;4]`
-0 1 2 3
-4 5 6 7
-8 9 10 11
-```
-
-```q
-q)stdout:.pykx.import[`sys]`:stdout.write
-q)stdout `$"hello\n";
-hello
-q)stderr:.pykx.import[`sys;`:stderr.write]
-q)stderr `$"goodbye\n";
-goodbye
-```
-
-```q
-q)oarg:.pykx.eval"10"
-q)oarg`
-10
-q)ofunc:.pykx.eval["lambda x:2+x";<]
-q)ofunc[1]
-3
-q)ofunc oarg
-12
-q)p)def add2(x,y):return x+y
-q)add2:.pykx.get[`add2;<]
-q)add2[1;oarg]
-11
-```
+=== "Example #1"
-### Function argument types
+ ```bash
+ $ cat test.p # used for tests
+ class obj:
+ def __init__(self,x=0,y=0):
+ self.x = x # attribute
+ self.y = y # property (incrementing on get)
+ @property
+ def y(self):
+ a=self.__y
+ self.__y+=1
+ return a
+ @y.setter
+ def y(self, y):
+ self.__y = y
+ def total(self):
+ return self.x + self.y
+ ```
-One of the distinct differences that PyKX has over the previous incarnation of embedded interfacing with Python in q PyKX is support for a much wider variety of data type conversions between q and Python.
+ ```q
+ q)\l test.p
+ q)obj:.pykx.get`obj / obj is the *class* not an instance of the class
+ q)o:obj[] / call obj with no arguments to get an instance
+ q)o[`:x]`
+ 0
+ q)o[;`]each 5#`:x
+ 0 0 0 0 0
+ q)o[:;`:x;10]
+ q)o[`:x]`
+ 10
+ q)o[`:y]`
+ 1
+ q)o[;`]each 5#`:y
+ 3 5 7 9 11
+ q)o[:;`:y;10]
+ q)o[;`]each 5#`:y
+ 10 13 15 17 19
+ q)tot:o[`:total;<]
+ q)tot[]
+ 30
+ q)tot[]
+ 31
+ ```
+=== "Example #2"
+
+ ```q
+ q)np:.pykx.import`numpy
+ q)v:np[`:arange;12]
+ q)v`
+ 0 1 2 3 4 5 6 7 8 9 10 11
+ q)v[`:mean;<][]
+ 5.5
+ q)rs:v[`:reshape;<]
+ q)rs[3;4]
+ 0 1 2 3
+ 4 5 6 7
+ 8 9 10 11
+ q)rs[2;6]
+ 0 1 2 3 4 5
+ 6 7 8 9 10 11
+ q)np[`:arange;12][`:reshape;3;4]`
+ 0 1 2 3
+ 4 5 6 7
+ 8 9 10 11
+ ```
+=== "Example #3"
+
+ ```q
+ q)stdout:.pykx.import[`sys]`:stdout.write
+ q)stdout `$"hello\n";
+ hello
+ q)stderr:.pykx.import[`sys;`:stderr.write]
+ q)stderr `$"goodbye\n";
+ goodbye
+ ```
+=== "Example #4"
+
+ ```q
+ q)oarg:.pykx.eval"10"
+ q)oarg`
+ 10
+ q)ofunc:.pykx.eval["lambda x:2+x";<]
+ q)ofunc[1]
+ 3
+ q)ofunc oarg
+ 12
+ q)p)def add2(x,y):return x+y
+ q)add2:.pykx.get[`add2;<]
+ q)add2[1;oarg]
+ 11
+ ```
-In particular the following types are supported:
+#### Function argument types
-1. Python native objects
-2. Numpy objects
-3. Pandas objects
-4. PyArrow objects
-5. PyKX objects
+PyKX supports data type conversions between q and Python for Python native objects, Numpy objects, Pandas objects, PyArrow objects, and PyKX objects.
-By default when passing a q object to a callable function it will be converted to the most "natural" analogous types. This is controlled through the setting of `.pykx.util.defaultConv`
+By default, when passing a q object to a callable function, it's converted to the most "natural" analogous type, as detailed below:
-- PyKX/q generic list objects will be converted to Python lists
-- PyKX/q table/keyed table objects will be converted to Pandas equivalent DataFrames
-- All other PyKX/q objects will be converted to their analogous PyKX/q types
+- PyKX/q generic list objects become Python lists.
+- PyKX/q table/keyed table objects become Pandas equivalent DataFrames.
+- All other PyKX/q objects become their analogous Numpy types.
!!! Warning
- Prior to PyKX 2.1.0 all conversions from q objects to Python would convert to their Numpy equivalent. This behaviour raised a number of issues with migration for users previously operating with embedPy and as such has been migrated to the behaviour described above. If you require the same behaviour as that prior to 2.1.0 please set the environment variable `PYKX_DEFAULT_CONVERSION="np"`
-
-For example:
-
-```q
-q)typeFunc:.pykx.eval"lambda x:print(type(x))"
-q)typeFunc 1;
-
-q)typeFunc til 10;
-
-q)typeFunc (10?1f;10?1f)
-
-q)typeFunc ([]100?1f;100?1f);
-
-```
+ Prior to PyKX 2.1.0, all conversions from q objects to Python would convert to their Numpy equivalent. To achieve this now, set the environment variable `PYKX_DEFAULT_CONVERSION="np"`
-The default behavior of the conversions which are undertaken when making function/method calls is controlled through the definition of `.pykx.util.defaultConv`
+For function/method calls, control the default behavior of the conversions by setting `#!python .pykx.util.defaultConv`:
```q
q).pykx.util.defaultConv
"default"
```
+You can apply one of the following values:
-This can have one of the following values:
+|**Python type**|Default|Python|Numpy|Pandas|PyArrow|PyKX|
+|---------------|-------|------|-----|------|-------|----|
+|**Value**: |"default"|"py"|"np"|"pd"|"pa"|"k"|
-| Python type | Value |
-|-------------|-----------|
-| Default | "default" |
-| Python | "py" |
-| Numpy | "np" |
-| Pandas | "pd" |
-| PyArrow | "pa" |
-| PyKX | "k" |
-Taking the examples above for Numpy we can update the default types across all function calls
+In the example below, we start with Numpy and update the default types across all function calls:
-```q
-q)typeFunc:.pykx.eval"lambda x:print(type(x))"
-q).pykx.util.defaultConv:"py"
-q)typeFunc 1;
-
-q)typeFunc til 10;
-
-q)typeFunc ([]100?1f;100?1f);
-
-
-q).pykx.util.defaultConv:"pd"
-q)typeFunc 1;
-
-q)typeFunc til 10;
-
-q)typeFunc ([]100?1f;100?1f);
-
-
-q).pykx.util.defaultConv:"pa"
-q)typeFunc 1;
-
-q)typeFunc til 10;
-
-q)typeFunc ([]100?1f;100?1f);
-
+=== "Numpy"
-q).pykx.util.defaultConv:"k"
-q)typeFunc 1;
-
-q)typeFunc til 10;
-
-q)typeFunc ([]100?1f;100?1f);
-
-```
+ ```q
+ q)typeFunc:.pykx.eval"lambda x:print(type(x))"
+ q)typeFunc 1;
+
+ q)typeFunc til 10;
+
+ q)typeFunc (10?1f;10?1f)
+
+ q)typeFunc ([]100?1f;100?1f);
+
+ ```
+=== "Python"
+
+ ```q
+ q)typeFunc:.pykx.eval"lambda x:print(type(x))"
+ q).pykx.util.defaultConv:"py"
+ q)typeFunc 1;
+
+ q)typeFunc til 10;
+
+ q)typeFunc ([]100?1f;100?1f);
+
+ ```
+=== "Pandas"
+
+ ```q
+ q).pykx.util.defaultConv:"pd"
+ q)typeFunc 1;
+
+ q)typeFunc til 10;
+
+ q)typeFunc ([]100?1f;100?1f);
+
+ ```
+=== "PyArrow"
+
+ ```q
+ q).pykx.util.defaultConv:"pa"
+ q)typeFunc 1;
+
+ q)typeFunc til 10;
+
+ q)typeFunc ([]100?1f;100?1f);
+
+ ```
+=== "PyKX"
+
+ ```q
+ q).pykx.util.defaultConv:"k"
+ q)typeFunc 1;
+
+ q)typeFunc til 10;
+
+ q)typeFunc ([]100?1f;100?1f);
+
+ ```
-Alternatively individual arguments to functions can be modified using the `.pykx.to*` functionality, for example in the following:
+Alternatively, to modify individual arguments to functions, use the `#!python .pykx.to*` functionality:
```q
q)typeFunc:.pykx.eval"lambda x,y: [print(type(x)), print(type(y))]"
@@ -551,9 +573,9 @@ q)typeFunc[.pykx.tok til 10;.pykx.tok ([]100?1f)]; // Pass in two PyKX objects
```
-### Setting Python variables
+#### Set Python variables
-Variables can be set in Python `__main__` using `.pykx.set`
+You can set variables in Python `#!python __main__` by using `#!python .pykx.set`:
```q
q).pykx.set[`var1;42]
@@ -566,28 +588,28 @@ q)qfunc[3]
6
```
-## Function calls
-
+### Function calls
-Python allows for calling functions with
+Python allows you to call functions with:
- A variable number of arguments
- A mixture of positional and keyword arguments
- Implicit (default) arguments
-All of these features are available through the PyKX function-call interface.
-Specifically:
+This is available in the PyKX function-call interface, as detailed below:
-- Callable PyKX objects are variadic
-- Default arguments are applied where no explicit arguments are given
-- Individual keyword arguments are specified using the (infix) `pykw` operator
-- A list of positional arguments can be passed using `pyarglist` (like Python \*args)
-- A dictionary of keyword arguments can be passed using `pykwargs` (like Python \*\*kwargs)
+- Callable PyKX objects are variadic (they accept a variable number of arguments).
+- Default arguments are applied where no explicit arguments are given.
+- Individual keyword arguments are specified using the (infix) `#!python pykw` operator.
+- A list of positional arguments can be passed using `#!python pyarglist` (like Python \*args).
+- A dictionary of keyword arguments can be passed using `#!python pykwargs` (like Python \*\*kwargs).
-**Keyword arguments last** We can combine positional arguments, lists of positional arguments, keyword arguments and a dictionary of keyword arguments. However, _all_ keyword arguments must always follow _any_ positional arguments. The dictionary of keyword arguments (if given) must be specified last.
+!!! info "Keyword arguments last"
+
+ You can combine positional arguments, lists of positional arguments, keyword arguments, and a dictionary of keyword arguments. However, _all_ keyword arguments must always follow _any_ positional arguments. The dictionary of keyword arguments (if given) must be specified _last_.
-### Example function calls
+#### Examples
```q
q)p)import numpy as np
@@ -595,8 +617,7 @@ q)p)def func(a=1,b=2,c=3,d=4):return np.array([a,b,c,d,a*b*c*d])
q)qfunc:.pykx.get[`func;<] / callable, returning q
```
-Positional arguments are entered directly.
-Function calling is variadic, so later arguments can be excluded.
+Enter positional arguments directly. Function calling is variadic, so you can exclude later arguments:
```q
q)qfunc[2;2;2;2] / all positional args specified
@@ -611,8 +632,7 @@ q)qfunc[2;2;2;2;2] / error if too many args specified
^
```
-Individual keyword arguments can be specified using the `pykw` operator (applied infix).
-Any keyword arguments must follow positional arguments, but the order of keyword arguments does not matter.
+Specify individual keyword arguments with the `#!python pykw` operator (applied infix). The order of keyword arguments doesn't matter.
```q
q)qfunc[`d pykw 1;`c pykw 2;`b pykw 3;`a pykw 4] / all keyword args specified
@@ -629,7 +649,7 @@ q)qfunc[`a pykw 2;`a pykw 2] / error if duplicate keyword args
^
```
-A list of positional arguments can be specified using `pyarglist` (similar to Python’s \*args).
+To specify a list of positional arguments, use `#!python pyarglist` (similar to Python’s \*args).
Again, keyword arguments must follow positional arguments.
```q
@@ -651,9 +671,8 @@ q)qfunc[`a pykw 1;pyarglist 2 2 2] / error if positional list after keyword arg
^
```
-
-A dictionary of keyword arguments can be specified using `pykwargs` (similar to Python’s \*\*kwargs).
-If present, this argument must be the _last_ argument specified.
+You can specify a dictionary of keyword arguments by using `#!python pykwargs` (similar to Python’s \*\*kwargs).
+If present, this argument must be the _last_ argument.
```q
q)qfunc[pykwargs`d`c`b`a!1 2 3 4] / full keyword dict specified
@@ -668,7 +687,7 @@ q)qfunc[pykwargs`a`a!1 2] / error if duplicate keyword names
'dupnames
```
-All 4 methods can be combined in a single function call, as long as the order follows the above rules.
+You can combine all four methods in a single function call if the order follows the above rules.
```q
q)qfunc[4;pyarglist enlist 3;`c pykw 2;pykwargs enlist[`d]!enlist 1]
@@ -677,10 +696,9 @@ q)qfunc[4;pyarglist enlist 3;`c pykw 2;pykwargs enlist[`d]!enlist 1]
!!! warning "`pykw`, `pykwargs`, and `pyarglist`"
- Before defining functions containing `pykw`, `pykwargs`, or `pyarglist` within a script, the file `p.q` must be loaded explicitly.
- Failure to do so will result in errors `'pykw`, `'pykwargs`, or `'pyarglist`.
+ Before defining functions containing `pykw`, `pykwargs`, or `pyarglist` within a script, you must explicitly load the file `p.q`. Failure to do so results in errors.
-### Zero-argument calls
+#### Zero-argument calls
In Python these two calls are _not_ equivalent:
@@ -693,7 +711,7 @@ func(None) #call with argument None
Although `::` in q corresponds to `None` in Python, if a PyKX function is called with `::` as its only argument, the corresponding Python function will be called with _no_ arguments.
-To call a Python function with `None` as its sole argument, retrieve `None` as a foreign object in q and pass that as the argument.
+To call a Python function with `#!python None` as its sole argument, retrieve `#!python None` as a foreign object in q and pass that as the argument:
```q
q)pynone:.pykx.eval"None"
@@ -702,12 +720,12 @@ q)pyfunc pynone;
None
```
-Python | form | q
+**Python** | **Form** | **q**
---------------|---------------------------|-----------------------
`func()` | call with no arguments | `func[]` or `func[::]`
`func(None)` | call with argument `None` | `func[.pykx.eval"None"]`
-!!! info "Q functions applied to empty argument lists"
+!!! info "q functions applied to empty argument lists"
The _rank_ (number of arguments) of a q function is determined by its _signature_,
an optional list of arguments at the beginning of its definition.
@@ -720,10 +738,9 @@ Python | form | q
So `func[::]` is equivalent to `func[]` – and in Python to `func()`, not `func[None]`.
-### Printing or returning object representation
-
+#### Print or return object representation
-`.pykx.repr` returns the string representation of a Python object, either PyKX or foreign. This representation can be printed to stdout using `.pykx.print`. The usage of this function with a q object
+`#!python .pykx.repr` returns the string representation of a Python object, either PyKX or foreign. You can print this representation to `#!python stdout` by using `#!python .pykx.print`. Here's how to use this function with a q object:
```q
q)x:.pykx.eval"{'a':1,'b':2}"
@@ -743,11 +760,7 @@ x x1
0.2296615 0.1959907
0.6919531 0.375638
```
+## Next steps
-### Aliases in the root
-
-
-For convenience, `pykx.q` defines `print` in the default namespace of q, as aliases for `.pykx.print`. To prevent the aliasing of this function please set either:
-
-1. `UNSET_PYKX_GLOBALS` as an environment variable.
-2. `unsetPyKXGlobals` as a command line argument when initialising your q session.
+- Use the [pykx.q Library Reference Card](../pykx-under-q/api.md).
+- [Upgrade from embedPy](../pykx-under-q/upgrade.md).
diff --git a/docs/pykx-under-q/upgrade.md b/docs/pykx-under-q/upgrade.md
index f451454..3d0d0c7 100644
--- a/docs/pykx-under-q/upgrade.md
+++ b/docs/pykx-under-q/upgrade.md
@@ -1,12 +1,22 @@
-# Differences and upgrade considerations from embedPy
+---
+title: Upgrade from embedPy
+description: How to upgrade from embedPy to PyKX within q
+date: June 2024
+author: KX Systems, Inc.,
+tags: embedPy, PyKX, q,
+---
-As outlined [here](intro.md) PyKX provides users with the ability to execute Python code within a q session similar to [embedPy](https://github.com/kxsystems/embedpy). This document outlines points of consideration when upgrading from embedPy to PyKX under q both with respect to the function mappings between the two interfaces and differences in their behavior.
+# Why upgrade from embedPy
+
+_This page outlines differences and function mappings when upgrading from embedPy to PyKX in a q session._
+
+Just like [PyKX](../getting-started/what_is_pykx.md), [embedPy](https://github.com/kxsystems/embedpy) is a tool that allows you to execute Python code and call Python functions.
## Functional differences
### q symbol and string support
-EmbedPy does not allow users to discern between q string and symbol types when converting to Python. In both cases these are converted to `str` objects in Python. As a result round trip conversions are not supported in embedPy for symbols, PyKX does support such round trip operations:
+EmbedPy doesn't allow users to discern between q `#!python string` and `#!python symbol` types when converting to Python. In both cases, these are converted to `#!python str` objects in Python. As a result, embedPy doesn't support round-trip conversions for symbols, but PyKX does:
=== "embedPy"
@@ -30,17 +40,17 @@ EmbedPy does not allow users to discern between q string and symbol types when c
1b
```
-## Functionality mapping
+### Functionality mapping
-The following table describes the function mapping from PyKX to embedPy for various elements of the supported functionality within embedPy, where a mapping supported this will be explicitly noted. Where workarounds exist these are additionally noted.
+The following table describes function mapping from PyKX to embedPy:
| Description | PyKX | embedPy |
|-----------------------------------------------------------------------|---------------------------------|-----------------|
-| Library loading | `\l pykx.q` | `\l p.q` |
-| Importing Python Libraries as wrapped Python objects | `.pykx.import` | `.p.import` |
-| Setting objects in Python Memory | `.pykx.set` | `.p.set` |
-| Retrieving Python objects from Memory | `.pykx.get` | `.p.get` |
-| Converting Python objects to q | `.pykx.toq` | `.p.py2q` |
+| Load library | `\l pykx.q` | `\l p.q` |
+| Import Python Libraries as wrapped Python objects | `.pykx.import` | `.p.import` |
+| Set objects in Python Memory | `.pykx.set` | `.p.set` |
+| Retrieve Python objects from Memory | `.pykx.get` | `.p.get` |
+| Convert Python objects to q | `.pykx.toq` | `.p.py2q` |
| Execute Python code returning as intermediary q/Python object | `.pykx.eval` | `.p.eval` |
| Execute Python code returning a q object | `.pykx.qeval` | `.p.qeval` |
| Execute Python code returning a Python foreign object | `.pykx.pyeval` | `.p.eval` |
@@ -54,42 +64,42 @@ The following table describes the function mapping from PyKX to embedPy for vari
| Generate a callable Python function returning a Python foreign object | `.pykx.pycallable` | `.p.pycallable` |
| Generate a callable Python function returning a q result | `.pykx.qcallable` | `.p.qcallable` |
| Interactive Python help string | Unsupported | `.p.help` |
-| Retrieval of Python help string as a q string | Unsupported | `.p.helpstr` |
+| Retrieve Python help string as a q string | Unsupported | `.p.helpstr` |
| Convert a q object to a Python foreign object | Unsupported | `.p.q2py` |
| Create a Python closure using a q function | Unsupported | `.p.closure` |
| Create a Python generator using a q function | Unsupported | `.p.generator` |
## PyKX under q benefits over embedPy
-PyKX under q provides a number of key functional benefits over embedPy alone when considering the generation of workloads that integrate Python and q code. The following are the key functional/feature updates which provide differentiation between the two libraries
+When generating workloads that integrate Python and q code, PyKX under q provides a few key functional benefits over embedPy alone:
-1. Flexibility in supported data formats and conversions
-2. Python code interoperability
-3. Access to PyKX in it's Python first modality
+1. [Flexibility in supported data formats and conversions](#1-flexibility-in-supported-data-formats-and-conversions)
+2. [Python code interoperability](#2-python-interoperability)
+3. [Access to PyKX as a Python module](#3-access-to-pykx-as-a-python-module)
-### Flexibility in supported data formats and conversions
+### 1. Flexibility in supported data formats and conversions
-EmbedPy contains a fundamental limitation with respect to the data formats that are supported when converting between q and Python. Namely that all q objects when passed to Python functions use the analogous Python/NumPy representation. This limitation means that a user of embedPy who require data to be in a Pandas/PyArrow format need to handle these conversions manually.
+When using embedPy to convert data between q and Python, there’s a fundamental limitation related to supported data formats. Specifically, when passed to Python functions, q objects use the analogous Python/NumPy representation. This means that if an embedPy user requires data in a Pandas/PyArrow format, they need to convert it manually.
-As PyKX supports Python, NumPy, Pandas and PyArrow data formats this improves the flexibility of workflows that can be supported, for example PyKX will by default convert q tables to Pandas DataFrames when passed to a Python function as follows
+As PyKX supports Python, NumPy, Pandas, and PyArrow data formats, it improves the workflow coverage and flexibility. For instance, PyKX by default converts q tables to Pandas DataFrames when passed to a Python function as follows:
```q
q).pykx.eval["lambda x:type(x)"] ([]10?1f;10?1f)
```
-Additional to this a number of helper functions are provided to allow users to selectively choose the target data formats which are used when passing to multivariable functions, for example
+Additionally, PyKX provides helper functions, allowing you to choose the target data formats used when passing to multivariable functions. For example:
```q
q).pykx.eval["lambda x, y:print(type(x), type(y))"][.pykx.tonp ([]10?1f);.pykx.topd til 10];
```
-This flexibility makes integration with custom libraries easier to manage.
+This flexibility makes integration with custom libraries significantly easier to manage.
-### Python interoperability
+### 2. Python interoperability
-For users that are working to integrate tightly their Python code and q code prototyping Python functions for use within embedPy could be difficult. Users are required when defining their functions either to provide them as a string with appropriate tab/indent usage to a `.p.e` as follows
+If you wish to integrate Python and q code, prototyping Python functions for use within embedPy can be difficult. When defining your functions, you need to provide them as a string, with appropriate tab/indent usage, to `#!python .p.e` as follows:
```q
q).p.e"def func(x):\n\treturn x+1"
@@ -98,11 +108,11 @@ q)pyfunc[2]
3
```
-Alternatively users could create a `.py`/`.p` file and access their functions using ```.pykx.import[`file_name]``` or `\l file_name.p` respectively.
+Alternatively, you could create a `#!python .py`/`#!python .p` file and access your functions using ```#!python .pykx.import[`file_name]``` or `#!python \l file_name.p` respectively.
-While these solutions provide provide a method of integrating your Python code they are not intuitive to a user versed both in Python and q.
+Neither solution is intuitive to users versed in both Python and q.
-PyKX provides a function `.pykx.console` which allows users within a q session to run a Python "console" to generate their functions/variables for use within their q code. The following example uses PyKX 2.3.0.
+That's why PyKX provides the `#!python .pykx.console` function, which lets you run a Python "console" within a q session to generate functions/variables for use in your q code. The following example uses PyKX 2.3.0:
```q
q).pykx.console[]
@@ -115,13 +125,13 @@ q)pyfunc[2]
3
```
-This change allows users to iterate development of their analytics faster than when operating with embedPy.
+This function allows you to iterate your analytics development faster than when operating with embedPy.
-### Access to PyKX in it's Python first modality
+### 3. Access to PyKX as a Python module
-Following on from the Python interoperability section above access to PyKX itself as a Python module provides significant flexibility to users when developing analytics for use within a q session.
+Access to PyKX in its Python-first mode adds more flexibility to users who develop analytics to use within q.
-With embedPy when q/kdb+ data is passed to Python for the purposes of completing "Python first" analysis there is a requirement that that analysis fully uses Python libraries that are available to a user and can not get performance benefits from having access to q/kdb+.
+With embedPy, when you pass q/kdb+ data to Python to complete a "Python-first" analysis, you're restricted to your Python libraries and can't get performance benefits from having access to q/kdb+.
Take for example a case where a user wishes to run a Python function which queries a table available in their q process using SQL and calculates the mean value for all numeric columns.
@@ -138,3 +148,7 @@ q)pyfunc `a
x1| 0.5592623
x2| 0.486176
```
+## Next steps
+
+- Learn [How to use PyKX within q](../pykx-under-q/intro.md).
+- Use the [pykx.q Library Reference Card](../pykx-under-q/api.md).
\ No newline at end of file
diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md
index ad3894c..47f3f70 100644
--- a/docs/release-notes/changelog.md
+++ b/docs/release-notes/changelog.md
@@ -4,9 +4,1130 @@
The changelog presented here outlines changes to PyKX when operating within a Python environment specifically, if you require changelogs associated with PyKX operating under a q environment see [here](./underq-changelog.md).
-!!! Warning
+## PyKX 3.0.0
+
+#### Release Date
+
+2024-11-12
+
+!!! Note
+
+ PyKX 3.0.0 is currently not available for Mac x86/ARM for all Python versions. Updated builds will be provided once available. To install PyKX 3.0.0 on Mac please install from source [here](https://github.com/kxsystems/pykx).
+
+### Additions
+
+- Addition of functionality to allow for development of end-to-end streaming workflows consisting of data-ingestion, persistence and query. This functionality is outlined in-depth [here](../user-guide/advanced/streaming/index.md) with a worked example [here](../examples/streaming/index.md).
+
+- PyKX tabular objects now have access to `select`, `exec`, `update` and `delete` methods to allow queries to be performed directly against the objects. This provides a significant ease-of-use improvement while maintaining access to the previous behavior.
+
+ ```python
+ >>> import pykx as kx
+ >>> N = 1000
+ >>> table = kx.Table(data={
+ ... 'time': kx.TimespanAtom('now'),
+ ... 'sym': kx.random.random(N, ['AAA', 'BBB', 'CCC']),
+ ... 'price': kx.random.random(N, 10.0)})
+ >>> table.select(where = kx.Column('price') > 5)
+ >>> table.exec(kx.Column('sym'))
+ >>> table.delete(where = kx.Column('sym') == 'AAA')
+ >>> table.update(2*kx.Column('price'))
+ ```
+
+- Update to the PyKX Query API in licensed mode to support a significantly more Python first approach to querying kdb+ in-memory and on-disk databases. The following are a number of basic examples, see [here](../user-guide/fundamentals/query/pyquery.md) for more details.
+
+ ```python
+ >>> table = kx.Table(data={
+ ... 'sym': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ ... 'date': kx.random.random(100, kx.q('2022.01.01') + [0,1,2]),
+ ... 'price': kx.random.random(100, 1000.0),
+ ... 'size': kx.random.random(100, 100)
+ ... })
+ >>> table.select(columns=kx.Column('price').max(), where=kx.Column('size') > 5)
+ >>> table.update(column=kx.Column('price').wavg(kx.Column('size')).rename('vwap'), by=kx.Column('sym'))
+ >>> table.delete(column=kx.Column('sym'))
+ >>> table.update(column=(kx.Column('price') * kx.Column('size')).rename('total'))
+ ```
+
+- Addition of a new method to PyKX IPC Connection objects `upd` which allows for the execution of a function `.u.upd` on remote processes with supplied data. In the case that data is supplied as a `#!python kx.Table` object the raw data required for execution of the update will be extracted.
+
+ ```python
+ >>> import pykx as kx
+ >>> N = 1000
+ >>> table = kx.Table(data={
+ ... 'time': kx.TimespanAtom('now'),
+ ... 'sym': kx.random.random(N, ['AAA', 'BBB', 'CCC']),
+ ... 'price': kx.random.random(N, 10.0)})
+ >>> with kx.SyncQConnection(port=5010) as q:
+ ... q.upd('trade', table)
+ >>> with kx.SyncQConnection(port=5010) as q:
+ ... q.upd('trade', [kx.TimespanAtom('now'), 'AAA', 10.101])
+ ```
+
+- Beta features available in the 2.* versions of PyKX have now been migrated to full support.
+ - The full list of these features is as follows:
+ - [Database Creation and Management](../user-guide/advanced/database/index.md)
+ - [Compression and Encryption Module](../user-guide/advanced/compress-encrypt.md)
+ - [Remote Function Execution](../user-guide/advanced/remote-functions.md)
+ - [Streamlit Integration](../user-guide/advanced/streamlit.md)
+ - [Multi-threaded use of PyKX](../user-guide/advanced/threading.md)
+ - Upgrade considerations for these features can be found [here](../upgrades/2030.md#remote-python-execution)
+
+- Allows user to overwrite jupyter notebook kernel with q code by setting `PYKX_JUPYTERQ` to `True` before starting the notebook. This can also be accessed at runtime using `#!python kx.util.jupyter_qfirst_enable()` and `#!python kx.util.jupyter_qfirst_disable()`
+- Tab completion for `reserved_words` list in `src/pykx/__init__.py` added for Jupyter Notebooks after the import of PyKX.
+- Addition of method `reorder_columns` to `pykx.Table` objects allowing users to change the order of columns within an in-memory table, if the list of columns does not contain all columns within the table the supplied columns will be used as the first `N` columns of the resulting table.
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'a': [1, 2, 3],
+ ... 'b': ['a', 'b', 'c'],
+ ... 'c': [1.0, 2.0, 3.0]
+ ... })
+ >>> tab.reorder_columns('c')
+ pykx.Table(pykx.q('
+ c a b
+ -----
+ 1 1 a
+ 2 2 b
+ 3 3 c
+ '))
+ >>> tab.reorder_columns(['b', 'c', 'a'])
+ pykx.Table(pykx.q('
+ b c a
+ -----
+ a 1 1
+ b 2 2
+ c 3 3
+ '))
+ ```
+
+- Addition of methods `map` and `applymap` to `#!python kx.Table` and `#!python kx.KeyedTable` objects allowing users to apply a supplied function to every element of a table
+
+ === "Python lambda"
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'x': [[1, 2, 3], 1, [1, 2]],
+ ... 'y': [kx.LongAtom.null, 1, 2]})
+ >>> tab.map(lambda x:len(str(x)))
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 5 2
+ 1 1
+ 3 1
+ '))
+ >>> tab.map(lambda x:len(str(x)), na_action='ignore')
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 5
+ 1 1
+ 3 1
+ '))
+ >>> tab.map(lambda x, y: y+len(str(x)), y = 1)
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 6 3
+ 2 2
+ 4 2
+ '))
+ ```
+
+ === "Python function"
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'x': [[1, 2, 3], 1, [1, 2]],
+ ... 'y': [kx.LongAtom.null, 1, 2]})
+ >>> def _multi_arg_count(x, y=1):
+ ... try:
+ ... count = len(x)
+ ... except TypeError as err:
+ ... count = 1
+ ... return count + y
+ >>> tab.map(_multi_arg_count)
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 4 2
+ 2 2
+ 3 2
+ '))
+ >>> tab.map(_multi_arg_count, y=3)
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 6 4
+ 4 4
+ 5 4
+ '))
+ ```
+
+ === "q function"
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'x': [[1, 2, 3], 1, [1, 2]],
+ ... 'y': [kx.LongAtom.null, 1, 2]})
+ >>> tab.map(kx.q.count)
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 3 1
+ 1 1
+ 2 1
+ '))
+ >>> tab.map(kx.q('{count string x}'))
+ pykx.Table(pykx.q('
+ x y
+ ---
+ 3 0
+ 1 1
+ 2 1
+ '))
+ ```
+
+- Added constructors for `EnumVector` and `EnumAtom` types. Added `values()` / `value()`, `domain()` and `indices()` / `index()` methods to access different components of the enumerations.
+
+ ```python
+ >>>kx.q('l:`a`b`c')
+ >>>v=('a', 'c')
+ >>>e=kx.EnumVector('l', values=v)
+ >>>e
+ pykx.EnumVector(pykx.q('`l$`a`c'))
+ >>>e.domain()
+ pykx.SymbolAtom(pykx.q('`l'))
+ >>>e.values()
+ pykx.SymbolVector(pykx.q('`a`c'))
+ >>>e.indices()
+ pykx.LongVector(pykx.q('0 2'))
+ ```
+
+- Addition to `#!python kx.DB` module to facilitate
+ - Loading of a database to not move current working directory
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path='db', change_dir=False)
+ ```
+
+ - Loading of a database to not require loading of `.q` files which exist within the database.
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path='db', load_scripts=False)
+ ```
+
+- Added `cloud_libraries` kwarg to `#!python kx.install_into_QHOME` allowing installation of the kdb Insights cloud libraries to QHOME
+
+- Addition of a new utility function `#!python kx.util.detect_bad_columns` to validate whether the columns of a table object conform to the naming conventions supported by kdb+ and to highlight whether the table contains duplicate column names. It raises a warning indicating potential issues and returns `#!python True` if the table contains invalid columns.
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.q('flip (`a;`$"!";`a;`$"a b")!4 4#16?1f')
+ >>> kx.util.detect_bad_columns(tab)
+ /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py:593: UserWarning:
+ Duplicate columns or columns with reserved characters detected:
+ Duplicate columns: ['a']
+ Invalid columns: ['!', 'a b']
+ See https://code.kx.com/pykx/help/troubleshooting.html to learn more about updating your table
+ warn(warn_string)
+ True
+ >>> kx.util.detect_bad_columns(tab, return_cols=True)
+ /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py:588: UserWarning:
+ Duplicate columns or columns with reserved characters detected:
+ Duplicate columns: ['a']
+ Invalid columns: ['!', 'a b']
+ See https://code.kx.com/pykx/help/troubleshooting.html to learn more about updating your table
+ warn(warn_string)
+ ['a', '!', 'a b']
+ ```
+
+- Addition of functionality to allow operators to be generated from an input string
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.Operator('?')
+ pykx.Operator(pykx.q('?'))
+ ```
+
+- Addition of named operators to allow use of single character operators available in q, for example:
+ - [drop](../api/pykx-execution/q.md#drop)
+ - [fill](../api/pykx-execution/q.md#fill)
+ - [dict](../api/pykx-execution/q.md#dict)
+
+ === "Using operators"
+
+ ```python
+ >>> kx.q.dict(['a', 'b', 'c'], [1, 2, 3])
+ pykx.Dictionary(pykx.q('
+ a| 1
+ b| 2
+ c| 3
+ '))
+ ```
+
+ === "Getting help"
+
+ ```python
+ >>> help(kx.q.dict)
+ Help on Operator in module pykx.wrappers:
+
+ pykx.Operator(pykx.q('!'))
+ • dict
+
+ Generate a dictionary by passing two lists of equal lengths
+
+ >>> kx.q.dict(['a', 'b', 'c'], [1, 2, 3])
+ pykx.Dictionary(pykx.q('
+ a| 1
+ b| 2
+ c| 3
+ '))
+ ```
+
+- When generating IPC connections with `reconnection_attempts` users can now configure the initial delay between first and second attempts and the function which updates the delay on successive attempts using the `reconnection_delay` and `reconnection_function` keywords, for example setting a 1 second delay between successive attempts.
+
+ ```python
+ >>> conn = kx.SyncQConnection(reconnection_attempts=5, reconnection_delay=1, reconnection_function=lambda x:x)
+ >>> conn('1+1') # after this call the server on port 5050 is shutdown for 3 seconds
+ pykx.LongAtom(pykx.q('2'))
+ >>> conn('1+2')
+ WARNING: Connection lost attempting to reconnect.
+ Failed to reconnect, trying again in 1.0 seconds.
+ Failed to reconnect, trying again in 1.0 seconds.
+ Failed to reconnect, trying again in 1.0 seconds.
+ Connection successfully reestablished.
+ pykx.LongAtom(pykx.q('3'))
+ ```
+
+- Two new options added on first initialisation of PyKX to allow users to:
+ - Use the path to their already downloaded `kc.lic`/`k4.lic` licenses without going through the "Do you want to install a license" workflow
+ - Allow users to persist for future use that they wish to use the IPC only unlicensed mode of PyKX, this will persist a file `~/.pykx-config` which sets configuration denoting unlicensed mode is to be used.
+
+- Addition of function `#!python kx.util.install_q` to allow users who do not have access to a `q` executable at the time of installing PyKX to install one. See [here](../getting-started/installing.md) for instructions regarding its use.
+- Addition of function `#!python kx.util.start_q_subprocess` to allow a `q` process to be started on a specified port with supplied initialisation arguments, for example:
+
+ ```python
+ >>> kx.util.start_q_subprocess(5050, load_file='qfile.q', init_args = ['-t', '1000'])
+ ```
+
+- Added new constructor methods for `#!python kx.DateAtom`, `#!python kx.TimestampAtom` and `#!python kx.TimespanAtom` so users can now create temporal type objects directly by passing numeric values in licensed mode.
+
+ ```python
+ >>> kx.DateAtom(2024, 9, 21)
+ pykx.DateAtom(pykx.q('2024.09.21'))
+
+ >>> kx.TimestampAtom(2150, 10, 22, 20, 31, 15, 70713856)
+ pykx.TimestampAtom(pykx.q('2150.10.22D20:31:15.070713856'))
+
+ >>> kx.TimespanAtom(43938, 19, 7, 31, 664551936)
+ pykx.TimespanAtom(pykx.q('43938D19:07:31.664551936'))
+ ```
+
+- Added conversions for additional NumPy datatypes.
+
+ ```python
+ >>> kx.toq(np.array([1, 2], np.dtype('uint16')))
+ pykx.IntVector(pykx.q('1 2i'))
+
+ >>> kx.toq(np.array([1, 2], np.dtype('uint32')))
+ pykx.LongVector(pykx.q('1 2'))
+
+ >>> kx.toq(np.array([1, 2], np.dtype('int8')))
+ pykx.ShortVector(pykx.q('1 2h'))
+
+ >>> kx.toq(np.array([1, 2], np.dtype('float16')))
+ pykx.RealVector(pykx.q('1 2e'))
+
+ >>> kx.toq(np.bytes_('a'))
+ pykx.CharAtom(pykx.q('"a"'))
+
+ >>> kx.toq(np.bytes_('aa'))
+ pykx.CharVector(pykx.q('"aa"'))
+ ```
+
+- Added conversion for `pandas.NA` to the `pykx.Identity` `(::)`.
+- Added conversions for PyArrow Scalar types. Note: These conversions rely on PyArrow `.as_py()` and as such mirror its limitations.
+
+ ```python
+ >>> kx.toq(pa.array([0.0, 1.0, 2.0], type=pa.float32())[0])
+ pykx.FloatAtom(pykx.q('0f'))
+ ```
+
+- Addition of `is_pos_inf` and `is_neg_inf` properties to PyKX atom objects to check for positive and negative infinity values.
+
+ ```python
+ >>> kx.q('0W').is_pos_inf
+ True
+ >>> kx.q('0W').is_neg_inf
+ False
+ >>> kx.q('-0W').is_neg_inf
+ True
+ ```
+
+- Addition of `inf_neg` property to PyKX atom objects.
+
+ ```python
+ >>> kx.IntAtom.inf
+ pykx.IntAtom(pykx.q('0Wi'))
+ >>> kx.IntAtom.inf_neg
+ pykx.IntAtom(pykx.q('-0Wi'))
+ ```
+
+- All infinity and null properties on atom classes are now usable in unlicensed mode: `null`, `inf`, `inf_pos`, `inf_neg`, `is_inf`, `is_neg_inf`, `is_pos_inf`.
+
+### Fixes and Improvements
+
+- Previously the built-in Python `#!python help()` function, when called on q keywords such as `#!python kx.q.rand` or `#!python kx.q.max`, displayed the generic `wrappers` object help message; it now displays help specific to the keyword. As noted in [the installation docs](../getting-started/installing.md#dependencies), this functionality requires the installation of optional libraries available through `pip install pykx[help]`.
+
+ === "Behavior prior to change"
+
+ ```
+ Help on Lambda in module pykx.wrappers object:
+
+ class Lambda(Function)
+ | Lambda(x: Any, *, cast: bool = None)
+ |
+ | Wrapper for q lambda functions.
+ |
+ | Lambda's are the most basic kind of function in q. They can take between 0 and 8 parameters
+ | (inclusive), which all must be q objects themselves. If the provided parameters are not
+ | [`pykx.K`][] objects, they will be converted into them using [`pykx.toq`][].
+ |
+ | Unlike other [`pykx.Function`][] subclasses, `Lambda` objects can be called with keyword
+ | arguments, using the names of the parameters from q.
+ ```
+
+ === "Behavior post change"
+
+ ```
+ Help on Lambda in pykx:
+
+ pykx.Lambda = pykx.Lambda(pykx.q('k){*1?x}'))
+ • rand
+
+ Pick randomly.
+
+ >>> pykx.q.rand([1, 2, 3, 4, 5])
+ pykx.LongAtom(q('2'))
+ ```
+
+- When establishing a `#!python kx.SecureQConnection` the first item could not be passed as a `#!python kx.SymbolAtom` object, this created issues for some users making IPC calls against restricted/secure APIs, this has been resolved.
+- Fix for `#!python TypeError` if `.pykx-config` config file exists but does not contain specified PyKX profile.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> import pykx as kx
+ Traceback (most recent call last):
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\config.py", line 68, in <module>
+ _pykx_profile_content = _pykx_config_content[pykx_config_profile]
+ ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
+ KeyError: 'default'
+
+ During handling of the above exception, another exception occurred:
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\__init__.py", line 17, in <module>
+ from ._pyarrow import pyarrow
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\_pyarrow.py", line 12, in <module>
+ from .config import load_pyarrow_unsafe
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\config.py", line 71, in <module>
+ print("Unable to locate specified 'PYKX_PROFILE': '" + pykx_config_profile + "' in file '" + config_path + "'") # noqa E501
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~
+ TypeError: can only concatenate str (not "WindowsPath") to str
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> import pykx as kx
+ Unable to locate specified 'PYKX_PROFILE': 'default' in file 'C:\Users\user\.pykx-config'
+ ```
+
+- Previously loading pykx.q during q startup using `QINIT` or `QHOME/q.q` resulted in a segfault or a corruption.
+- Removal of several deprecated configuration variables which were previously noted to be deprecated at the next major release. See [here](../upgrades/2030.md#deprecations) for more information.
+- Attempts to convert `#!python kx.List` objects with non-conforming types previously resulted in an ambiguous error, updated error message indicates the root cause.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.q('(.z.p;4)').pa()
+ ...
+ pyarrow.lib.ArrowNotImplementedError: Expected np.datetime64 but got: int64
+ >>>
+ >>> kx.q('(4;.z.p)').pa()
+ ...
+ pyarrow.lib.ArrowInvalid: Cannot mix NumPy dtypes int64 and datetime64
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.q('(.z.p;4)').pa()
+ ...
+ QError: Unable to convert pykx.List with non conforming types to PyArrow,
+ failed with error: Expected np.datetime64 but got: int64
+ >>>
+ >>> kx.q('(4;.z.p)').pa()
+ ...
+ QError: Unable to convert pykx.List with non conforming types to PyArrow,
+ failed with error: Cannot mix NumPy dtypes int64 and datetime64
+ ```
+
+- The function `#!python kx.util.debug_environment` now returns the applied configuration values at startup instead of customised values, this ensures that configuration set in `.pykx-config` file will be used and limits chances of future default changes being missed in debugging.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.util.debug_environment()
+ ..
+ **** PyKX Environment Variables ****
+ PYKX_IGNORE_QHOME:
+ PYKX_KEEP_LOCAL_TIMES:
+ PYKX_ALLOCATOR:
+ PYKX_GC:
+ PYKX_LOAD_PYARROW_UNSAFE:
+ PYKX_MAX_ERROR_LENGTH:
+ PYKX_NOQCE:
+ PYKX_Q_LIB_LOCATION:
+ PYKX_RELEASE_GIL:
+ PYKX_Q_LOCK:
+ PYKX_DEFAULT_CONVERSION:
+ ..
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.util.debug_environment()
+ ..
+ **** PyKX Configuration Variables ****
+ PYKX_IGNORE_QHOME: False
+ PYKX_KEEP_LOCAL_TIMES: False
+ PYKX_ALLOCATOR: False
+ PYKX_GC: False
+ PYKX_LOAD_PYARROW_UNSAFE: False
+ PYKX_MAX_ERROR_LENGTH: 256
+ PYKX_NOQCE: False
+ PYKX_RELEASE_GIL: False
+ PYKX_Q_LIB_LOCATION: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pykx/lib
+ PYKX_Q_LOCK: False
+ PYKX_SKIP_UNDERQ: False
+ PYKX_Q_EXECUTABLE: /usr/local/anaconda3/envs/qenv/q/m64/q
+ PYKX_THREADING: False
+ PYKX_4_1_ENABLED: False
+ PYKX_QDEBUG: False
+ PYKX_DEBUG_INSIGHTS_LIBRARIES: False
+ ..
+ ```
+
+- Use of the `#!python has_nulls` property on keyed tables failed to appropriately check for the presence of null values, checking for null column names rather than null values within the table content. This has been rectified.
+- Operations on `#!python kx.GroupbyTable` objects which have been indexed previously would raise an error indicating invalid key access, this has now been resolved
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'sym': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'size': kx.random.random(100, 100),
+ ... 'price': kx.random.random(100, 10.0)})
+ >>> tab.groupby('sym')['price'].max()
+ KeyError: "['['sym', 'price']'] is not an index"
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'sym': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'size': kx.random.random(100, 100),
+ ... 'price': kx.random.random(100, 10.0)})
+ >>> tab.groupby('sym')['price'].max()
+ pykx.KeyedTable(pykx.q('
+ sym| price
+ ---| --------
+ a | 9.830794
+ b | 9.761246
+ c | 9.592286
+ '))
+ ```
+
+- Support has now been added for conversion of `timedelta64[us]` to `#!python kx.Timespan` type objects
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> import pandas as pd
+ >>> import numpy as np
+ >>> import pykx as kx
+ >>> df = pd.DataFrame({
+ ... 'time': np.array([1,2,3], dtype='timedelta64[us]'),
+ ... 'sym': ['msft', 'ibm', 'ge'],
+ ... 'qty': [100, 200, 150]})
+ >>> kx.toq(df)
+ TypeError: ktype cannot be inferred from Numpy dtype timedelta64[us]
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> import pandas as pd
+ >>> import numpy as np
+ >>> import pykx as kx
+ >>> df = pd.DataFrame({
+ ... 'time': np.array([1,2,3], dtype='timedelta64[us]'),
+ ... 'sym': ['msft', 'ibm', 'ge'],
+ ... 'qty': [100, 200, 150]})
+ >>> kx.toq(df)
+ pykx.Table(pykx.q('
+ time sym qty
+ -----------------------------
+ 0D00:00:00.000001000 msft 100
+ 0D00:00:00.000002000 ibm 200
+ 0D00:00:00.000003000 ge 150
+ '))
+ ```
+- Previously loading pykx.q during q startup using `q.q` resulted in a segfault or a corruption.
+
+ === "Behavior prior to change"
+
+ ```bash
+ $ echo "\l pykx.q" > $QHOME/q.q
+
+ $ q
+ :241: PyKXWarning: Failed to initialize PyKX successfully with the following error: Captured output from initialization attempt:
+ double free or corruption (out)
+
+ License location used:
+ /usr/pykx/kc.lic
+ ```
+
+ === "Behavior post change"
+
+ ```bash
+ $ echo "\l pykx.q" > $QHOME/q.q
+
+ $ q
+ q)
+ ```
+
+- Addition of keyword argument to `#!python kx.toq` functionality to allow Python strings to be returned as `#!python pykx.CharVector` objects rather than `#!python pykx.SymbolAtom` objects.
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.toq('str')
+ pykx.SymbolAtom(pykx.q('`str'))
+ >>> kx.toq('str', strings_as_char=True)
+ pykx.CharVector(pykx.q('"str"'))
+ >>>
+ >>> kx.toq({'a': {'b': 'test'}, 'b': 'test1'})
+ pykx.Dictionary(pykx.q('
+ a| (,`b)!,`test
+ b| `test1
+ '))
+ >>> kx.toq({'a': {'b': 'test'}, 'b': 'test1'}, strings_as_char=True)
+ pykx.Dictionary(pykx.q('
+ a| (,`b)!,"test"
+ b| "test1"
+ '))
+ ```
+
+- Previously remote function calls required user defined functions to supply at least one argument, attempts to generate a zero-argument function would result in a `type` error being raised.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(port=5050)
+ >>> @kx.remote.function(session)
+ ... def zero_arg_function():
+ ... return 10
+ >>> zero_arg_function()
+ pykx.exceptions.QError: type
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(port=5050)
+ >>> @kx.remote.function(session)
+ >>> @function(remote_session)
+ ... def zero_arg_function():
+ ... return 10
+ >>> zero_arg_function()
+ pykx.LongAtom(pykx.q('10'))
+ ```
+
+- Previously `append` on two `#!python pykx.List` objects was not correctly inserting the new contents.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> a = kx.toq(['a',1])
+ >>> a.append(['b',2])
+ >>> a
+ pykx.List(pykx.q('
+ `a
+ 1
+ `b
+ 2
+ '))
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> a = kx.toq(['a',1])
+ >>> a.append(['b',2])
+ >>> a
+ pykx.List(pykx.q('
+ `a
+ 1
+ (`b;2)
+ '))
+ ```
+
+- Previously users could not convert pandas UUID arrays (dtype `pykx.uuid`) using `#!python pykx.toq`. It now returns a `#!python pykx.GUIDVector`.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> t = kx.q('([] g:2?0Ng)')
+ >>> kx.toq(t['g'].pd(raw=True))
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "pykx/toq.pyx", line 2539, in pykx.toq.ToqModule.__call__
+ File "pykx/toq.pyx", line 242, in pykx.toq._default_converter
+ TypeError: Cannot convert '
+ [(-1120411.5051955811-2.2494901007079993e+40j), (1.0810316408884467e+40+2.3725570805246122e+135j)]
+ Length: 2, dtype: pykx.uuid' to K object
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> t = kx.q('([] g:2?0Ng)')
+ >>> kx.toq(t['g'].pd(raw=True))
+ pykx.GUIDVector(pykx.q('8c6b8b64-6815-6084-0a3e-178401251b68 5ae7962d-49f2-404d-5aec-f7c8abbae288'))
+ >>> type(kx.toq(t['g'].pd(raw=True)))
+ <class 'pykx.wrappers.GUIDVector'>
+ ```
+
+- Attempts to load a database using the `#!python kx.DB` module previously would raise an `nyi` error if the path to the database contained a space
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path='/tmp/test directory/mydb')
+ ..
+ QError: Failed to load Database with error: nyi
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path='/tmp/test directory/mydb')
+ >>> db.tables
+ ['trade']
+ ```
+
+- Improved handling of invalid methods for Splayed and Partitioned Tables.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tab.head()
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/andymc/work/KXI-47027/KXI-47027/lib/python3.10/site-packages/pykx/pandas_api/__init__.py", line 57, in return_val
+ res = func(*args, **kwargs)
+ File "/home/andymc/work/KXI-47027/KXI-47027/lib/python3.10/site-packages/pykx/pandas_api/pandas_indexing.py", line 345, in head
+ return q(f'{{{n}#x}}', self)
+ File "/home/andymc/work/KXI-47027/KXI-47027/lib/python3.10/site-packages/pykx/embedded_q.py", line 249, in __call__
+ return factory(result, False)
+ File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory
+ File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory
+ pykx.exceptions.QError: par
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tab.head()
+ AttributeError: Operation 'head' not supported for PartitionedTable type
+ ```
+
+- Resolved issue in `_repr_html_` on table objects which was not displaying the final row when table count equaled console height.
+
+ === "Behavior prior to change"
+
+ ![Before](./images/KXI-52686_before.png)
+
+ === "Behavior post change"
+
+ ![After](./images/KXI-52686_after.png)
+
+- When using Jupyter Notebooks, if a table to be presented in HTML format contains invalid columns (that is, `#!python kx.util.detect_bad_columns` returns `#!python True` for it), a warning highlighting the invalid columns will be raised and the printed `#!python table.__repr__` will be presented instead.
+- Updates to various error messages to provide more descriptive explanations, see [here](../upgrades/2030.md) for more information on changes you may need to consider.
+- Resolved `No Such file or directory` error for `libe.so` when attempting to initialize unlicensed mode with `PYKX_4_1_ENABLED=True`.
+- Resolved `WARN: Failed to load KX Insights Core library` when using PyKX with `PYKX_4_1_ENABLED=True`.
+- Removal of various deprecated keywords used in table operations:
+ - `#!python modify` keyword for `#!python select`, `#!python exec`, `#!python update` and `#!python delete` operations on `#!python pykx.Table` and `#!python pykx.KeyedTable`. This has been permanently changed to use `#!python inplace`.
+ - `#!python replace_self` keyword when attempting to overwrite a `#!python pykx.Table` or `#!python pykx.KeyedTable` using insert/upsert functionality. This has been permanently changed to use `#!python inplace`.
+
+- Binary updates
+ - Linux ARM `e.o` library updated to 2023.11.22. Fixes the following issues:
+ - Support for SSL3 for remote unlicensed querying
+ - PyKX will no longer overwrite Numpy subnormal definition
+ - Windows `e.dll` and `e.lib` libraries updated to 2024.08.21. Underlying libraries now compiled using `MSVC 2017`. This removed the dependency on `msvcr100.dll` for PyKX on Windows.
+ - Updates to Windows, Linux and Mac binaries `q.[so|dll]` for kdb+ 4.0 to 2024.10.25 and for kdb+ 4.1 to 2024.10.16. Fixes the following issue:
+ - Function defined by `vk` available for use.
+
+- Changed the logic of `#!python list(KeyedTable)` and the `#!python pykx.KeyedTable` iterator to return the values of that table as a list of lists instead of the `keys` as a list of tuple objects.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tk=kx.q('([a:1 2 3] b:5 6 7;c:8 9 10)')
+ >>> [print(x) for x in tk]
+ (pykx.LongAtom(pykx.q('1')),)
+ (pykx.LongAtom(pykx.q('2')),)
+ (pykx.LongAtom(pykx.q('3')),)
+
+ >>> list(tk)
+ [(pykx.LongAtom(pykx.q('1')),), (pykx.LongAtom(pykx.q('2')),), (pykx.LongAtom(pykx.q('3')),)]
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tk=kx.q('([a:1 2 3] b:5 6 7;c:8 9 10)')
+ >>> [print(x) for x in tk]
+ [pykx.LongAtom(pykx.q('5')), pykx.LongAtom(pykx.q('8'))]
+ [pykx.LongAtom(pykx.q('6')), pykx.LongAtom(pykx.q('9'))]
+ [pykx.LongAtom(pykx.q('7')), pykx.LongAtom(pykx.q('10'))]
+
+ >>> list(tk)
+ [[pykx.LongAtom(pykx.q('5')), pykx.LongAtom(pykx.q('8'))], [pykx.LongAtom(pykx.q('6')), pykx.LongAtom(pykx.q('9'))], [pykx.LongAtom(pykx.q('7')), pykx.LongAtom(pykx.q('10'))]]
+ ```
+
+- Changed return of `pykx.KeyedTable.keys()` from a list of tuples to a `pykx.Table` to be consistent with `pykx.KeyedTable.values()` and unkeyed `pykx.Table`.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tk=kx.q('([a:1 2 3] b:5 6 7;c:8 9 10)')
+ >>> tk.keys()
+ [(pykx.LongAtom(pykx.q('1')),), (pykx.LongAtom(pykx.q('2')),), (pykx.LongAtom(pykx.q('3')),)]
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tk=kx.q('([a:1 2 3] b:5 6 7;c:8 9 10)')
+ >>> tk.keys()
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 2
+ 3
+ '))
+ ```
+
+- PyKX conversions during `.py()`/`.np()`/`.pd()`/`.pa()` now convert null values to Python values rather than leaving as PyKX objects. The changes are outlined in full on the [nulls and infinities](../user-guide/fundamentals/nulls_and_infinities.md) page.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.q('1 2 0N').py()
+ [1, 2, pykx.LongAtom(pykx.q('0N'))]
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.q('1 2 0N').py()
+ [1, 2, ]
+ ```
+
+- Infinity values are no longer blocked from converting for `ShortAtom`/`IntAtom`/`LongAtom` datatypes. The changes are outlined in full on the [nulls and infinities](../user-guide/fundamentals/nulls_and_infinities.md) page.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.q('0W').np()
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/wrappers.py", line 1279, in np
+ return self._np_null_or_inf(np.int64(_wrappers.k_j(self)), raw)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/wrappers.py", line 1245, in _np_null_or_inf
+ raise PyKXException('Numpy does not support infinite atomic integral values')
+ pykx.exceptions.PyKXException: Numpy does not support infinite atomic integral values
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.q('0W').np()
+ 9223372036854775807
+ ```
+
+- Fix Pandas Timedelta atom conversions for Pandas<2.0
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.toq(pd.Timedelta('0 days 16:36:29.214000'))
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "pykx/toq.pyx", line 2692, in pykx.toq.ToqModule.__call__
+ File "pykx/toq.pyx", line 270, in pykx.toq._default_converter
+ TypeError: Cannot convert 'Timedelta('0 days 16:36:29.214000')' to K object. See pykx.register to register custom conversions.
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.toq(pd.Timedelta('0 days 16:36:29.214000'))
+ pykx.TimespanAtom(pykx.q('0D16:36:29.214000000'))
+ ```
+
+- Changed logic of `pykx.KeyedTable.get()` and `pykx.Table.get()` function to return a vector in cases where a single column table would previously have been returned.
+- When attempting to get a column not present in the table, calling `pykx.KeyedTable.get()` or `pykx.Table.get()` raises an error if `default` is not set by the user instead of returning `None`.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tab=kx.q('1!([] x: til 3; y: 3 - til 3; z: 3?`a`b`c)')
+ >>> tab.get('y')
+ pykx.Table(pykx.q('
+ y
+ -
+ 3
+ 2
+ 1
+ '))
+
+ >>> tab.get('r')
+ >>>
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tab=kx.q('1!([] x: til 3; y: 3 - til 3; z: 3?`a`b`c)')
+ >>> tab.get('y')
+ pykx.LongVector(pykx.q('3 2 1'))
+
+ >>> tab.get('r')
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/andymc/work/KXI-50017/KXI-50017/lib/python3.10/site-packages/pykx/pandas_api/__init__.py", line 57, in return_val
+ res = func(*args, **kwargs)
+ File "/home/andymc/work/KXI-50017/KXI-50017/lib/python3.10/site-packages/pykx/pandas_api/pandas_indexing.py", line 366, in get
+ return _get(self, key, default)
+ File "/home/andymc/work/KXI-50017/KXI-50017/lib/python3.10/site-packages/pykx/pandas_api/pandas_indexing.py", line 41, in _get
+ raise QError(f'Attempted to retrieve inaccessible column: {key}')
+ pykx.exceptions.QError: Attempted to retrieve inaccessible column: r
+ ```
+
+- `select_dtypes` will now return `pykx.Identity(pykx.q('::'))` if all columns are excluded, or if no columns are included.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> tab = kx.q('([] a:1 2 3j;b:4 5 6i)')
+
+ >>> tab.dtypes
+ pykx.Table(pykx.q('
+ columns datatypes type
+ -----------------------------------
+ a "kx.LongAtom" "kx.LongAtom"
+ b "kx.IntAtom" "kx.IntAtom"
+ '))
+
+ >>> tab.select_dtypes(include=['SymbolAtom'])
+ pykx.Table(pykx.q('
+ a b
+ ---
+ 1 4
+ 2 5
+ 3 6
+ '))
+
+ >>> tab.select_dtypes(exclude=['kx.LongAtom', 'kx.IntAtom'])
+ pykx.Table(pykx.q('
+ a b
+ ---
+ 1 4
+ 2 5
+ 3 6
+ '))
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> tab = kx.q('([] a:1 2 3j;b:4 5 6i)')
+
+ >>> tab.dtypes
+ pykx.Table(pykx.q('
+ columns datatypes type
+ -----------------------------------
+ a "kx.LongAtom" "kx.LongAtom"
+ b "kx.IntAtom" "kx.IntAtom"
+ '))
+
+ >>> tab.select_dtypes(include=['SymbolAtom'])
+ pykx.Identity(pykx.q('::'))
+
+ >>> tab.select_dtypes(exclude=['kx.LongAtom', 'kx.IntAtom'])
+ pykx.Identity(pykx.q('::'))
+ ```
+
+- `select_dtypes` now functions on `KeyedTable` objects, always returning key columns and performing column filtering on value columns.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> dfk = kx.q('([c1:`a`b`c]; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
+ >>> dfk.select_dtypes(include=[kx.ShortAtom])
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/pandas_api/__init__.py", line 57, in return_val
+ res = func(*args, **kwargs)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/pandas_api/pandas_conversions.py", line 337, in select_dtypes
+ table_out = q('''{[qtab;inc] tCols:cols qtab;
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/embedded_q.py", line 251, in __call__
+ return factory(result, False)
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory
+ File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory
+ pykx.exceptions.QError: nyi
+ ```
+
+
+ === "Behavior post change"
+
+ ```python
+ >>> dfk = kx.q('([c1:`a`b`c]; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
+ >>> dfk.select_dtypes(include=[kx.ShortAtom])
+ pykx.KeyedTable(pykx.q('
+ c1| c2
+ --| --
+ a | 1
+ b | 2
+ c | 3
+ '))
+ ```
+
+- Addition of support for the configuration/environment variable `PYKX_SUPPRESS_WARNINGS`, which allows users to suppress the warnings PyKX raises to highlight cases where users need to be careful about the outputs or behavior of a feature. For example:
+
+ === "Behavior before change"
+
+ ```python
+ >>> import pykx as kx
+ >>> import numpy as np
+ >>> np.max(kx.q.til(10))
+ /usr/python/site-packages/pykx/wrappers.py:2246: UserWarning: Attempting to call numpy..
+ warn('Warning: Attempting to call numpy __array_function__ on a '
+ 9
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> import os
+ >>> os.environ['PYKX_SUPPRESS_WARNINGS'] = 'True'
+ >>> import pykx as kx
+ >>> import numpy as np
+ >>> np.max(kx.q.til(10))
+ 9
+ ```
+
+- If a `QARGS` configuration value which has no effect when running in Python has been supplied on the startup of PyKX, PyKX will now print a warning to users. For example:
+
+ ```python
+ $ export QARGS='-p 5050 -t 1000'
+ $ python
+ >>> import pykx as kx
+ /usr/python/site-packages/pykx/config.py: RuntimeWarning: '-p' argument unsupported in QARGS..
+ /usr/python/site-packages/pykx/config.py: RuntimeWarning: '-t' argument unsupported in QARGS..
+ ```
+
+- PyKX Pandas dependency for Python 3.8 has been clamped to `<2.0` due to support being dropped for it by Pandas after 2.0.3.
+
+### Upgrade considerations
+
+- PyKX 3.0.0 is a major version update which includes changes requiring review before upgrading from 2.5.*. A page details these changes in full [here](../upgrades/2030.md)
+
+## PyKX 2.5.4
+
+#### Release Date
+
+2024-10-22
+
+!!! Note
+
+ PyKX 2.5.4 is currently not available for Mac x86/ARM for all Python versions. Updated builds will be provided once available.
+
+### Fixes and Improvements
+
+- Resolved context interface failing to load files on Windows.
+
+ === "Behavior prior to change"
+
+ ```python
+ >>> kx.q.context
+ Traceback (most recent call last):
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\__init__.py", line 162, in __getattr__
+ self.__getattribute__('_register')(name=key)
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\__init__.py", line 248, in _register
+ self._call(
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\embedded_q.py", line 246, in __call__
+ return factory(result, False, name=query.__str__())
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "pykx\\_wrappers.pyx", line 521, in pykx._wrappers._factory
+ File "pykx\\_wrappers.pyx", line 514, in pykx._wrappers.factory
+ pykx.exceptions.QError: "C:\lib"
+
+ The above exception was the direct cause of the following exception:
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\__init__.py", line 166, in __getattr__
+ raise attribute_error from inner_error
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\__init__.py", line 159, in __getattr__
+ return ctx.__getattr__(key)
+ ^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pykx\ctx.py", line 153, in __getattr__
+ raise AttributeError(
+ AttributeError: 'pykx.ctx.QContext' object has no attribute 'context'
+ QError: '.context
+ ```
+
+ === "Behavior post change"
+
+ ```python
+ >>> kx.q.context
+
+ ```
+
+### Version Support Changes
+
+- Version 2.5.4 marks the removal of support for releases to PyPI/Anaconda of PyKX versions supporting Python 3.7.
- Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type.
## PyKX 2.5.2
@@ -18,7 +1139,7 @@
- Converting PyKX generic lists using the keyword parameter `raw=True` would previously return incorrect results, the values received being the memory address of the individual elements of the list, this has now been resolved
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> a = kx.q('(1; 3.4f; `asad; "asd")')
@@ -26,7 +1147,7 @@
array([3012581664, 30547, 3012579792, 30547], dtype=uint64)
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> a = kx.q('(1; 3.4f; `asad; "asd")')
@@ -34,9 +1155,9 @@
array([1, 3.4, b'asad', b'asd'], dtype=object)
```
-- Converting PyKX generic lists using the keyword parameter `raw=True` when explictly required previously would error indicating that the keyword argument was not supplied. This has been resolved with the parameter now appropriately passed to all items
+- Converting PyKX generic lists using the keyword parameter `raw=True` when explicitly required previously would error indicating that the keyword argument was not supplied. This has been resolved with the parameter now appropriately passed to all items
- === "Behaviour prior to change"
+ === "Behavior prior to change"
The errors below are truncated for readability
@@ -49,7 +1170,7 @@
TypeError: The q datetime type is deprecated, and can only be accessed ..
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").py(raw=True)
@@ -89,7 +1210,7 @@
- Fix to issue where use of `kx.SymbolAtom` with `__getitem__` method on `kx.Table` objects would return a table rather then vector/list. The return now mirrors the expected return which matches `str` type inputs
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -106,7 +1227,7 @@
'))
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -118,7 +1239,7 @@
```
- Reworked `Table.std()` method to better handle edge cases relating to mixed columns and nulls. Now matching Pandas results. This addresses issues raised [here](https://github.com/KxSystems/pykx/issues/28).
-- Fix to issue where loading PyKX on Windows from 2.5.0 could result in a users working directory being changed to `site-packages/pykx`.
+- Fix to issue where loading PyKX on Windows from 2.5.0 could result in a user's working directory being changed to `site-packages/pykx`.
## PyKX 2.5.1
@@ -128,20 +1249,84 @@
### Additions
+- Addition of methods to `pykx.Table` and `pykx.KeyedTable` objects for the application of `sql`, `select`, `exec`, `delete` and `update` queries
+
+ === "SQL"
+
+ ```python
+ >>> tab = kx.Table(data = {'x': [1, 2, 3], 'x1': ['a', 'b', 'a']})
+ >>> tab.sql("select * from $1 where x1='a'")
+ >>> tab.sql("select * from $1 where x1=$2 and x=$3", 'a', 1)
+ ```
+
+ === "select"
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)})
+ >>> qtab.select(where='col2<0.5')
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'})
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+ ... by={'col1': 'col1'},
+ ... where='col3=0b')
+ ```
+
+ === "exec"
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)})
+ >>> qtab.exec('col3')
+ >>> qtab.exec({'symcol': 'col1', 'maxcol4': 'max col4'}, where=['col1=`a', 'col2<0.3'])
+ >>> qtab.exec(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+ ... by={'col1': 'col1'},
+ ... where='col3=0b')
+ ```
+
+ === "delete"
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']})
+ >>> qtab.delete(['age', 'eye'])
+ >>> qtab.delete(where=['hair=`fair', 'age=28'])
+ >>> qtab.delete('age', modify=True)
+ ```
+
+ === "update"
+
+ ```python
+ >>> qtab = kx.Table(data={
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']})
+ >>> qtab.update({'eye': '`blue`brown`green'})
+ >>> qtab.update({'eye': ['blue']}, where='hair=`fair')
+ ```
+
- [Pandas API](../user-guide/advanced/Pandas_API.ipynb) additions: `isnull`, `isna`, `notnull`, `notna`, `idxmax`, `idxmin`, `kurt`, `sem`.
- Addition of `filter_type`, `filter_columns`, and `custom` parameters to `QReader.csv()` to add options for CSV type guessing.
-
+
```python
>>> import pykx as kx
>>> reader = kx.QReader(kx.q)
>>> reader.csv("myFile0.csv", filter_type = "like", filter_columns="*name", custom={"SYMMAXGR":15})
pykx.Table(pykx.q('
- firstname lastname
+ firstname lastname
----------------------
- "Frieda" "Bollay"
- "Katuscha" "Paton"
- "Devina" "Reinke"
- "Maurene" "Bow"
+ "Frieda" "Bollay"
+ "Katuscha" "Paton"
+ "Devina" "Reinke"
+ "Maurene" "Bow"
"Iseabal" "Bashemeth"
..
'))
@@ -150,9 +1335,9 @@
### Fixes and Improvements
- Fix to regression in PyKX 2.5.0 where PyKX initialisation on Windows would result in a segmentation fault when using an `k4.lic` license type.
-- Previously user could not make direct use of `kx.SymbolicFunction` type objects against a remote process, this has been rectified
+- Previously users could not make direct use of `kx.SymbolicFunction` type objects against a remote process; this has been rectified
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -166,7 +1351,7 @@
pykx.exceptions.QError: .my.func
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -181,7 +1366,7 @@
- Previously use of the context interface for q primitive functions in licensed mode via IPC would partially run the function on the client rather than server, thus limiting usage for named entities on the server.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -192,7 +1377,7 @@
pykx.exceptions.QError: tab
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -209,7 +1394,7 @@
- With the release of PyKX 2.5.0 and support of PyKX usage in paths containing spaces the context interface functionality could fail to load a requested context over IPC if PyKX was not loaded on the server.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -218,7 +1403,7 @@
... Error Message ...
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -315,7 +1500,7 @@
```python
>>> import pykx as kx
We have been unable to update your license for PyKX using the following information:
- Environment variable: KDB_K4LICENSE_B64
+ Environment variable: KDB_K4LICENSE_B64
License location: /user/path/to/license/k4.lic
Reason: License content matches supplied Environment variable
@@ -330,7 +1515,7 @@
Would you like to renew your license? [Y/n]:
```
-- Intialization workflow for PyKX using form based install process now allows users to install Commercial "k4.lic" licenses using this mechanism. The updated workflow provides the following outputs
+- Initialization workflow for PyKX using form based install process now allows users to install Commercial "k4.lic" licenses using this mechanism. The updated workflow provides the following outputs
=== "License initialization"
@@ -347,7 +1532,7 @@
Enter your choice here [1/2]: 2
To apply for your PyKX license, contact your KX sales representative or sales@kx.com.
- Alternately apply through https://kx.com/book-demo.
+ Alternately apply through https://kx.com/book-demo.
Would you like to open this page? [Y/n]: n
Select the method you wish to use to activate your license:
@@ -370,7 +1555,7 @@
PyKX unlicensed mode enabled. To set this as your default behavior please set the following environment variable PYKX_UNLICENSED='true'
For more information on PyKX modes of operation, please visit https://code.kx.com/pykx/user-guide/advanced/modes.html.
- To apply for a PyKX license please visit
+ To apply for a PyKX license please visit
Personal License: https://kx.com/kdb-insights-personal-edition-license-download
Commercial License: Contact your KX sales representative or sales@kx.com or apply on https://kx.com/book-demo
@@ -382,11 +1567,11 @@
>>> tab = kx.q('([] a:2 2 3; b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))')
>>> tab.replace(2, "test")
pykx.Table(pykx.q('
- a b c d e
+ a b c d e
---------------------
- `test 4 1 a 1
+ `test 4 1 a 1
`test `test 0 b `test
- 3 6 1 c `a
+ 3 6 1 c `a
'))
```
@@ -396,7 +1581,7 @@
- When importing PyKX from a source file path containing a space initialisation would fail with an `nyi` error message, this has now been resolved
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -408,7 +1593,7 @@
pykx.exceptions.QError: nyi
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -421,7 +1606,7 @@
- `kx.util.debug_environment()` now uses `PyKXReimport` when running the `q` subprocess and captures `stderr` in case of failure.
- When using debug mode, retrieval of unknown context's would incorrectly present a backtrace to a user, for example:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import os
@@ -445,7 +1630,7 @@
1 2019.12.01D00:25:53.000000000 2019.12.01D00:26:04.000000000 1 ..
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import os
@@ -462,7 +1647,7 @@
- When using debug mode, PyKX could run into issues where attempts to compare single character atoms would result in an error. This has now been fixed.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import os
@@ -490,7 +1675,7 @@
pykx.exceptions.QError: =
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import os
@@ -499,9 +1684,9 @@
>>> kx.q('"z"') == b'z'
pykx.BooleanAtom(pykx.q('1b'))
```
-- Update to system functions `tables` and `functions` to allow listing of tables and functions within dictionaries. Previously attempts to list entities within dictionaries would attempt to retrieve items in a namespace. The below example shows this behaviour for tables.
+- Update to system functions `tables` and `functions` to allow listing of tables and functions within dictionaries. Previously attempts to list entities within dictionaries would attempt to retrieve items in a namespace. The below example shows this behavior for tables.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -513,7 +1698,7 @@
pykx.SymbolVector(pykx.q(',`table'))
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -528,14 +1713,14 @@
- Resolved issue in `PyKXReimport` which caused it to set empty environment variables to `None` rather than leaving them empty.
- The `_PyKX_base_types` attribute assigned to dataframes during `.pd()` conversion included `'>` in the contents. This has been removed:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.q('([] a:1 2)').pd().attrs['_PyKX_base_types']
{'a': "LongVector'>"}
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.q('([] a:1 2)').pd().attrs['_PyKX_base_types']
@@ -544,7 +1729,7 @@
- IPC queries can now pass PyKX Functions like objects as the query parameter.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> import pykx as kx
@@ -560,7 +1745,7 @@
ValueError: Cannot send Python function over IPC
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -575,7 +1760,7 @@
- When failing to initialise PyKX with an expired or invalid license PyKX will now point a user to the license location:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
Your PyKX license has now expired.
@@ -586,7 +1771,7 @@
Would you like to renew your license? [Y/n]:
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
Your PyKX license has now expired.
@@ -601,14 +1786,14 @@
```
- Disabled raw conversions for `kx.List` types as the resulting converted object would be unusable, for example:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.q('(1j; 2f; 3i; 4e; 5h)').np(raw=True)
array([418404288, 1, 418403936, 1, 418404000], dtype=np.uintp)
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.q('(1j; 2f; 3i; 4e; 5h)').np(raw=True)
@@ -617,7 +1802,7 @@
- `handle_nulls` now operates on all of `datetime64[ns|us|ms|s]` and ensures that the contents of the original dataframe are not modified:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> ns = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ns]')
@@ -633,9 +1818,9 @@
>>> kx.toq(df, handle_nulls=True)
:1: RuntimeWarning: WARN: Type information of column: s is not known falling back to DayVector type
pykx.Table(pykx.q('
- ns us ms s
+ ns us ms s
----------------------------------------------------------------------------------------------------
- 1970.01.01D00:00:00.000000000 1970.01.01D00:00:00.000000000
+ 1970.01.01D00:00:00.000000000 1970.01.01D00:00:00.000000000
2020.09.08D07:06:05.123456789 2020.09.08D07:06:05.123456000 2020.09.08D07:06:05.123000000 2020.09.08
'))
>>> df
@@ -644,7 +1829,7 @@
1 1990-09-09 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> ns = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ns]')
@@ -659,9 +1844,9 @@
1 2020-09-08 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05
>>> kx.toq(df, handle_nulls=True)
pykx.Table(pykx.q('
- ns us ms s
+ ns us ms s
-----------------------------------------------------------------------------------------------------------------------
-
+
2020.09.08D07:06:05.123456789 2020.09.08D07:06:05.123456000 2020.09.08D07:06:05.123000000 2020.09.08D07:06:05.000000000
'))
>>> df
@@ -672,7 +1857,7 @@
- Fix for error when calling `.pd(raw=True)` on `EnumVector`:
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.q('`s?`a`b`c').pd(raw=True)
@@ -684,7 +1869,7 @@
TypeError: super() argument 1 must be a type, not EnumVector
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> import pykx as kx
@@ -699,31 +1884,31 @@
- Since 2.1.0 when using Pandas >= 2.0 dataframe columns of type `datetime64[s]` converted to `DateVector` under `toq`. Now correctly converts to `TimestampVector`. See [conversion condsideratons](../user-guide/fundamentals/conversion_considerations.md#temporal-types) for further details.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}))
:1: RuntimeWarning: WARN: Type information of column: a is not known falling back to DayVector type
pykx.Table(pykx.q('
- a
+ a
----------
2020.09.08
'))
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}))
pykx.Table(pykx.q('
- a
+ a
-----------------------------
2020.09.08D07:06:05.000000000
'))
- #Licensed users can pass `ktype` specifying column types if they wish to override the default behaviour
+ #Licensed users can pass `ktype` specifying column types if they wish to override the default behavior
>>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}), ktype={'a':kx.DateVector})
pykx.Table(pykx.q('
- a
+ a
----------
2020.09.08
'))
@@ -734,7 +1919,7 @@
### Beta Features
-- Addition of [streamlit](https://streamlit.io/) connection class `pykx.streamlit.Connection` to allow querying of q processes when building a streamlit application. For an example of this functionality and an introduction to it's usage see [here](../beta-features/streamlit.md).
+- Addition of [streamlit](https://streamlit.io/) connection class `pykx.streamlit.Connection` to allow querying of q processes when building a streamlit application. For an example of this functionality and an introduction to its usage see [here](../user-guide/advanced/streamlit.md).
## PyKX 2.4.2
@@ -754,7 +1939,7 @@
### Fixes and Improvements
-- Previously calls to `qsql.select`, `qsql.exec`, `qsql.update` and `qsql.delete` would require multiple calls to parse the content of `where`, `colums` and `by` clauses. These have now been removed with all parsing now completed within the functional query when called via IPC or local to the Python process.
+- Previously calls to `qsql.select`, `qsql.exec`, `qsql.update` and `qsql.delete` would require multiple calls to parse the content of `where`, `columns` and `by` clauses. These have now been removed with all parsing now completed within the functional query when called via IPC or local to the Python process.
- Linux x86 and Mac x86/ARM unlicensed mode `e.o` library updated to 2023.11.22. Fixes subnormals issue:
=== "Behavior prior to change"
@@ -1326,7 +2511,7 @@
- Attribute setting and removal.
- Addition of missing tables from partitions within a database.
-- Added `PYKX_THREADING` environment variable that allows [multithreaded programs](../beta-features/threading.md) to modify state when calling into python on secondary threads. Note: This behaviour is only supported on Linux / MacOS.
+- Added `PYKX_THREADING` environment variable that allows [multithreaded programs](../user-guide/advanced/threading.md) to modify state when calling into python on secondary threads. Note: This behavior is only supported on Linux / MacOS.
!!! Note
@@ -1385,7 +2570,7 @@
- `pykx.q.qsql.select` and `pykx.q.qsql.exec` statements no longer use `get` calls for table retrieval unnecessarily when operating locally or via IPC.
- Null integral values in table keys will no longer convert the underlying vectors to floats when converting from a `pykx.KeyedTable` to `pandas.DataFrame`
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.q('`col1 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd()
@@ -1396,7 +2581,7 @@
0.0 -- c
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.q('`col1 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd()
@@ -1409,7 +2594,7 @@
!!! Warning
- For multi-keyed PyKX tables converted to Pandas the appropriate round-trip behaviour is supported however due to limitations in Pandas displaying of these as masked arrays is not supported as below
+ For multi-keyed PyKX tables converted to Pandas the appropriate round-trip behavior is supported however due to limitations in Pandas displaying of these as masked arrays is not supported as below
```python
>>> kx.q('`col1`col2 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd()
@@ -1424,7 +2609,7 @@
- Addition of deprecation warning for environmental configuration option `PYKX_NO_SIGINT` which is to be replaced by `PYKX_NO_SIGNAL`. This is used when users require no signal handling logic overwrites and now covers `SIGTERM`, `SIGINT`, `SIGABRT` signals amongst others.
- Use of `pykx.q.system.variables` no longer prepends leading `.` to supplied string allowing users to get the variables associated with dictionary like namespaces.
- === "Behaviour prior to change"
+ === "Behavior prior to change"
```python
>>> kx.q('.test.a:1;.test.b:2')
@@ -1435,7 +2620,7 @@
pykx.SymbolVector(pykx.q('`s#`a`b'))
```
- === "Behaviour post change"
+ === "Behavior post change"
```python
>>> kx.q('.test.a:1;.test.b:2')
@@ -1632,7 +2817,7 @@
pykx.IntAtom(pykx.q('-0Wi'))
```
-- Users can now use the environment variables `PYKX_UNLICENSED="true"` or `PYKX_LICENSED="true"` set this as part of configuration within their `.pykx-config` file to allow `unlicensed` or `licensed` mode to be the default behaviour on initialisation for example:
+- Users can now use the environment variables `PYKX_UNLICENSED="true"` or `PYKX_LICENSED="true"` set this as part of configuration within their `.pykx-config` file to allow `unlicensed` or `licensed` mode to be the default behavior on initialisation for example:
```python
>>> import os
@@ -1873,7 +3058,7 @@
### Fixes and Improvements
-- Fix to regression in PyKX 2.1.0 where execution of `from pykx import *` would result in the following behaviour
+- Fix to regression in PyKX 2.1.0 where execution of `from pykx import *` would result in the following behavior
```
>>> from pykx import *
@@ -2216,7 +3401,7 @@ the following reads a CSV file and specifies the types of the three columns name
- Fixed an issue causing a crash when closing `QConnection` instances on Windows.
- Updated q 4.0 libraries to 2023.08.11. Note: Mac ARM release remains on 2022.09.30.
-- Fix [Jupyter Magic](../getting-started/q_magic_command.ipynb) in local mode.
+- Fix [Jupyter Magic](../examples/jupyter-integration.ipynb) in local mode.
- Fix error when binding with [FFI](https://github.com/KxSystems/ffi) in `QINIT`.
- Fix issue calling `peach` with `PYKX_RELEASE_GIL` set to true when calling a Python function.
@@ -2570,7 +3755,7 @@ the following reads a CSV file and specifies the types of the three columns name
- Updated to stable classifier (`Development Status :: 5 - Production/Stable`) in project metadata. Despite this update being done in version 1.0.1, version 1.0.0 is still the first stable release of PyKX.
- PyKX now provides source distributions (`sdist`). It can be downloaded from PyPI using `pip download --no-binary=:all: --no-deps pykx`. As noted in [the installation docs](../getting-started/installing.md#supported-environments), installations built from the source will only receive support on a best-effort basis.
- Fixed Pandas NaT conversion to q types. Now `pykx.toq(pandas.NaT, ktype=ktype)` produces a null temporal atom for any given `ktype` (e.g. `pykx.TimeAtom`).
-- Added [a doc page for limitations of embedded q](../user-guide/advanced/limitations.md).
+- Added [a doc page for limitations of embedded q](../help/issues.md).
- Added a test to ensure large vectors are correctly handled (5 GiB).
- Always use synchronous queries internally, i.e. fix `QConnection(sync=False)`.
- Disabled the context interface over IPC. This is a temporary measure that will be reversed once q function objects are updated to run in the environment they were defined in by default.
diff --git a/docs/release-notes/images/KXI-52686_after.png b/docs/release-notes/images/KXI-52686_after.png
new file mode 100644
index 0000000..c0984fa
Binary files /dev/null and b/docs/release-notes/images/KXI-52686_after.png differ
diff --git a/docs/release-notes/images/KXI-52686_before.png b/docs/release-notes/images/KXI-52686_before.png
new file mode 100644
index 0000000..b1c746f
Binary files /dev/null and b/docs/release-notes/images/KXI-52686_before.png differ
diff --git a/docs/release-notes/underq-changelog.md b/docs/release-notes/underq-changelog.md
index 18d7c68..749ee50 100644
--- a/docs/release-notes/underq-changelog.md
+++ b/docs/release-notes/underq-changelog.md
@@ -6,6 +6,113 @@ This changelog provides updates from PyKX 2.0.0 and above, for information relat
The changelog presented here outlines changes to PyKX when operating within a q environment specifically, if you require changelogs associated with PyKX operating within a Python environment see [here](./changelog.md).
+## PyKX 3.0.0
+
+#### Release Date
+
+2024-11-12
+
+### Additions
+
+- Added `cloud_libraries` kwarg to `install_into_QHOME` allowing installation of the kdb Insights cloud libraries to QHOME.
+- Addition of support for new environment variable `PYKX_USE_FIND_LIBPYTHON` which will use the Python package [`find_libpython`](https://pypi.org/project/find-libpython/) to specify the location from which `libpython.[so|dll]` will be taken.
+
+### Fixes and Improvements
+
+- Addition of function `.pykx.toq0` to support conversion of Python strings to q strings rather than q symbols as is the default behaviour
+
+ ```q
+ q)pystr:.pykx.eval["\"test\""]
+ q).pykx.toq0[pystr]
+ `test
+ q).pykx.toq0[pystr;1b]
+ "test"
+ ```
+
+- Fix for `install_into_QHOME` with `overwrite_embedpy=True`. Previously loading PyKX through use of `p)` would fail.
+
+ === "Behaviour prior to change"
+
+ ```q
+ q)p)print(1+1)
+ 'pykx.q. OS reports: No such file or directory
+ [3] /home/user/q/p.k:1: \l pykx.q
+ ```
+
+ === "Behaviour post change"
+
+ ```q
+ q)p)print(1+1)
+ 2
+ ```
+
+- Fix to minor memory leak when accessing attributes or retrieving global variables from Python objects. The following operations would lead to this behaviour
+
+ ```q
+ q)np:.pykx.import[`numpy]
+ q)np`:array / Accessing an attribute caused a leak
+ q).pykx.console[]
+ >>> variable = 100
+ >>> quit()
+ q).pykx.get`variable / Accessing a global in this way caused a leak
+ ```
+
+- On Linux, loading `qlog` no longer loads the logging functionality into the `.pykx` namespace; it is now loaded into the `.com_kx_log` namespace, as expected under default conditions.
+
+ === "Behaviour prior to change"
+
+ ```q
+ q)@[{get x;1b};`.pykx.configure;0b]
+ 1b
+ q)@[{get x;1b};`.com_kx_log.configure;0b]
+ 0b
+ ```
+
+ === "Behaviour post change"
+
+ ```q
+ q)@[{get x;1b};`.pykx.configure;0b]
+ 0b
+ q)@[{get x;1b};`.com_kx_log.configure;0b]
+ 1b
+ ```
+
+## PyKX 2.5.4
+
+#### Release Date
+
+2024-10-22
+
+### Fixes and Improvements
+
+- `.pykx.util.loadfile` now loads a file using its full path unless it contains a space. This is to avoid issues loading scripts which are sensitive to their working directory.
+
+## PyKX 2.5.3
+
+#### Release Date
+
+2024-08-20
+
+### Fixes and Improvements
+
+- Previously PyKX conversions of generic lists (type 0h) would convert this data to its `raw` representation rather than its `python` representation as documented. This had the effect of restricting the usability of some types within PyKX under q in non-trivial use-cases. With the `2.5.2` changes to more accurately represent `raw` data at depth this became more obvious as an issue.
+
+ === "Behaviour prior to change"
+
+ ```q
+ q).pykx.version[]
+ "2.5.2"
+ q).pykx.print .pykx.eval["lambda x:x"](`test;::;first 1?0p)
+ [b'test', None, 49577290277400616]
+ ```
+
+ === "Behaviour post change"
+
+ ```q
+ q).pykx.print .pykx.eval["lambda x:x"](`test;::;first 1?0p)
+ ['test', None, datetime.datetime(2002, 1, 25, 11, 16, 58, 871372)]
+ ```
+
## PyKX 2.5.0
#### Release Date
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 023c600..89a2514 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -6,21 +6,6 @@ If you need a feature that's not included in this list please let us know by rai
## Upcoming Changes
-- More Pythonic query syntax when querying PyKX Tables. Syntax for this will be similar to the following:
-
- ```python
- >>> import pykx as kx
- >>> N = 10000
- >>> table = kx.Table(data = {
- ... 'x' : kx.random.random(N, ['a', 'b', 'c]),
- ... 'x1': kx.random.random(N, 100.0),
- ... 'x2': kx.random.random(N, 100)
- ... })
- >>> table.select(where = kx.col('x') == 'a')
- >>> table.select(kx.col('x1').max())
- >>> table.select(kx.col('x1').wavg('x2'))
- ```
-
- Addition of support for q primitives as methods off PyKX Vector and Table objects. Syntax for this will be similar to the following:
```python
@@ -31,14 +16,8 @@ If you need a feature that's not included in this list please let us know by rai
>>> vec.abs()
```
-- Performance improvements for conversions from NumPy arrays to PyKX Vector objects and vice-versa through enhanced use of C++ over Cython.
-- Addition of functionality for the development of streaming workflows using PyKX.
+- Performance improvements for conversions from PyKX Vector objects to NumPy arrays through enhanced use of C++ over Cython.
- Configurable initialisation logic in the absence of a license. Thus allowing users who have their own workflows for license access to modify the instructions for their users.
-- Promotion of Beta functionality currently available in PyKX to full production support
- - Database Management
- - Compression and Encryption
- - Multi-threaded execution
- - Remote function execution
## Future
diff --git a/docs/spelling.txt b/docs/spelling.txt
index 286571f..53129bd 100644
--- a/docs/spelling.txt
+++ b/docs/spelling.txt
@@ -81,3 +81,32 @@ performant
qsql
reimplementing
analyze
+roadmap
+conor
+noor
+kiernan
+nydza
+nicpon
+rian
+smyth
+foden
+hyaric
+da
+kian
+le
+maynes
+neutropolis
+nipsn
+siobhan
+http
+licensor
+www
+venv
+deserialization
+vs
+embedq
+https
+doesn
+builtins
+kobject
+streamlit
diff --git a/docs/stylesheets/2021-edited.css b/docs/stylesheets/2021-edited.css
new file mode 100644
index 0000000..2f7c1ea
--- /dev/null
+++ b/docs/stylesheets/2021-edited.css
@@ -0,0 +1,256 @@
+/*
+ Title: PyKX extra CSS for code.kx.com in MkDocs
+ */
+
+/* countermand MkDocs insertion of href attribute after link anchor
+--------------------------------------------- */
+@media print {
+ a[href]:after {
+ content: none !important;
+ }
+ }
+
+ [data-md-color-scheme="kx-light"] { /* custom-property overrides applied when the kx-light colour scheme is active */
+ --md-primary-fg-color: hsla(210, 33%, 9%, 1);
+ --md-typeset-a-color: hsla(217, 100%, 40%, 1);
+ --md-typeset-a-color--dark: hsla(217, 100%, 40%, 1);
+ --md-typeset-img-bg: transparent;
+ --md-typeset-mark-color: rgba(255,195,0,.3); /* KX Yellow #FFC300 = rgb(255,195,0), at 30% alpha; used by the ::selection rules in this stylesheet */
+ }
+
+ [data-md-color-scheme="slate"] { /* custom-property overrides applied when the slate colour scheme is active */
+ --md-primary-fg-color: hsla(210, 33%, 9%, 1);
+ --md-typeset-a-color: #FFC300;
+ --md-typeset-a-color--dark: hsla(217, 100%, 40%, 1);
+ --md-typeset-img-bg: #f4edda; /* consumed by the PNG image background rule in this stylesheet */
+ --md-typeset-mark-color: rgba(255,195,0,.3); /* KX Yellow #FFC300 = rgb(255,195,0), at 30% alpha; used by the ::selection rules in this stylesheet */
+ }
+ .md-typeset a:hover {
+ color: var(--md-typeset-a-color);
+ }
+ /* admonition use kx blue colors */
+ .md-typeset .note>.admonition-title,
+ .md-typeset .note>summary {
+ background-color: rgba(82,148,254,.1);
+ border-color: #5294fe;
+ }
+
+ .md-typeset .admonition.note, .md-typeset details.note {
+ border-color: #5294fe;
+ }
+
+ .md-typeset .admonition.home-page,
+ .md-typeset details.home-page {
+ border-width: 0;
+ border-left-width: 0.2rem;
+ border-color: var(--md-code-fg-color)
+ }
+
+ .md-typeset .home-page > .admonition-title,
+ .md-typeset .home-page > summary {
+ background-color:rgb(125 125 125 / 10%);
+ border-color: var(--md-accent-fg-color--transparent);
+ }
+
+ .md-typeset .home-page > .admonition-title::before,
+ .md-typeset .home-page > summary::before {
+ background-color: var(--md-code-fg-color);
+ mask-image:var(--md-admonition-icon--abstract);
+ }
+
+ /* TEXT HIGHLIGHTING - KX Yellow: #FFC300 */
+ ::selection {
+ background: var(--md-typeset-mark-color); /* WebKit/Blink Browsers */
+ }
+ ::-moz-selection {
+ background: var(--md-typeset-mark-color); /* Gecko Browsers */
+ }
+
+
+ /* type
+ --------------------------------------------- */
+ /*
+ Content hosted on code.kx.com will be displayed in Euclid.
+ Content hosted elsewhere will not, unless the font is installed on the user's machine.
+ The Segoe UI secondary font will be used only if installed on the user's machine:
+ Microsoft permits neither hosting at code.kx.com nor distribution.
+ The most probable outcome for a site hosted off code.kx.com is display with the user's installed fonts.
+ */
+ @font-face {
+ font-family: 'EuclidSquare';
+ src: local('Euclid Square'),
+ url('https://code.kx.com/fonts/EuclidSquare-Regular-WebS.woff2') format('woff2'),
+ url('https://code.kx.com/fonts/EuclidSquare-Regular-WebS.woff') format('woff');
+ font-weight: normal;
+ font-style: normal;
+ }
+ @font-face {
+ font-family: 'EuclidSquare';
+ src: local('Euclid Square'),
+ url('https://code.kx.com/fonts/EuclidSquare-RegularItalic-WebS.woff2') format('woff2'),
+ url('https://code.kx.com/fonts/EuclidSquare-RegularItalic-WebS.woff') format('woff');
+ font-weight: normal;
+ font-style: italic;
+ }
+ @font-face {
+ font-family: 'EuclidSquare';
+ src: local('Euclid Square'),
+ url('https://code.kx.com/fonts/EuclidSquare-Bold-WebS.woff2') format('woff2'),
+ url('https://code.kx.com/fonts/EuclidSquare-Bold-WebS.woff') format('woff');
+ font-weight: bold;
+ font-style: normal;
+ }
+
+ body, input,
+ article h1, article h2, article h3,
+ .wp-author, #wp-brand, .publications, .admonition-title,
+ article details>summary {
+ font-family: 'EuclidSquare', 'Segoe UI', -apple-system, Cantarell, Helvetica, Arial, sans-serif;
+ }
+
+ .md-typeset code,
+ .md-typeset pre {
+ font-family: 'Consolas', SFMono-Regular, Menlo, 'Droid Sans Mono', monospace;
+ }
+
+ .md-typeset a>code {
+ padding: 0 .15em;
+ }
+
+ /* stop floated blocks intruding into headings */
+ .md-typeset h2 {
+ clear: both;
+ }
+
+ /* undo font-size:85% code */
+ .md-typeset code{
+ font-size: 1em;
+ background-color: var(--md-code-bg-color);
+ color: var(--md-code-fg-color);
+ }
+
+
+ .md-typeset .admonition pre {
+ font-size: 1em;
+ }
+
+ .nowrap {
+ white-space: nowrap;
+ }
+
+ .nobullets ul {
+ list-style: none;
+ }
+
+ /* RHS TOC TITLE & NAV TREE TITLE - Euclid Square Bold UPPERCASE 9/12pt */
+ .md-nav--lifted>.md-nav__list>.md-nav__item--active>.md-nav__link,
+ .md-nav__title {
+ font-weight: 300;
+ letter-spacing: 0.2em;
+ opacity: 0.5;
+ padding-bottom: .8em;
+ text-transform: uppercase;
+ }
+
+ /* tweak Prism.js styles
+ --------------------------------------------- */
+ .md-typeset pre[class*="language-"] {
+ background-color: var(--md-code-bg-color);
+ color: var(--md-code-fg-color);
+ font-size: .8em;
+ }
+ .md-typeset pre code {
+ line-height: 1.6em;
+ }
+
+ .md-typeset pre[class*="language-syntax"] {
+ /*border: 1px solid rgba(0,0,0,0.2);*/
+ box-shadow: 2px 2px rgba(0,0,0,.1);
+ font-size: 1em;
+ font-weight: bold;
+ padding: 0.5em 1em;
+ }
+
+ .md-typeset pre[class*="language-q"],
+ .md-typeset pre[class*="language-txt"] {
+ background-color: var(--md-code-bg-color);
+ color: var(--md-code-fg-color);
+ }
+
+ /* less padding on code blocks */
+ .md-typeset pre > code {
+ padding: 0;
+ }
+
+ /* match Prism code blocks */
+ /*.md-typeset code {
+ background-color: #fdf6e3;
+ }
+ */.md-typeset .admonition code {
+ background-color: var(--md-code-bg-color);
+ }
+
+ .md-typeset .admonition pre[class*="language-"] {
+ padding: 0;
+ }
+ /* inline code elements */
+ .md-content code {
+ background-color: var(--md-code-bg-color);
+ color:var(--md-code-fg-color);
+ box-shadow: none;
+ }
+
+ /* Fixed-font block elements
+ ---------------------------------------------------------------------------- */
+ div.typewriter p {
+ background-color: var(--md-code-bg-color);
+ color: var(--md-code-fg-color); /* = Prism */
+ font-family: 'Consolas', SFMono-Regular, Menlo, 'Droid Sans Mono', monospace;
+ font-size: 80%;
+ font-weight: 500;
+ /*font-variant-ligatures: none;*/
+ font-feature-settings: "liga" 0;
+ margin: 0;
+ padding: 1em;
+ white-space: pre-wrap;
+ }
+ div.typewriter a {
+ background-color: var(--md-code-bg-color);
+ color: var(--md-typeset-a-color);
+ }
+ div.typewriter strong {
+ font-weight: 800;
+ }
+
+ .md-typeset table:not([class]) th {
+ /*font-style: italic;*/
+ font-weight: normal;
+ opacity: .4;
+ }
+
+ /* background of transparent images in Dark Mode */
+ .md-typeset p:not(.small-face) img[src$=".png"] {
+ background-color: var(--md-typeset-img-bg);
+ border-radius: 5px;
+ padding: 1em;
+ }
+
+ /* Small faces */
+ .md-typeset p.small-face {
+ float: left;
+ margin: 0 1em 0 0;
+ }
+ .md-typeset .small-face img {
+ border-radius: 35px;
+ width: 70px;
+ }
+
+ mark {
+ padding: 0 .2em;
+ }
+
+ /* No background in code block within admonition */
+ .md-typeset .admonition pre {
+ background-color: transparent;
+ margin-top: 0;
+ }
diff --git a/docs/stylesheets/prism-edited.css b/docs/stylesheets/prism-edited.css
new file mode 100644
index 0000000..2eb16d2
--- /dev/null
+++ b/docs/stylesheets/prism-edited.css
@@ -0,0 +1,322 @@
+/* PrismJS 1.23.0
+https://prismjs.com/download.html#themes=prism-solarizedlight&languages=markup+css+clike+javascript+apl+aspnet+bash+batch+c+csharp+cpp+docker+git+handlebars+http+ini+j+java+json+json5+markdown+markup-templating+powershell+protobuf+python+q+r+regex+shell-session+sql+yaml&plugins=treeview */
+/*
+ Solarized Color Schemes originally by Ethan Schoonover
+ http://ethanschoonover.com/solarized
+
+ Ported for PrismJS by Hector Matos
+ Website: https://krakendev.io
+ Twitter Handle: https://twitter.com/allonsykraken)
+*/
+
+/*
+SOLARIZED HEX
+--------- -------
+base03 #002b36
+base02 #073642
+base01 #586e75
+base00 #657b83
+base0 #839496
+base1 #93a1a1
+base2 #eee8d5
+base3 #fdf6e3
+yellow #b58900
+orange #cb4b16
+red #dc322f
+magenta #d33682
+violet #6c71c4
+blue #268bd2
+cyan #2aa198
+green #859900
+*/
+
+code[class*="language-"],
+pre[class*="language-"] {
+ color: var(--md-code-fg-color);
+ font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
+ font-size: 1em;
+ text-align: left;
+ white-space: pre;
+ word-spacing: normal;
+ word-break: normal;
+ word-wrap: normal;
+
+ line-height: 1.5;
+
+ -moz-tab-size: 4;
+ -o-tab-size: 4;
+ tab-size: 4;
+
+ -webkit-hyphens: none;
+ -moz-hyphens: none;
+ -ms-hyphens: none;
+ hyphens: none;
+}
+
+pre[class*="language-"]::-moz-selection, pre[class*="language-"] ::-moz-selection,
+code[class*="language-"]::-moz-selection, code[class*="language-"] ::-moz-selection {
+ background: #073642; /* base02 */
+}
+
+pre[class*="language-"]::selection, pre[class*="language-"] ::selection,
+code[class*="language-"]::selection, code[class*="language-"] ::selection {
+ background: #073642; /* base02 */
+}
+
+/* Code blocks */
+pre[class*="language-"] {
+ padding: 1em;
+ margin: .5em 0;
+ overflow: auto;
+ border-radius: 0.3em;
+}
+
+:not(pre) > code[class*="language-"],
+pre[class*="language-"] {
+ background-color: var(--md-code-bg-color);
+ color: var(--md-code-fg-color);
+}
+
+/* Inline code */
+:not(pre) > code[class*="language-"] {
+ padding: .1em;
+ border-radius: .3em;
+}
+
+.token.comment,
+.token.prolog,
+.token.doctype,
+.token.cdata {
+ color: #93a1a1; /* base1 */
+}
+
+.token.punctuation {
+ color: #586e75; /* base01 */
+}
+
+.token.namespace {
+ opacity: .7;
+}
+
+.token.property,
+.token.tag,
+.token.boolean,
+.token.number,
+.token.constant,
+.token.symbol,
+.token.deleted {
+ color: #268bd2; /* blue */
+}
+
+.token.selector,
+.token.attr-name,
+.token.string,
+.token.char,
+.token.builtin,
+.token.url,
+.token.inserted {
+ color: var(--md-code-hl-string-color); /* cyan */
+}
+
+.token.entity {
+ color: #657b83; /* base00 */
+ background: #eee8d5; /* base2 */
+}
+
+.token.atrule,
+.token.attr-value,
+.token.keyword {
+ color: #859900; /* green */
+}
+
+.token.function,
+.token.class-name {
+ color: #b58900; /* yellow */
+}
+
+.token.regex,
+.token.important,
+.token.variable {
+ color: #cb4b16; /* orange */
+}
+
+.token.important,
+.token.bold {
+ font-weight: bold;
+}
+.token.italic {
+ font-style: italic;
+}
+
+.token.entity {
+ cursor: help;
+}
+
+.token.treeview-part .entry-line {
+ position: relative;
+ text-indent: -99em;
+ display: inline-block;
+ vertical-align: top;
+ width: 1.2em;
+}
+.token.treeview-part .entry-line:before,
+.token.treeview-part .line-h:after {
+ content: "";
+ position: absolute;
+ top: 0;
+ left: 50%;
+ width: 50%;
+ height: 100%;
+}
+.token.treeview-part .line-h:before,
+.token.treeview-part .line-v:before {
+ border-left: 1px solid #ccc;
+}
+.token.treeview-part .line-v-last:before {
+ height: 50%;
+ border-left: 1px solid #ccc;
+ border-bottom: 1px solid #ccc;
+}
+.token.treeview-part .line-h:after {
+ height: 50%;
+ border-bottom: 1px solid #ccc;
+}
+.token.treeview-part .entry-name {
+ position: relative;
+ display: inline-block;
+ vertical-align: top;
+}
+.token.treeview-part .entry-name.dotfile {
+ opacity: 0.5;
+}
+
+/* @GENERATED-FONT */
+@font-face {
+ font-family: "PrismTreeview";
+ /**
+ * This font is generated from the .svg files in the `icons` folder. See the `treeviewIconFont` function in
+ * `gulpfile.js/index.js` for more information.
+ *
+ * Use the following escape sequences to refer to a specific icon:
+ *
+ * - \ea01 file
+ * - \ea02 folder
+ * - \ea03 image
+ * - \ea04 audio
+ * - \ea05 video
+ * - \ea06 text
+ * - \ea07 code
+ * - \ea08 archive
+ * - \ea09 pdf
+ * - \ea0a excel
+ * - \ea0b powerpoint
+ * - \ea0c word
+ */
+ src: url("data:application/font-woff;base64,d09GRgABAAAAAAgYAAsAAAAAEGAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABHU1VCAAABCAAAADsAAABUIIslek9TLzIAAAFEAAAAPwAAAFY1UkH9Y21hcAAAAYQAAAB/AAACCtvO7yxnbHlmAAACBAAAA+MAAAlACm1VqmhlYWQAAAXoAAAAKgAAADZfxj5jaGhlYQAABhQAAAAYAAAAJAFbAMFobXR4AAAGLAAAAA4AAAA0CGQAAGxvY2EAAAY8AAAAHAAAABwM9A9CbWF4cAAABlgAAAAfAAAAIAEgAHZuYW1lAAAGeAAAATcAAAJSfUrk+HBvc3QAAAewAAAAZgAAAIka0DSfeJxjYGRgYOBiMGCwY2BycfMJYeDLSSzJY5BiYGGAAJA8MpsxJzM9kYEDxgPKsYBpDiBmg4gCACY7BUgAeJxjYGRYyjiBgZWBgaGQoRZISkLpUAYOBj0GBiYGVmYGrCAgzTWFweEV4ysehs1ArgDDFgZGIA3CDAB2tQjAAHic7ZHLEcMwCESfLCz/VEoKSEE5parURxMOC4c0Ec283WGFdABgBXrwCAzam4bOK9KWeefM3Hhmjyn3ed+hTRq1pS7Ra/HjYGPniHcXMy4G/zNTP7/KW5HTXArkvdBW3ArN19dCG/NRIN8K5HuB/CiQn4U26VeBfBbML9NEH78AeJyVVc1u20YQ3pn905JcSgr/YsuSDTEg3cR1bFEkYyS1HQcQ2jQF2hot6vYSoECKnnPLA/SWUy9NTr31Bfp+6azsNI0SGiolzu7ODnfn+2Z2lnHG3rxhr9nfLGKbLGesncAYYnUHpsVnMG/uwyzNdFIVd6HI6twp8+R3LpT4TSglLoTHwwJgG2/dFvKrl9yI507/p5CCq4LTxB/PlPjkFaMHnWB/0S9je7RTPS+utnGtom1T2q5pk/e3H0M1S18rsXAL7wgpxQuhAmteGGvNjmcfGXuwnFNOPCXxeOGmnjrBLWNyBeNtVq2Hs03yus1aPS3mzSyNVSfu588iW1Q93x/4fjcHn+5EkS2tMxr4xIRa8ese+4L9uKZnxEqs8+ldyN9atU02a5t5uQ8hZGms1QTKpaKYqnipiNNOAIeIADC0JNEOYY+jtSgFoOchiAjRGFACpUTRje8bwIYWGCDEgENY8MEu9bnCYCdAxftoNg0KiSpUtPaHcanYwzXRu6T4r40b5npal3V7UHWCPJW9niyl1vIHgoujEXZjudBkeWkOeMQBRmbEPhKzij1i52t6/TadL+3q7H0U1eq4E8cG4gIIwQLx8VX7ToPXgPrehVc5QXHR7gMSmwjKfaYAP4KvZV+yn9bE18y2IY37LvtyrSg3i7ZK++B603ndlg/gBJpZRsfpBI6hyiaQ6FjlnThz8lAC3LgBIMnXDOAXxBQ4SIgiEhx2AcGCAwAhwjXRpCQms42bwAUt75BvAwgONzdgOfWEwzk4Ylzj4mz+5YEzzXzWX9aNlk7ot65y5QnBHsNlm6zDTu7sspRqG4V+fgJ1lVBZ07Nm7s5nemo3Lf3PO7iwtnroQ5/YDGwPRUip6fV6L+27p+wCHwSvPs85UnHqId8NAn5IBsKdv95KrL9m31Gsf2a/rluDslk1y1J9GE+LUmmVT/OyOHaFKGnapt2H5XeJTmKd6qYNoVVZOy+pWzr7rMip3ndG/4mQSoUcMbAqG/YNIAdXhkAqTVruXhocSKN0iS4Rwj7vSS4fcF/La07BfeQSuRAcFeW+9igjwPhhYPpGCBCBHhxiKMyFMFT7ziRH7RtfIWdiha+TdW+Rqs7bLHdN2ZJIKl0um0x3op9saYr0REeRdj09pl43pMzz4tjztrY8L4o8bzT+oLY27PR/eFtXs/YY5vtwB5Iqad14eYN0ujveMaGWqkdU3TKbQSC5Uvxaf4fA7SAQ3r2tEfIhd4duld91bwMisjqBw22orthNcr
oXl7KqO1329HBgAexgoCfGAwiDPoBnriki3lmNojrzvD0tjo6E3vPYP6E2BMIAeJxjYGRgYADiY8t3FsTz23xl4GbYzIAB/v9nWM6wBcjgYGAC8QH+QQhZAAB4nGNgZGBg2MzAACeXMzAyoAJeADPyAh14nGNgAILNpGEA0fgIZQAAAAAAAAA2AHIAvgE+AZgCCAKMAv4DlgPsBEYEoHicY2BkYGDgZchi4GQAASYg5gJCBob/YD4DABTSAZcAeJx9kU1uwjAQhV/4qwpqhdSqi67cTTeVEmBXDgBbhBD7AHYISuLUMSD2PUdP0HNwjp6i676k3qQS9Ujjb968mYUNoI8zPJTHw02Vy9PAFatfbpLuHbfIT47b6MF33KH+6riLF0wc93CHN27wWtdUHvHuuIFbfDhuUv903CKfHbfxgC/HHerfjrtYen3HPTx7ambiIl0YKQ+xPM5ltE9CU9NqxVKaItaZGPqDmj6VmTShlRuxOoniEI2sVUIZnYqJzqxMEi1yo3dybf2ttfk4CJTT/bVOMYNBjAIpFiTJOLCWOGLOHGGPBCE7l32XO0tmw04MjQwCQ7774B//lDmrZkJY3hvOrHBiLuiJMKJqoVgrejQ3CP5Yubt0JwxNJa96Oypr6j621VSOMQKG+uP36eKmHylcb0MAeJxtwdEOgjAMBdBeWEFR/Mdl7bTJtMsygc/nwVfPoYF+QP+tGDAigDFhxgVXLLjhjhUPCtmKTtmLaGN7x6dy/Io5bybqoevRQ3LRObb0sk3HKpn1SFqW6ru26vbpYfcmRCccJhqsAAA=")
+ format("woff");
+}
+
+.token.treeview-part .entry-name:before {
+ content: "\ea01";
+ font-family: "PrismTreeview";
+ font-size: inherit;
+ font-style: normal;
+ -webkit-font-smoothing: antialiased;
+ -moz-osx-font-smoothing: grayscale;
+ width: 2.5ex;
+ display: inline-block;
+}
+
+.token.treeview-part .entry-name.dir:before {
+ content: "\ea02";
+}
+.token.treeview-part .entry-name.ext-bmp:before,
+.token.treeview-part .entry-name.ext-eps:before,
+.token.treeview-part .entry-name.ext-gif:before,
+.token.treeview-part .entry-name.ext-jpe:before,
+.token.treeview-part .entry-name.ext-jpg:before,
+.token.treeview-part .entry-name.ext-jpeg:before,
+.token.treeview-part .entry-name.ext-png:before,
+.token.treeview-part .entry-name.ext-svg:before,
+.token.treeview-part .entry-name.ext-tiff:before {
+ content: "\ea03";
+}
+.token.treeview-part .entry-name.ext-cfg:before,
+.token.treeview-part .entry-name.ext-conf:before,
+.token.treeview-part .entry-name.ext-config:before,
+.token.treeview-part .entry-name.ext-csv:before,
+.token.treeview-part .entry-name.ext-ini:before,
+.token.treeview-part .entry-name.ext-log:before,
+.token.treeview-part .entry-name.ext-md:before,
+.token.treeview-part .entry-name.ext-nfo:before,
+.token.treeview-part .entry-name.ext-txt:before {
+ content: "\ea06";
+}
+.token.treeview-part .entry-name.ext-asp:before,
+.token.treeview-part .entry-name.ext-aspx:before,
+.token.treeview-part .entry-name.ext-c:before,
+.token.treeview-part .entry-name.ext-cc:before,
+.token.treeview-part .entry-name.ext-cpp:before,
+.token.treeview-part .entry-name.ext-cs:before,
+.token.treeview-part .entry-name.ext-css:before,
+.token.treeview-part .entry-name.ext-h:before,
+.token.treeview-part .entry-name.ext-hh:before,
+.token.treeview-part .entry-name.ext-htm:before,
+.token.treeview-part .entry-name.ext-html:before,
+.token.treeview-part .entry-name.ext-jav:before,
+.token.treeview-part .entry-name.ext-java:before,
+.token.treeview-part .entry-name.ext-js:before,
+.token.treeview-part .entry-name.ext-php:before,
+.token.treeview-part .entry-name.ext-rb:before,
+.token.treeview-part .entry-name.ext-xml:before {
+ content: "\ea07";
+}
+.token.treeview-part .entry-name.ext-7z:before,
+.token.treeview-part .entry-name.ext-bz:before,
+.token.treeview-part .entry-name.ext-bz2:before,
+.token.treeview-part .entry-name.ext-gz:before,
+.token.treeview-part .entry-name.ext-rar:before,
+.token.treeview-part .entry-name.ext-tar:before,
+.token.treeview-part .entry-name.ext-tgz:before,
+.token.treeview-part .entry-name.ext-zip:before {
+ content: "\ea08";
+}
+.token.treeview-part .entry-name.ext-aac:before,
+.token.treeview-part .entry-name.ext-au:before,
+.token.treeview-part .entry-name.ext-cda:before,
+.token.treeview-part .entry-name.ext-flac:before,
+.token.treeview-part .entry-name.ext-mp3:before,
+.token.treeview-part .entry-name.ext-oga:before,
+.token.treeview-part .entry-name.ext-ogg:before,
+.token.treeview-part .entry-name.ext-wav:before,
+.token.treeview-part .entry-name.ext-wma:before {
+ content: "\ea04";
+}
+.token.treeview-part .entry-name.ext-avi:before,
+.token.treeview-part .entry-name.ext-flv:before,
+.token.treeview-part .entry-name.ext-mkv:before,
+.token.treeview-part .entry-name.ext-mov:before,
+.token.treeview-part .entry-name.ext-mp4:before,
+.token.treeview-part .entry-name.ext-mpeg:before,
+.token.treeview-part .entry-name.ext-mpg:before,
+.token.treeview-part .entry-name.ext-ogv:before,
+.token.treeview-part .entry-name.ext-webm:before {
+ content: "\ea05";
+}
+.token.treeview-part .entry-name.ext-pdf:before {
+ content: "\ea09";
+}
+.token.treeview-part .entry-name.ext-xls:before,
+.token.treeview-part .entry-name.ext-xlsx:before {
+ content: "\ea0a";
+}
+.token.treeview-part .entry-name.ext-doc:before,
+.token.treeview-part .entry-name.ext-docm:before,
+.token.treeview-part .entry-name.ext-docx:before {
+ content: "\ea0c";
+}
+.token.treeview-part .entry-name.ext-pps:before,
+.token.treeview-part .entry-name.ext-ppt:before,
+.token.treeview-part .entry-name.ext-pptx:before {
+ content: "\ea0b";
+}
diff --git a/docs/upgrades/2030.md b/docs/upgrades/2030.md
new file mode 100644
index 0000000..39a0d98
--- /dev/null
+++ b/docs/upgrades/2030.md
@@ -0,0 +1,141 @@
+---
+title: Upgrade PyKX from 2.5.* to 3.*
+description: How to upgrade from PyKX 2.5.* to 3.*
+date: October 2024
+author: KX Systems, Inc.
+tags: PyKX, upgrade, remote Python execution
+---
+
+# Upgrade from PyKX 2.5.* to 3.*
+_This page outlines key differences when upgrading PyKX versions from 2.5.* to 3.*._
+
+## API Changes
+
+### Remote Python execution
+
+- Remote Python Execution is no longer a Beta feature. To use this feature, remove the setting of the `#!python PYKX_BETA_FEATURES` environment variable.
+
+- Additional required dependencies for this feature are now part of the required dependencies.
+
+ === "Previous behaviour"
+
+ ```bash
+ pip install pykx[beta]
+ ```
+
+ === "New behaviour"
+
+ ```bash
+ pip install pykx
+ ```
+
+- Previously, generating a usable remote session was a two-step process:
+
+ 1. Initialize the session object
+ 1. Create the session
+
+ This changed to a single function call.
+
+ === "Previous behaviour"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session()
+ >>> session.create(host='localhost', port=5050)
+ ```
+
+ === "New behaviour"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(host='localhost', port=5050)
+ ```
+
+- How users specify the Python libraries which should be available on remote processes has changed:
+
+ - Previously this was done using a function call to `#!python session.add_library`. This function would specify the libraries to be loaded on first execution of the function and expected the names of the libraries to be loaded as a list of arguments.
+ - Now you can use the keyword `#!python libraries` at session creation to load the libraries. Also, the library addition function is now called `session.libraries` to match the API for streaming with PyKX. Finally the `#!python libraries` keyword and function take a dictionary mapping the aliased name for the library to the library which is to be imported, namely `#!python import numpy as np` would be defined as `#!python {'np': 'numpy'}`.
+
+ === "Previous Behaviour"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session()
+ >>> session.create(host='localhost', port=5050)
+ >>> session.add_library('numpy', 'pykx')
+ ```
+
+ === "New Behaviour"
+
+ ```python
+ >>> import pykx as kx
+ # Initialise libraries at session creation
+ >>> session = kx.remote.session(port=5050, libraries = {'kx': 'pykx', 'np': 'numpy'})
+
+ # Add Libraries after session creation
+ >>> session = kx.remote.session(port=5050)
+ >>> session.libraries({'kx': 'pykx', 'np': 'numpy'})
+ ```
+
+- The `#!python clear` method provided for `#!python session` objects is now called `#!python close`. This change aligns the naming with IPC communication channels being 'closed' when stopping communication with a remote session and aligns with the naming used within the IPC module.
+
+ === "Previous Behaviour"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session()
+ >>> session.create(host='localhost', port=5050)
+ >>> session.clear()
+ ```
+
+ === "New Behaviour"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(host='localhost', port=5050)
+ >>> session.close()
+ ```
+
+## Deprecations
+
+- The following table outlines environment variables/configuration options which are now fully deprecated and the updated name for these values if they exist.
+
+ | **Deprecated option** | **Supported option** |
+ | :----------------------- | :---------------------- |
+ | `PYKX_NO_SIGINT` | `PYKX_NO_SIGNAL` |
+ | `IGNORE_QHOME` | `PYKX_IGNORE_QHOME` |
+ | `KEEP_LOCAL_TIMES` | `PYKX_KEEP_LOCAL_TIMES` |
+ | `SKIP_UNDERQ` | `PYKX_SKIP_UNDERQ` |
+ | `UNDER_PYTHON` | `PYKX_UNDER_PYTHON` |
+ | `UNSET_PYKX_GLOBALS` | No longer applicable |
+ | `PYKX_UNSET_GLOBALS` | No longer applicable |
+ | `PYKX_ENABLE_PANDAS_API` | No longer applicable |
+
+- Removal of the now deprecated `#!python modify` keyword for `#!python select`, `#!python exec`, `#!python update` and `#!python delete` operations on `#!python pykx.Table` and `#!python pykx.KeyedTable`. This has been permanently changed to use `#!python inplace`.
+- Removal of the deprecated `#!python replace_self` keyword when attempting to overwrite a `#!python pykx.Table` or `#!python pykx.KeyedTable` using insert/upsert functionality. To maintain this behaviour use the `#!python inplace` keyword.
+
+## Error message changes
+
+Various `#!python pykx.QError` error messages now provide more verbose explanations for users. Any code which relies on specific error string returns may need to be updated. Some messages below are truncated for display purposes.
+
+| **Previous error message** | **Updated error message** |
+| :--------------------------- | :-------------------------------------------------------------------------------- |
+| `access` | `access: Failed to connect to server with invalid username/password` |
+| `par` | `par: Cannot execute an unsupported operation on a partitioned table or its ...` |
+| `splay` | `splay: Cannot execute an unsupported operation on a splayed table` |
+| `assign` | `assign: Cannot redefine reserved q word` |
+| `insert` | `insert: Cannot insert a record with an existing key into a keyed table` |
+| `s-fail` | `s-fail: Cannot set "sorted" attribute on an unsorted list ...` |
+| `u-fail` | `u-fail: Failed to do one of the following: ...` |
+| `noupdate` | `noupdate: Cannot update a global variable while using: ...` |
+| `nosocket` | `nosocket: Cannot open or use a socket on a thread other than main. ...` |
+
+## Null and Infinite conversion changes
+
+PyKX previously left some null and infinite values unconverted; these are now converted to native Python objects.
+The behaviour of Atom and Vector conversions has also been updated to more closely match each other.
+
+The links below outline the full before and after behaviour.
+
+- [Null Conversions](../user-guide/fundamentals/nulls_and_infinities.md#null-conversions).
+- [Infinite Conversions](../user-guide/fundamentals/nulls_and_infinities.md#infinite-conversions).
diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index 8ab8c18..73b9836 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -5,12 +5,125 @@
"id": "d2a3ccf7",
"metadata": {},
"source": [
- "# Pandas API\n",
- "The purpose of this notebook is to provide a demonstration of the capabilities of the pandas like API for PyKX Table objects.\n",
- "\n",
- "To follow along please download this notebook using the following link.\n",
- "\n",
- "This demonstration will outline the following\n",
+ "# Pandas Like API for PyKX Tables\n",
+ "\n",
+ "_This page demonstrates the PyKX functionality that aligns with the Pandas API for DataFrame interactions._\n",
+ "\n",
+ "Only operations on PyKX tables that adhere to Pandas API conventions are covered. The focus is on areas where PyKX/q can offer a performance advantage over Pandas, particularly in terms of memory footprint and execution time. \n",
+ "\n",
+ "A full breakdown of the available functionality and examples of its use can be found in the [Pandas API](#pandas-api) section below.\n",
+ "\n",
+ "## Covered sections of the Pandas API\n",
+ "\n",
+ "In this context, _coverage_ refers to the functionality provided by the PyKX API for Tables, which matches the methods and attributes supported by the Pandas DataFrame API. It does not include functionality for interacting with Pandas Series objects or for reading/writing CSV/JSON files.\n",
+ "\n",
+ "If there's any functionality you would like us to add to this library, please open an issue [here](https://github.com/KxSystems/pykx/issues) or open a pull request [here](https://github.com/KxSystems/pykx/pulls).\n",
+ "\n",
+ "### Property/metadata type information\n",
+ "\n",
+ "| **DataFrame properties**| **PyKX supported?** | **PyKX API documentation link** | \n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [columns](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.columns.html) | :material-check: | [link](Pandas_API.ipynb#tablecolumns) | \n",
+ "| [dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tabledtypes) |\n",
+ "| [empty](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.empty.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tableempty) |\n",
+ "| [ndim](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.ndim.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tablendim) |\n",
+ "| [shape](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shape.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tableshape) |\n",
+ "| [size](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.size.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tablesize) |\n",
+ "\n",
+ "### Analytic functionality\n",
+ "\n",
+ "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [abs](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.abs.html) | :material-check: | [link](Pandas_API.ipynb#tableabs) |\n",
+ "| [agg](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.agg.html) | :material-check: | [link](Pandas_API.ipynb#tableagg) |\n",
+ "| [apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) | :material-check: | [link](Pandas_API.ipynb#tableapply) |\n",
+ "| [applymap](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.applymap.html) | :material-check: | [link](Pandas_API.ipynb#tableapplymap) |\n",
+ "| [groupby](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) | :material-check: | [link](Pandas_API.ipynb#tablegroupby) |\n",
+ "| [idxmax](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html) | :material-check: | [link](Pandas_API.ipynb#tableidxmax) |\n",
+ "| [idxmin](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmin.html) | :material-check: | [link](Pandas_API.ipynb#tableidxmin) |\n",
+ "| [kurt](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.kurt.html) | :material-check: | [link](Pandas_API.ipynb#tablekurt) |\n",
+ "| [max](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.max.html) | :material-check: | [link](Pandas_API.ipynb#tablemax) |\n",
+ "| [map](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.map.html) | :material-check: | [link](Pandas_API.ipynb#tablemap) |\n",
+ "| [mean](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mean.html) | :material-check: | [link](Pandas_API.ipynb#tablemean) |\n",
+ "| [median](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.median.html) | :material-check: | [link](Pandas_API.ipynb#tablemedian) |\n",
+ "| [min](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.min.html) | :material-check: | [link](Pandas_API.ipynb#tablemin) |\n",
+ "| [mode](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html) | :material-check: | [link](Pandas_API.ipynb#tablemode) |\n",
+ "| [sem](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sem.html) | :material-check: | [link](Pandas_API.ipynb#tablesem) |\n",
+ "| [sum](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sum.html) | :material-check: | [link](Pandas_API.ipynb#tablesum) |\n",
+ "| [skew](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.skew.html) | :material-check: | [link](Pandas_API.ipynb#tableskew) |\n",
+ "| [std](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.std.html) | :material-check: | [link](Pandas_API.ipynb#tablestd) |\n",
+ "| [prod](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.prod.html) | :material-check: | [link](Pandas_API.ipynb#tableprod) |\n",
+ "\n",
+ "### Querying and data interrogation\n",
+ "\n",
+ "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [all](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.all.html) | :material-check: | [link](Pandas_API.ipynb#tableall) |\n",
+ "| [any](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.any.html) | :material-check: | [link](Pandas_API.ipynb#tableany) |\n",
+ "| [at](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html) | :material-check: | [link](Pandas_API.ipynb#tableat) |\n",
+ "| [count](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.count.html) | :material-check: | [link](Pandas_API.ipynb#tablecount) |\n",
+ "| [get](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.get.html) | :material-check: | [link](Pandas_API.ipynb#tableget) |\n",
+ "| [head](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html) | :material-check: | [link](Pandas_API.ipynb#tablehead) |\n",
+ "| [iloc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html) | :material-check: | [link](Pandas_API.ipynb#tableiloc) |\n",
+ "| [isna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isna.html) | :material-check: | [link](Pandas_API.ipynb#tableisna) |\n",
+ "| [isnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isnull.html) | :material-check: | [link](Pandas_API.ipynb#tableisnull) |\n",
+ "| [loc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) | :material-check: | [link](Pandas_API.ipynb#tableloc) |\n",
+ "| [notna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notna.html) | :material-check: | [link](Pandas_API.ipynb#tablenotna) |\n",
+ "| [notnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notnull.html) | :material-check: | [link](Pandas_API.ipynb#tablenotnull) |\n",
+ "| [sample](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html) | :material-check: | [link](Pandas_API.ipynb#tablesample) |\n",
+ "| [select_dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.select_dtypes.html) | :material-check: | [link](Pandas_API.ipynb#tableselect_dtypes) |\n",
+ "| [tail](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html) | :material-check: | [link](Pandas_API.ipynb#tabletail) |\n",
+ "\n",
+ "### Data preprocessing\n",
+ "\n",
+ "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [add_prefix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_prefix.html) | :material-check: | [link](Pandas_API.ipynb#tableadd_prefix) |\n",
+ "| [add_suffix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_suffix.html) | :material-check: | [link](Pandas_API.ipynb#tableadd_suffix) |\n",
+ "| [astype](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html) | :material-check: | [link](Pandas_API.ipynb#tableastype) |\n",
+ "| [drop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) | :material-check: | [link](Pandas_API.ipynb#tabledrop) |\n",
+ "| [drop_duplicates](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html) | :material-check: | [link](Pandas_API.ipynb#tabledrop_duplicates) |\n",
+ "| [pop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pop.html) | :material-check: | [link](Pandas_API.ipynb#tablepop) |\n",
+ "| [rename](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html) | :material-check: | [link](Pandas_API.ipynb#tablerename) |\n",
+ "| [reset_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html) | :material-check: | [link](Pandas_API.ipynb#tablereset_index) |\n",
+ "| [set_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html) | :material-check: | [link](Pandas_API.ipynb#tableset_index) |\n",
+ "\n",
+ "### Data joins/merge\n",
+ "\n",
+ "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) | :material-check: | [link](Pandas_API.ipynb#tablemerge) |\n",
+ "| [merge_asof](https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html) | :material-check: | [link](Pandas_API.ipynb#tablemerge_asof) |\n",
+ "\n",
+ "### Data sorting\n",
+ "\n",
+ "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** | \n",
+ "|----------------------|-----------------|-----------------------------|\n",
+ "| [sort_values](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html) | :material-check: | [link](Pandas_API.ipynb#tablesort_values) |\n",
+ "| [nlargest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nlargest.html) | :material-check: | [link](Pandas_API.ipynb#tablenlargest) |\n",
+ "| [nsmallest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nsmallest.html) | :material-check: | [link](Pandas_API.ipynb#tablensmallest) |\n",
+ "\n",
+ "### Unsupported functionality\n",
+ "\n",
+ "| **DataFrame methods** | **PyKX supported?** | **Additional information** |\n",
+ "|----------------------|------------------|------------------------|\n",
+ "| `*from*` | :material-close: | Functionality for the creation of PyKX Tables from alternative data sources is not supported at this time. |\n",
+ "| `*plot*` | :material-close: | Functionality for the plotting of columns/tables is not supported at this time. |\n",
+ "| `*sparse*` | :material-close: | Sparse data like interactions are not supported at this time. |\n",
+ "| `to_*` | :material-close: | Functionality for the conversion/persistence of PyKX Tables to other formats is not supported at this time. |\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "67b53fea",
+ "metadata": {},
+ "source": [
+ "## Pandas API\n",
+ "_This notebook provides a demonstration of the capabilities of the pandas-like API for PyKX Table objects._\n",
+ "\n",
+ "To follow along, download this notebook.\n",
+ "\n",
+ "This demonstration outlines the following:\n",
"\n",
"1. [Constructing Tables](#Constructing-Tables)\n",
"2. [Metadata](#Metadata)\n",
@@ -22,7 +135,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "13267c00",
"metadata": {
"tags": [
@@ -32,13 +145,13 @@
"outputs": [],
"source": [
"import os\n",
- "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME \n",
+ "os.environ['PYKX_IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n",
"os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation."
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "44c90043",
"metadata": {},
"outputs": [],
@@ -54,7 +167,7 @@
"id": "06e3f624",
"metadata": {},
"source": [
- "## Constructing Tables"
+ "### Constructing Tables"
]
},
{
@@ -62,9 +175,9 @@
"id": "31561309",
"metadata": {},
"source": [
- "### Table\n",
+ "#### Table\n",
"\n",
- "Create a table from a list of rows or by converting a Python dictionary object\n",
+ "Create a table from a list of rows or by converting a Python dictionary object:\n",
"\n",
"**Parameters:**\n",
"\n",
@@ -100,7 +213,7 @@
"id": "273de502",
"metadata": {},
"source": [
- "Create a Table from an array like object."
+ "Create a table from an array-like object."
]
},
{
@@ -118,7 +231,7 @@
"id": "51d82353",
"metadata": {},
"source": [
- "Create a Table from an array like object and provide names for the columns to use."
+ "Create a table from an array-like object and provide names for the columns to use."
]
},
{
@@ -136,9 +249,9 @@
"id": "36edf1de",
"metadata": {},
"source": [
- "### Keyed Table\n",
+ "#### Keyed Table\n",
"\n",
- "Create a keyed table from a list of rows or by converting a Python dictionary object\n",
+ "Create a keyed table from a list of rows or by converting a Python dictionary object:\n",
"\n",
"**Parameters:**\n",
"\n",
@@ -175,7 +288,7 @@
"id": "1a2f9b56",
"metadata": {},
"source": [
- "Create a keyed table from a list of rows."
+ "Create a keyed table from a list of rows:"
]
},
{
@@ -193,7 +306,7 @@
"id": "804183ed",
"metadata": {},
"source": [
- "Create a keyed table from a list of rows and provide names for the resulting columns."
+ "Create a keyed table from a list of rows and provide names for the resulting columns:"
]
},
{
@@ -211,7 +324,7 @@
"id": "b91e990b",
"metadata": {},
"source": [
- "Create a keyed table with a specified index column."
+ "Create a keyed table with a specified index column:"
]
},
{
@@ -229,7 +342,7 @@
"id": "f1f43263",
"metadata": {},
"source": [
- "## Metadata"
+ "### Metadata"
]
},
{
@@ -254,9 +367,9 @@
"id": "c2122f58",
"metadata": {},
"source": [
- "### Table.columns\n",
+ "#### Table.columns\n",
"\n",
- "Get the name of each column in the table"
+ "Get the name of each column in the table:"
]
},
{
@@ -274,9 +387,9 @@
"id": "fc006fd7",
"metadata": {},
"source": [
- "### Table.dtypes\n",
+ "#### Table.dtypes\n",
"\n",
- "Get the datatypes of the table columns"
+ "Get the datatypes of the table columns:"
]
},
{
@@ -286,7 +399,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tab.dtypes"
+ "print(tab.dtypes)"
]
},
{
@@ -294,9 +407,9 @@
"id": "5b4d25bf",
"metadata": {},
"source": [
- "### Table.empty\n",
+ "#### Table.empty\n",
"\n",
- "Returns True if the table is empty otherwise returns False."
+ "Returns `True` if the table is empty, otherwise returns `False`."
]
},
{
@@ -314,9 +427,9 @@
"id": "550c1126",
"metadata": {},
"source": [
- "### Table.ndim\n",
+ "#### Table.ndim\n",
"\n",
- "Get the nuber of columns within the table."
+ "Get the number of columns within the table:"
]
},
{
@@ -334,9 +447,9 @@
"id": "f479bdcc",
"metadata": {},
"source": [
- "### Table.shape\n",
+ "#### Table.shape\n",
"\n",
- "Get the shape of the table as a tuple (number of rows, number of columns)."
+ "Get the shape of the table as a tuple (number of rows, number of columns):"
]
},
{
@@ -354,9 +467,9 @@
"id": "42bc2bc3",
"metadata": {},
"source": [
- "### Table.size\n",
+ "#### Table.size\n",
"\n",
- "Get the number of values in the table (rows * cols)."
+ "Get the number of values in the table (rows * cols):"
]
},
{
@@ -374,7 +487,7 @@
"id": "1439bde3",
"metadata": {},
"source": [
- "## Querying and Data Interrogation"
+ "### Querying and Data Interrogation"
]
},
{
@@ -400,7 +513,7 @@
"id": "d356c82f",
"metadata": {},
"source": [
- "### Table.all()\n",
+ "#### Table.all()\n",
"\n",
"```\n",
"Table.all(axis=0, bool_only=False, skipna=True)\n",
@@ -438,7 +551,7 @@
"id": "e9c11a2e",
"metadata": {},
"source": [
- "### Table.any()\n",
+ "#### Table.any()\n",
"\n",
"```\n",
"Table.any(axis=0, bool_only=False, skipna=True)\n",
@@ -476,7 +589,7 @@
"id": "cb69b61a",
"metadata": {},
"source": [
- "### Table.at[]\n",
+ "#### Table.at[]\n",
"\n",
"```\n",
"Table.at[row, col]\n",
@@ -533,7 +646,7 @@
"id": "903c0aac",
"metadata": {},
"source": [
- "### Table.get()\n",
+ "#### Table.get()\n",
"\n",
"```\n",
"Table.get(key, default=None)\n",
@@ -569,50 +682,46 @@
"cell_type": "code",
"execution_count": null,
"id": "7809ac4a",
- "metadata": {
- "scrolled": true
- },
+ "metadata": {},
"outputs": [],
"source": [
- "tab.get('y')"
+ "tab.get(['y'])"
]
},
{
"cell_type": "markdown",
- "id": "2ddd9659",
+ "id": "fe447098-3192-4d1b-9e6a-fa97740ec6fd",
"metadata": {},
"source": [
- "Get the `y` and `z` columns from the table."
+ "Get the `z` column from the table as a vector."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "78c9f224",
- "metadata": {
- "scrolled": true
- },
+ "id": "4c6199da-8516-42a7-afa2-1640bf7f41b8",
+ "metadata": {},
"outputs": [],
"source": [
- "tab.get(['y', 'z'])"
+ "tab.get('z')"
]
},
{
"cell_type": "markdown",
- "id": "379219ef",
+ "id": "2ddd9659",
"metadata": {},
"source": [
- "Attempt to get the `q` column from the table and receive none as that column does not exist."
+ "Get the `y` and `z` columns from the table."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "010d9d98",
+ "id": "78c9f224",
"metadata": {},
"outputs": [],
"source": [
- "print(tab.get('q'))"
+ "tab.get(['y', 'z'])"
]
},
{
@@ -620,7 +729,7 @@
"id": "3ee99633",
"metadata": {},
"source": [
- "Attempt to get the `q` column from the table and receive the default value `not found` as that column does not exist."
+ "Attempt to get the `q` column from the table and receive the `default` value `not found` as that column does not exist. An error is raised if the `default` value is not set."
]
},
{
@@ -630,7 +739,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tab.get('q', 'not found')"
+ "tab.get(['q'], 'not found')"
]
},
{
@@ -638,7 +747,7 @@
"id": "34016a3f",
"metadata": {},
"source": [
- "### Table.head()\n",
+ "#### Table.head()\n",
"\n",
"```\n",
"Table.head(n=5)\n",
@@ -702,7 +811,7 @@
"id": "5e21bef1",
"metadata": {},
"source": [
- "### Table.isna()\n",
+ "#### Table.isna()\n",
"\n",
"```\n",
"Table.isna()\n",
@@ -735,21 +844,21 @@
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "d8ff16e1",
- "metadata": {},
- "outputs": [],
- "source": [
- "tabDemo.isna()"
- ]
- },
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d15f0f98",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tabDemo.isna()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "47d20b00",
"metadata": {},
"source": [
- "### Table.isnull()\n",
+ "#### Table.isnull()\n",
"\n",
"```\n",
"Table.isnull()\n",
@@ -784,7 +893,7 @@
"id": "fb3164d5",
"metadata": {},
"source": [
- "### Table.notna()\n",
+ "#### Table.notna()\n",
"\n",
"```\n",
"Table.notna()\n",
@@ -819,7 +928,7 @@
"id": "4e8e5c07",
"metadata": {},
"source": [
- "### Table.notnull()\n",
+ "#### Table.notnull()\n",
"\n",
"```\n",
"Table.notna()\n",
@@ -854,7 +963,7 @@
"id": "d97d6bae",
"metadata": {},
"source": [
- "### Table.iloc[]\n",
+ "#### Table.iloc[]\n",
"\n",
"```\n",
"Table.iloc[:, :]\n",
@@ -981,7 +1090,7 @@
"id": "dc97669c",
"metadata": {},
"source": [
- "### Table.loc[]\n",
+ "#### Table.loc[]\n",
"\n",
"```\n",
"Table.loc[:, :]\n",
@@ -1125,7 +1234,7 @@
"id": "53c9631f",
"metadata": {},
"source": [
- "### Table.sample()\n",
+ "#### Table.sample()\n",
"\n",
"```\n",
"Table.sample(n, frac, replace, weights, random_state, axis, ignore_index)\n",
@@ -1233,7 +1342,7 @@
"id": "7d42cde9",
"metadata": {},
"source": [
- "### Table.select_dtypes()\n",
+ "#### Table.select_dtypes()\n",
"\n",
"```\n",
"Table.select_dtypes(include=None, exclude=None)\n",
@@ -1245,6 +1354,10 @@
"- A single dtype or string.\n",
"- A list of dtypes or strings.\n",
"- Inputs given for `include` and `exclude` cannot overlap.\n",
+ "- If both `include` and `exclude` are passed then `exclude` is ignored.\n",
+ "- If no columns are to be returned then `pykx.Identity(pykx.q('::'))` will be returned.\n",
+ "- For a `kx.KeyedTable` all key columns are always returned; filtering only applies to value columns.\n",
+ "- For a `kx.KeyedTable` if no value column is to be returned then `pykx.Identity(pykx.q('::'))` will be returned.\n",
"\n",
"The dtype `kx.CharVector` will return an error. Use `kx.CharAtom` for a column of single chars.\n",
"Both `kx.*Atom` and `kx.*Vector` will be taken to mean a column containing a single item per row of type `*`. `kx.List` will include/exclude any columns containing mixed list data (including string columns).\n",
@@ -1262,7 +1375,7 @@
"\n",
"| Type | Description |\n",
"| :-------: | :----------------------------------------------------------------------------------------------: |\n",
- "| Dataframe | The subset of the frame including the dtypes in `include` and excluding the dtypes in `exclude`. |"
+ "| Dataframe | The subset of the frame including the dtypes in `include` or excluding the dtypes in `exclude`. |"
]
},
{
@@ -1326,12 +1439,68 @@
"df.select_dtypes(include = [kx.ShortVector, kx.LongVector])"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "ad73fbdb-b5db-4774-925a-2139d460a5a8",
+ "metadata": {},
+ "source": [
+ "If no columns are to be returned then `pykx.Identity(pykx.q('::'))` is returned:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09c5ebac-7c5a-4caa-9bb9-acf366ca36ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.select_dtypes(include = kx.FloatVector)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b56da282-053b-4589-b8fd-b293914407f3",
+ "metadata": {},
+ "source": [
+ "For a `kx.KeyedTable` all key columns are always returned; filtering only applies to value columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f1a8b60c-79fe-4d92-a8c8-d0abb47e948f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfk = df.set_index('c1')\n",
+ "\n",
+ "dfk.select_dtypes(include = kx.ShortVector)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1643a6db-b1e2-4f3c-953f-e18a30fe3628",
+ "metadata": {},
+ "source": [
+ "For a `kx.KeyedTable` if no value column is to be returned then `pykx.Identity(pykx.q('::'))` will be returned."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49c445e1-b073-46bf-ac92-8bc3ef3e282b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfk.select_dtypes(exclude=[kx.ShortAtom, kx.LongAtom, kx.IntAtom])"
+ ]
+ },
{
"cell_type": "markdown",
"id": "5bb4eaa2",
"metadata": {},
"source": [
- "### Table.tail()\n",
+ "#### Table.tail()\n",
"\n",
"```\n",
"Table.tail(n=5)\n",
@@ -1395,7 +1564,7 @@
"id": "a2edb648",
"metadata": {},
"source": [
- "## Sorting"
+ "### Sorting"
]
},
{
@@ -1403,7 +1572,7 @@
"id": "ee65b6ab",
"metadata": {},
"source": [
- "### Table.sort_values()\n",
+ "#### Table.sort_values()\n",
"\n",
"```\n",
"Table.sort_values(by, ascending=True)\n",
@@ -1423,7 +1592,7 @@
"\n",
"| Type | Description |\n",
"| :----------------: | :------------------------------------------------------------------ |\n",
- "| Table | The resulting table after the sort has been perfomed |"
+ "| Table | The resulting table after the sort has been performed |"
]
},
{
@@ -1489,7 +1658,7 @@
"id": "29930425",
"metadata": {},
"source": [
- "### Table.nsmallest()\n",
+ "#### Table.nsmallest()\n",
"```\n",
"Table.nsmallest(\n",
" n,\n",
@@ -1598,7 +1767,7 @@
"id": "fbb4e07f",
"metadata": {},
"source": [
- "### Table.nlargest()\n",
+ "#### Table.nlargest()\n",
"```\n",
"Table.nlargest(\n",
" n,\n",
@@ -1707,7 +1876,7 @@
"id": "ffc7e449",
"metadata": {},
"source": [
- "## Data Joins/Merging"
+ "### Data Joins/Merging"
]
},
{
@@ -1715,7 +1884,7 @@
"id": "6a4c9fc9",
"metadata": {},
"source": [
- "### Table.merge()\n",
+ "#### Table.merge()\n",
"\n",
"```\n",
"Table.merge(\n",
@@ -1966,7 +2135,7 @@
"id": "42158c05",
"metadata": {},
"source": [
- "### Table.merge_asof()\n",
+ "#### Table.merge_asof()\n",
"\n",
"```\n",
"Table.merge_asof(\n",
@@ -2139,7 +2308,7 @@
"id": "e6280a9a",
"metadata": {},
"source": [
- "## Analytic functionality"
+ "### Analytic functionality"
]
},
{
@@ -2165,7 +2334,7 @@
"id": "fa9c8fc5",
"metadata": {},
"source": [
- "### Table.abs()\n",
+ "#### Table.abs()\n",
"\n",
"```\n",
"Table.abs(numeric_only=False)\n",
@@ -2203,7 +2372,7 @@
"id": "d644f8ee",
"metadata": {},
"source": [
- "### Table.count()\n",
+ "#### Table.count()\n",
"\n",
"```\n",
"Table.count(axis=0, numeric_only=False)\n",
@@ -2240,7 +2409,7 @@
"id": "f8554641",
"metadata": {},
"source": [
- "### Table.max()\n",
+ "#### Table.max()\n",
"\n",
"```\n",
"Table.max(axis=0, skipna=True, numeric_only=False)\n",
@@ -2278,7 +2447,7 @@
"id": "bc5b6dde",
"metadata": {},
"source": [
- "### Table.min()\n",
+ "#### Table.min()\n",
"\n",
"```\n",
"Table.min(axis=0, skipna=True, numeric_only=False)\n",
@@ -2311,12 +2480,12 @@
"tab.min()"
]
},
-{
+ {
"cell_type": "markdown",
"id": "b52627d2",
"metadata": {},
"source": [
- "### Table.idxmax()\n",
+ "#### Table.idxmax()\n",
"\n",
"```\n",
"Table.idxmax(axis=0, skipna=True, numeric_only=False)\n",
@@ -2382,7 +2551,7 @@
"id": "fdb4114c-640a-41ac-a4e7-6c236e9d93ea",
"metadata": {},
"source": [
- "### Table.idxmin()\n",
+ "#### Table.idxmin()\n",
"\n",
"```\n",
"Table.idxmax(axis=0, skipna=True, numeric_only=False)\n",
@@ -2448,7 +2617,7 @@
"id": "4aee2790",
"metadata": {},
"source": [
- "### Table.sum()\n",
+ "#### Table.sum()\n",
"\n",
"```\n",
"Table.sum(axis=0, skipna=True, numeric_only=False, min_count=0)\n",
@@ -2487,7 +2656,7 @@
"id": "3fd35bc7",
"metadata": {},
"source": [
- "### Table.mean()\n",
+ "#### Table.mean()\n",
"\n",
"```\n",
"Table.mean(axis=0, numeric_only=False)\n",
@@ -2570,7 +2739,7 @@
"id": "b0eff83a",
"metadata": {},
"source": [
- "### Table.median()\n",
+ "#### Table.median()\n",
"\n",
"```\n",
"Table.median(axis=0, numeric_only=False)\n",
@@ -2653,7 +2822,7 @@
"id": "929fe196",
"metadata": {},
"source": [
- "### Table.mode()\n",
+ "#### Table.mode()\n",
"\n",
"```\n",
"Table.mode(axis=0, numeric_only=False, dropna=True)\n",
@@ -2731,7 +2900,7 @@
"metadata": {},
"outputs": [],
"source": [
- "tab.mode(axis=1)"
+ "print(tab.mode(axis=1))"
]
},
{
@@ -2776,7 +2945,7 @@
"id": "7371feb5",
"metadata": {},
"source": [
- "### Table.prod()\n",
+ "#### Table.prod()\n",
"\n",
"```\n",
"Table.prod(axis=0, skipna=True, numeric_only=False, min_count=0)\n",
@@ -2833,12 +3002,12 @@
"tab.prod(numeric_only=True)"
]
},
-{
+ {
"cell_type": "markdown",
"id": "fe565b65-fbf2-47ba-a26e-791d09fd4f55",
"metadata": {},
"source": [
- "### Table.kurt()\n",
+ "#### Table.kurt()\n",
"\n",
"```\n",
"Table.kurt(axis=0, skipna=True, numeric_only=False)\n",
@@ -2923,7 +3092,7 @@
"id": "b248fef1",
"metadata": {},
"source": [
- "### Table.sem()\n",
+ "#### Table.sem()\n",
"\n",
"```\n",
"Table.sem(axis=0, skipna=True, numeric_only=False, ddof=0)\n",
@@ -3007,7 +3176,7 @@
"id": "ae7afe5a",
"metadata": {},
"source": [
- "Calculate sem accross columns with ddof=0:"
+ "Calculate sem across columns with ddof=0:"
]
},
{
@@ -3025,7 +3194,7 @@
"id": "ff51630f",
"metadata": {},
"source": [
- "### Table.skew()\n",
+ "#### Table.skew()\n",
"\n",
"```\n",
"Table.skew(axis=0, skipna=True, numeric_only=False)\n",
@@ -3064,7 +3233,7 @@
"id": "b054645b",
"metadata": {},
"source": [
- "### Table.std()\n",
+ "#### Table.std()\n",
"\n",
"```\n",
"Table.std(axis=0, skipna=True, numeric_only=False, ddof=0)\n",
@@ -3150,7 +3319,7 @@
"id": "ad38071b",
"metadata": {},
"source": [
- "Calculate std accross columns with ddof=0:"
+ "Calculate std across columns with ddof=0:"
]
},
{
@@ -3168,7 +3337,7 @@
"id": "5f1e5350",
"metadata": {},
"source": [
- "## Group By"
+ "### Group By"
]
},
{
@@ -3176,7 +3345,7 @@
"id": "57fe61a2",
"metadata": {},
"source": [
- "### Table.groupby()\n",
+ "#### Table.groupby()\n",
"\n",
"```\n",
"Table.groupby(\n",
@@ -3232,8 +3401,8 @@
"source": [
"tab = kx.Table(data={\n",
" 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],\n",
- " 'Max Speed': [380., 370., 24., 26.],\n",
- " 'Max Altitude': [570., 555., 275., 300.]\n",
+ " 'Speed': [380., 370., 24., 26.],\n",
+ " 'Max_Altitude': [570., 555., 275., 300.]\n",
"})\n",
"\n",
"tab"
@@ -3244,7 +3413,7 @@
"id": "0487cfe5",
"metadata": {},
"source": [
- "Group on the `Animal` column and calculate the mean of the resulting `Max Speed` and `Max Altitude` columns."
+ "Group on the `Animal` column and calculate the mean of the resulting `Speed` and `Max_Altitude` columns."
]
},
{
@@ -3256,7 +3425,25 @@
},
"outputs": [],
"source": [
- "tab.groupby(kx.SymbolVector(['Animal'])).mean()"
+ "tab.groupby('Animal').mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79edc388",
+ "metadata": {},
+ "source": [
+ "Group on the `Animal` column and calculate the maximum speed only:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e54baf50",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.groupby('Animal')['Speed'].max()"
]
},
{
@@ -3278,7 +3465,7 @@
" data={\n",
" 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],\n",
" 'Type': ['Captive', 'Wild', 'Captive', 'Wild', 'Wild'],\n",
- " 'Max Speed': [390., 350., 30., 20., 25.]\n",
+ " 'Speed': [390., 350., 30., 20., 25.]\n",
" })\n",
"tab = tab.set_index(2)\n",
"tab"
@@ -3289,7 +3476,7 @@
"id": "ae3d3244",
"metadata": {},
"source": [
- "Group on multiple columns using thier indexes."
+ "Group on multiple columns using their indexes."
]
},
{
@@ -3372,9 +3559,9 @@
"id": "56cf152e",
"metadata": {},
"source": [
- "## Apply\n",
+ "### Function Application\n",
"\n",
- "### Table.apply()\n",
+ "#### Table.apply()\n",
"\n",
"```\n",
"Table.apply(\n",
@@ -3472,14 +3659,301 @@
"tab.apply(lambda x: sum(x), axis=1)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "883319ad",
+ "metadata": {},
+ "source": [
+ "#### Table.map()\n",
+ "\n",
+ "```\n",
+ "Table.map(\n",
+ " func,\n",
+ " na_action=None,\n",
+ " *args,\n",
+ " **kwargs\n",
+ ")\n",
+ "```\n",
+ "\n",
+ "Apply a function to all elements of a table.\n",
+ "\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n",
+ "| func | function | Function to apply to every element of a table. | |\n",
+ "| na_action | str | if supplied 'ignore' then propagate null values without passing to the function | None |\n",
+ "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n",
+ "\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :-----------------------: | :---------------------------------------------- |\n",
+ "| Table | The transformed `kx.Table` object. |\n",
+ "\n",
+ "\n",
+ "A vectorized version of a function you are attempting to call often exists, using the vectorized operation will be much faster in most cases. For example multiplying all elements of a table by 2:\n",
+ "\n",
+ "```python\n",
+ "tab.map(lambda x: 2*x)\n",
+ "```\n",
+ "\n",
+ "The following is a vectorized version of the previous example:\n",
+ "\n",
+ "```python\n",
+ "tab * 2\n",
+ "```\n",
+ "\n",
+ "**Examples:**\n",
+ "\n",
+ "Example Table."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ec5778b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab = kx.Table(data={\n",
+ " 'x': [[1, 2, 3], 1, [1, 2]],\n",
+ " 'y': [kx.LongAtom.null, 1, 2]})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f53b488f",
+ "metadata": {},
+ "source": [
+ "Apply Python lambda function returning the length of the string representation of all elements within a table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7fb28418",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.map(lambda x:len(str(x)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae268cf7",
+ "metadata": {},
+ "source": [
+ "Apply a Python lambda function returning the length of the string representation of all elements within a table ignoring any null values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "031ac1ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.map(lambda x:len(str(x)), na_action='ignore')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "84697dc8",
+ "metadata": {},
+ "source": [
+ "Apply a Python function to all elements within a table to count the number of values in all cells. Including an example where multiple arguments are supplied"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "34ee6f12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _multi_arg_count(x, y=0):\n",
+ " try:\n",
+ " count = len(x)\n",
+ " except TypeError as err:\n",
+ " count = 1\n",
+ " return count + y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e57346e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.map(_multi_arg_count)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40589187",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.map(_multi_arg_count, y = 5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c147c39",
+ "metadata": {},
+ "source": [
+ "#### Table.applymap()\n",
+ "\n",
+ "```\n",
+ "Table.applymap(\n",
+ " func,\n",
+ " na_action=None,\n",
+ " *args,\n",
+ " **kwargs\n",
+ ")\n",
+ "```\n",
+ "\n",
+ "`Table.applymap` and `Table.map` functions are aliases, both have the same underlying definition and are both supplied to allow user migration of code if using Pandas 2.1 (which uses `map`) or versions prior to this which make use of `applymap`\n",
+ "\n",
+ "Apply a function to all elements of a table.\n",
+ "\n",
+ "**Parameters:**\n",
+ "\n",
+ "| Name | Type | Description | Default |\n",
+ "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n",
+ "| func | function | Function to apply to every element of a table. | |\n",
+ "| na_action | str | if supplied 'ignore' then propagate null values without passing to the function | None |\n",
+ "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n",
+ "\n",
+ "\n",
+ "**Returns:**\n",
+ "\n",
+ "| Type | Description |\n",
+ "| :-----------------------: | :---------------------------------------------- |\n",
+ "| Table | The transformed `kx.Table` object. |\n",
+ "\n",
+ "\n",
+ "A vectorized version of a function you are attempting to call often exists, using the vectorized operation will be much faster in most cases. For example multiplying all elements of a table by 2:\n",
+ "\n",
+ "```python\n",
+ "tab.applymap(lambda x: 2*x)\n",
+ "```\n",
+ "\n",
+ "The following is a vectorized version of the previous example:\n",
+ "\n",
+ "```python\n",
+ "tab * 2\n",
+ "```\n",
+ "\n",
+ "**Examples:**\n",
+ "\n",
+ "Example Table."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c495f330",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab = kx.Table(data={\n",
+ " 'x': [[1, 2, 3], 1, [1, 2]],\n",
+ " 'y': [kx.LongAtom.null, 1, 2]})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dadcd844",
+ "metadata": {},
+ "source": [
+ "Apply Python lambda function returning the length of the string representation of all elements within a table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32a7ef2b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.applymap(lambda x:len(str(x)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0ff32fa1",
+ "metadata": {},
+ "source": [
+ "Apply a Python lambda function returning the length of the string representation of all elements within a table ignoring any null values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "24e34eec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.applymap(lambda x:len(str(x)), na_action='ignore')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3db85bb0",
+ "metadata": {},
+ "source": [
+ "Apply a Python function to all elements within a table to count the number of values in all cells. Including an example where multiple arguments are supplied"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fc6f87c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _multi_arg_count(x, y=0):\n",
+ " try:\n",
+ " count = len(x)\n",
+ " except TypeError as err:\n",
+ " count = 1\n",
+ " return count + y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9a62bf1e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.applymap(_multi_arg_count)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0ce7bd44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tab.applymap(_multi_arg_count, y = 5)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "c20acb8a",
"metadata": {},
"source": [
- "## Aggregate\n",
+ "### Aggregate\n",
"\n",
- "### Table.agg()\n",
+ "#### Table.agg()\n",
"\n",
"```\n",
"Table.agg(\n",
@@ -3593,7 +4067,7 @@
"id": "dc726b75",
"metadata": {},
"source": [
- "## Data Preprocessing"
+ "### Data Preprocessing"
]
},
{
@@ -3601,7 +4075,7 @@
"id": "d508891a",
"metadata": {},
"source": [
- "### Table.add_prefix()\n",
+ "#### Table.add_prefix()\n",
"\n",
"```\n",
"Table.add_prefix(columns)\n",
@@ -3666,7 +4140,7 @@
"id": "8fb874ba",
"metadata": {},
"source": [
- "### Table.add_suffix()\n",
+ "#### Table.add_suffix()\n",
"\n",
"```\n",
"Table.add_suffix(columns)\n",
@@ -3731,7 +4205,7 @@
"id": "a5bb7631",
"metadata": {},
"source": [
- "### Table.astype()\n",
+ "#### Table.astype()\n",
"\n",
"```\n",
"Table.astype(dtype, copy=True, errors='raise')\n",
@@ -3865,7 +4339,7 @@
"id": "c7422edd",
"metadata": {},
"source": [
- "### Table.drop()\n",
+ "#### Table.drop()\n",
"\n",
"```\n",
"Table.drop(item, axis=0)\n",
@@ -3948,7 +4422,7 @@
"id": "d30d870b",
"metadata": {},
"source": [
- "### Table.drop_duplicates()\n",
+ "#### Table.drop_duplicates()\n",
"\n",
"```\n",
"Table.drop_duplicates()\n",
@@ -4011,7 +4485,7 @@
"id": "6110d8d9",
"metadata": {},
"source": [
- "### Table.pop()\n",
+ "#### Table.pop()\n",
"\n",
"```\n",
"Table.pop(item)\n",
@@ -4085,7 +4559,7 @@
"id": "68e67196",
"metadata": {},
"source": [
- "### Table.rename()\n",
+ "#### Table.rename()\n",
"\n",
"```\n",
"Table.rename(labels=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore', mapper=None)\n",
@@ -4132,7 +4606,7 @@
"outputs": [],
"source": [
"tab.head()\n",
- "key_tab = kx.KeyedTable(data=tab) "
+ "key_tab = kx.KeyedTable(data=tab)"
]
},
{
@@ -4194,7 +4668,7 @@
"id": "fda14bd0-5be3-44f3-a5ba-36ab067eb384",
"metadata": {},
"source": [
- "### Table.replace()\n",
+ "#### Table.replace()\n",
"``` Table.replace(to_replace, value) ```\n",
"\n",
"Replace all values in a table with another given value.\n",
@@ -4225,70 +4699,10 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "bbbec511-0395-4be3-b9b4-e6d3c09a21a7",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " | \n",
- " b | \n",
- " c | \n",
- " d | \n",
- " e | \n",
- "
\n",
- " \n",
- " a | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " 1b | \n",
- " a | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2 | \n",
- " 0b | \n",
- " b | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6 | \n",
- " 1b | \n",
- " c | \n",
- " `a | \n",
- "
\n",
- " \n",
- "
"
- ],
- "text/plain": [
- "pykx.KeyedTable(pykx.q('\n",
- "a| b c d e \n",
- "-| --------\n",
- "2| 4 1 a 1 \n",
- "2| 2 0 b 2 \n",
- "3| 6 1 c `a\n",
- "'))"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tab = kx.q('([] a:2 2 3; b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))')\n",
"ktab = kx.q('([a:2 2 3]b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))')\n",
@@ -4305,70 +4719,10 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "3a36a978-022a-4e49-8191-05a768d5f30e",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " | \n",
- " b | \n",
- " c | \n",
- " d | \n",
- " e | \n",
- "
\n",
- " \n",
- " a | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2 | \n",
- " 4 | \n",
- " 1b | \n",
- " a | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 123 | \n",
- " 0b | \n",
- " b | \n",
- " 123 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 6 | \n",
- " 1b | \n",
- " c | \n",
- " `a | \n",
- "
\n",
- " \n",
- "
"
- ],
- "text/plain": [
- "pykx.KeyedTable(pykx.q('\n",
- "a| b c d e \n",
- "-| -----------\n",
- "2| 4 1 a 1 \n",
- "2| 123 0 b 123\n",
- "3| 6 1 c `a \n",
- "'))"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"ktab.replace(2,123)"
]
@@ -4383,75 +4737,10 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "a1b87680-f2aa-4434-bcb6-2f4b384b735c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " | \n",
- " a | \n",
- " b | \n",
- " c | \n",
- " d | \n",
- " e | \n",
- "
\n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2 | \n",
- " 4 | \n",
- " `one`two`three | \n",
- " a | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 2 | \n",
- " 0b | \n",
- " b | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " 6 | \n",
- " `one`two`three | \n",
- " c | \n",
- " `a | \n",
- "
\n",
- " \n",
- "
"
- ],
- "text/plain": [
- "pykx.Table(pykx.q('\n",
- "a b c d e \n",
- "-----------------------\n",
- "2 4 `one`two`three a 1 \n",
- "2 2 0b b 2 \n",
- "3 6 `one`two`three c `a\n",
- "'))"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"tab.replace(True, (b\"one\", b\"two\", b\"three\"))"
]
@@ -4461,7 +4750,7 @@
"id": "73059996",
"metadata": {},
"source": [
- "### Table.reset_index()\n",
+ "#### Table.reset_index()\n",
"\n",
"```\n",
"Table.reset_index(levels, *,\n",
@@ -4647,7 +4936,7 @@
"id": "2201d826",
"metadata": {},
"source": [
- "### Table.set_index()\n",
+ "#### Table.set_index()\n",
"\n",
"```\n",
"Table.set_index(\n",
@@ -4716,7 +5005,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#Setting multipe indexes\n",
+ "#Setting multiple indexes\n",
"tab.set_index(['sym', 'traded'])"
]
},
diff --git a/docs/user-guide/advanced/attributes.md b/docs/user-guide/advanced/attributes.md
index d4d1c50..56720dd 100644
--- a/docs/user-guide/advanced/attributes.md
+++ b/docs/user-guide/advanced/attributes.md
@@ -1,24 +1,26 @@
-# Attributes
+---
+title: Apply attributes in PyKX
+description: How to use attributes in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, PyKX objects,
+---
-Attributes are metadata that you attach to lists of special forms. They are also used on table
-columns to speed retrieval for some operations. PyKX can make certain optimizations
-based on the structure of the list implied by the attribute.
+# Apply Attributes
+_This page provides details on how to apply attributes in PyKX._
-Attributes (other than `` `g#``) are descriptive rather than prescriptive. By this we mean that
-by applying an attribute you are asserting that the list has a special form, which PyKX will check.
-It does not instruct PyKX to (re)make the list into the special form; that is your job. A list
-operation that respects the form specified by the attribute leaves the attribute intact
-(other than `` `p#``), while an operation that breaks the form results in the attribute being
-removed in the result.
+!!! tip "Tip: For the best experience, we recommend reading about [PyKX attributes](../../learn/objects.md#what-are-pykx-attributes) first."
-## Applying Attributes
+In PyKX, you can apply attributes to various data structures, including `#!python Vector`/`#!python List` types, `#!python Tables`, and `#!python KeyedTable`s. To apply the attributes, call the `#!python sorted`, `#!python unique`, `#!python grouped`, and `#!python parted` methods on these objects.
+
+### Sorted
+
+The `#!python sorted` attribute ensures that all items in the `#!python Vector` / `#!python Table` column are sorted in ascending
+order. This attribute will be removed if you append to the list with an item that is not in sorted
+order.
-Attributes can be applied on the various `Vector`/`List` types as well as `Tables` and `KeyedTable`'s.
-These attributes can be applied to their supported types by directly calling the `sorted`, `unique`,
-`grouped`, and `parted` methods on these objects.
-Examples: Applying the sorted attribute to a `Vector` can be done by calling the `sorted` method on
-the `Vector`.
+!!! example "Example of applying the `sorted` attribute to a `Vector` by calling the `sorted` method on the `Vector`:"
```Python
>>> a = kx.q.til(10)
@@ -28,7 +30,13 @@ pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9'))
pykx.LongVector(pykx.q('`s#0 1 2 3 4 5 6 7 8 9'))
```
-Applying the unique attribute to the first column of the table.
+### Unique
+
+The `#!python unique` attribute ensures that all items in the `#!python Vector` / `#!python Table` column are unique (there are
+no duplicated values). This attribute will be removed if you append to the list with an item that
+is not unique.
+
+!!! example "Example of applying the `unique` attribute to the first column of the table:"
```Python
>>> a = kx.Table(data = {
@@ -52,7 +60,24 @@ b| s
'))
```
-Applying the grouped attribute to a specified column of a table.
+### Grouped
+
+The `#!python grouped` attribute ensures that all items in the `#!python Vector` / `#!python Table` column are stored in a
+different format to help reduce memory usage. It creates a backing dictionary to store the value and
+indexes that each value has within the list.
+
+Unlike other attributes, the `#!python grouped` attribute will be kept on all insert operations to the list. For instance, this is how a grouped list would be stored:
+
+```q
+// The list
+`g#`a`b`c`a`b`b`c
+// The backing dictionary
+a| 0 3
+b| 1 4 5
+c| 2 6
+```
+
+!!! example "Example of applying the `#!python grouped` attribute to a specified column of a table:"
```Python
>>> a = kx.Table(data = {
@@ -76,7 +101,21 @@ b| s g
'))
```
-Applying the parted attribute to multiple columns on a table.
+### Parted
+
+The `#!python parted` attribute is similar to the `#!python grouped` attribute with the additional requirement that each unique value must be adjacent to its other copies, where the grouped attribute allows them to be dispersed throughout the `#!python Vector` / `#!python Table`.
+
+When possible, the `#!python parted` attribute results in a larger performance gain than using the `#!python grouped` attribute. This attribute will be removed if you append to the list with an item that is not in the `#!python parted`
+order.
+
+```q
+// Can be parted
+`p#`a`a`a`e`e`b`b`c`c`c`d
+// Has to be grouped as the `d symbols are not all contiguous within the vector
+`g#`a`a`d`e`e`b`b`c`c`c`d
+```
+
+!!! example "Example of applying the `parted` attribute to multiple columns on a table:"
```Python
>>> a = kx.Table(data = {
@@ -100,65 +139,19 @@ b| s p
'))
```
-### Sorted
-
-The sorted attribute ensures that all items in the `Vector` / `Table` column are sorted in ascending
-order. This attribute will be removed if you append to the list with an item that is not in sorted
-order.
-
-### Unique
-
-The unique attribute ensures that all items in the `Vector` / `Table` column are unique (there are
-no duplicated values). This attribute will be removed if you append to the list with an item that
-is not unique.
-
-### Grouped
-
-The grouped attribute ensures that all items in the `Vector` / `Table` column are stored in a
-different format to help reduce memory usage, it creates a backing dictionary to store the value and
-indexes that each value has within the list. Unlike other attributes the grouped attribute will be
-kept on all insert operations to the list.
-
-For example this is how a grouped list would be stored.
-
-```q
-// The list
-`g#`a`b`c`a`b`b`c
-// The backing dictionary
-a| 0 3
-b| 1 4 5
-c| 2 6
-```
-
-### Parted
-
-The parted attribute is similar to the grouped attribute with the additional requirement that each
-unique value must be adjacent to its other copies, where the grouped attribute allows them to be
-dispersed throughout the `Vector` / `Table`. When possible the parted attribute will result in a
-larger performance gain than using the grouped attribute.
-This attribute will be removed if you append to the list with an item that is not in the parted
-order.
-
-```q
-// Can be parted
-`p#`a`a`a`e`e`b`b`c`c`c`d
-// Has to be grouped as the `d symbols are not all contiguous within the vector
-`g#`a`a`d`e`e`b`b`c`c`c`d
-```
-
## Performance
-When attributes are set on PyKX objects various functions can use these attributes to speed up their
-execution, by using different algorithms. For example searching through a list without an attribute
-requires checking every single value, however setting the sorted attribute allows a search algorithm
-to use a binary search in stead and then only a fraction of the values actually need to be checked.
+When attributes are set on PyKX objects, various functions can use these attributes to speed up their
+execution, by using different algorithms. For example, searching through a list without an attribute
+requires checking every single value. However, setting the `#!python sorted` attribute allows a search algorithm
+to use a binary search instead and then only a fraction of the values actually needs to be checked.
-Examples of some functions that can use attributes to speed up execution.
+Examples of functions that can use attributes to speed up execution:
-- Where clauses in `select` and `exec` templates run faster with `where =`, `where in` and `where within`.
-- Searching with [`bin`](../../api/pykx-execution/q.md#bin), [`distinct`](../../api/pykx-execution/q.md#distinct),
- [`Find`](https://code.kx.com/q/ref/find/) and [`in`](https://code.kx.com/q/ref/in/).
-- Sorting with [`iasc`](../../api/pykx-execution/q.md#iasc) or [`idesc`](../../api/pykx-execution/q.md#idesc).
+- Where clauses in `#!python select` and `#!python exec` templates run faster with `#!python where =`, `#!python where in` and `#!python where within`.
+- Searching with [`#!python bin`](../../api/pykx-execution/q.md#bin), [`#!python distinct`](../../api/pykx-execution/q.md#distinct),
+ [`#!python Find`](https://code.kx.com/q/ref/find/) and [`#!python in`](https://code.kx.com/q/ref/in/).
+- Sorting with [`#!python iasc`](../../api/pykx-execution/q.md#iasc) or [`#!python idesc`](../../api/pykx-execution/q.md#idesc).
!!!Note
Setting attributes consumes resources and is likely to improve performance on large lists.
diff --git a/docs/user-guide/advanced/compress-encrypt.md b/docs/user-guide/advanced/compress-encrypt.md
new file mode 100644
index 0000000..0d851d9
--- /dev/null
+++ b/docs/user-guide/advanced/compress-encrypt.md
@@ -0,0 +1,180 @@
+---
+title: PyKX compress and encrypt
+description: How to compress and encrypt data in PyKX
+date: October 2024
+author: KX Systems, Inc.,
+tags: compression, encryption, PyKX
+---
+
+# Compress and encrypt data
+_This page explains how to compress and encrypt data in PyKX._
+
+With the volumes of sensitive data being produced within real-time applications today, securely storing this data and accessing it quickly can be challenging. PyKX provides several utilities, in the form of class objects, for managing how data is compressed and encrypted when it is persisted.
+
+### Compress
+
+PyKX supports the compression of data to disk, allowing you to reduce disk space required for your persisted historical data. PyKX gives you a variety of compression/decompression options through the following algorithms:
+
+- [`#!python gzip`](https://en.wikipedia.org/wiki/Gzip)
+- [`#!python snappy`](https://en.wikipedia.org/wiki/Snappy_(compression))
+- [`#!python zstd`](https://en.wikipedia.org/wiki/Zstd)
+- [`#!python LZ4HC`](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm))
+
+In addition to this, you can compress data to KX's own qIPC format. For full information, go to [KX file compression within kdb+/q](https://code.kx.com/q/kb/file-compression/).
+
+### Encrypt
+
+PyKX supports Data At Rest Encryption (DARE) with an explicit requirement on at least OpenSSL v1.0.2. To find out which version of OpenSSL is available to you via PyKX, use the following:
+
+```python
+>>> import pykx as kx
+>>> kx.ssl_info()
+pykx.Dictionary(pykx.q('
+SSLEAY_VERSION | OpenSSL 1.1.1q 5 Jul 2022
+SSL_CERT_FILE | /usr/local/anaconda3/ssl/server-crt.pem
+SSL_CA_CERT_FILE | /usr/local/anaconda3/ssl/cacert.pem
+SSL_CA_CERT_PATH | /usr/local/anaconda3/ssl
+SSL_KEY_FILE | /usr/local/anaconda3/ssl/server-key.pem
+SSL_CIPHER_LIST | ECDBS-ECASD-CHACHA94-REAL305:ECDHE-RSM-CHACHA20-OOTH1305:..
+SSL_VERIFY_CLIENT| NO
+SSL_VERIFY_SERVER| YES
+'))
+```
+
+The encryption provided by this functionality is Transparent Disk Encryption (TDE). TDE protects data at rest by encrypting database files on the hard drive and, as a result, on backup media. Encrypting your data with PyKX is fully transparent to queries, requiring no change to the logic used when querying data, but results in a time penalty.
+
+To use this functionality, you must have a password-protected master key available, ideally with a unique password of high entropy. For more information on the generation of a master key and a password, go to the [DARE configuration](https://code.kx.com/q/kb/dare/#configuration) section.
+
+## Functional walkthrough
+
+This walkthrough demonstrates the following steps:
+
+- Create a compression object for global and per-partition data persistence.
+- Persist a variety of database partitions setting various compression configurations.
+- Set the Python session to have globally configured encryption and compression settings.
+
+### Generate compression objects
+
+With PyKX, you can create compression and encryption class objects to set global configurations or use in specific individual functions. These respectively are supported via the `#!python kx.Compress` and `#!python kx.Encrypt` classes. For this section we will deal only with compression.
+
+The full list of algorithms is part of the `#!python kx.CompressionAlgorithm` enumeration:
+
+```python
+>>> import pykx as kx
+>>> list(kx.CompressionAlgorithm)
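+[<CompressionAlgorithm.none: 0>, <CompressionAlgorithm.ipc: 1>, <CompressionAlgorithm.gzip: 2>, <CompressionAlgorithm.snappy: 3>, <CompressionAlgorithm.lz4hc: 4>, <CompressionAlgorithm.zstd: 5>]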
+```
+
+You can find further details through the `help` command:
+
+```python
+>>> help(kx.CompressionAlgorithm)
+```
+
+Once you are familiar with the options available to you, it's time to initialize your first compression class. In this case, generate a compression object which uses the `#!python gzip` algorithm at compression `#!python level 8`.
+
+```python
+>>> import pykx as kx
+>>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=8)
+```
+
+We use this object in the remaining sections of the walkthrough, in a local (one-shot) and global context.
+
+### Persist database partitions with various configurations
+
+Not all data is created equal: in time-series applications such as algorithmic trading, it is often the case that older data is less valuable than newer data. As a result, when backfilling historical data, you may want to compress older datasets more aggressively. The PyKX compression logic allows you to persist different partitions within a historical database at different compression levels.
+
+1. Create a database with the most recent data uncompressed
+
+ ```python
+ >>> import pykx as kx
+ >>> from datetime import date
+ >>> N = 10000
+ >>> db = kx.DB(path='/tmp/db')
+ >>> qtable = kx.Table(
+ ... data={
+ ... 'x': kx.random.random(N, 1.0),
+ ... 'x1': 5 * kx.random.random(N, 1.0),
+ ... 'x2': kx.random.random(N, ['a', 'b', 'c'])
+ ... }
+ ... )
+ >>> db.create(qtable, 'tab', date(2020, 1, 1))
+ ```
+
+2. Add a new partition using `#!python gzip` compression
+
+ ```python
+ >>> gzip = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=4)
+ >>> qtable = kx.Table(
+ ... data={
+ ... 'x': kx.random.random(N, 1.0),
+ ... 'x1': 5 * kx.random.random(N, 1.0),
+ ... 'x2': kx.random.random(N, ['a', 'b', 'c'])
+ ... }
+ ... )
+ >>> db.create(qtable, 'tab', date(2020, 1, 2), compress=gzip)
+ ```
+
+3. Add a final partition using `#!python lz4hc` compression
+
+ ```python
+ >>> lz4hc = kx.Compress(algo=kx.CompressionAlgorithm.lz4hc, level=10)
+ >>> qtable = kx.Table(
+ ... data={
+ ... 'x': kx.random.random(N, 1.0),
+ ... 'x1': 5 * kx.random.random(N, 1.0),
+ ... 'x2': kx.random.random(N, ['a', 'b', 'c'])
+ ... }
+ ... )
+ >>> db.create(qtable, 'tab', date(2020, 1, 3), compress=lz4hc)
+ ```
+
+You can view information about the persistence characteristics of your data using `#!python kx.q('-21!')`, for example:
+
+```python
+>>> kx.q('-21!`:/tmp/db/2020.01.01/tab/x')
+pykx.Dictionary(pykx.q(''))
+>>> kx.q('-21!`:/tmp/db/2020.01.02/tab/x')
+pykx.Dictionary(pykx.q('
+compressedLength | 5467
+uncompressedLength| 8016
+algorithm | 2i
+logicalBlockSize | 17i
+zipLevel | 4i
+'))
+>>> kx.q('-21!`:/tmp/db/2020.01.03/tab/x')
+pykx.Dictionary(pykx.q('
+compressedLength | 6374
+uncompressedLength| 8016
+algorithm | 4i
+logicalBlockSize | 17i
+zipLevel | 10i
+'))
+```
+
+### Initialize compression and encryption globally
+
+Global initialization of compression and encryption allows all data persisted from within a process to be compressed or encrypted. This can be useful when completing large batch operations on data where being specific about per-partition/per-file operations isn't necessary. The sections below deal with compression and encryption separately.
+
+PyKX uses compression settings that are globally readable via `#!python kx.q.z.zd`. When unset, this value returns a PyKX Identity value as follows:
+
+```python
+>>> kx.q.z.zd
+pykx.Identity(pykx.q('::'))
+```
+
+To set `#!python gzip` compression globally, use the `#!python global_init` method on the generated `#!python kx.Compress` object.
+
+```python
+>>> compress = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=9)
+>>> compress.global_init()
+>>> kx.q.z.zd
+pykx.LongVector(pykx.q('17 2 9'))
+```
+
+Complete the global encryption initialisation by loading the user's encryption key into the process as follows:
+
+```python
+>>> encrypt = kx.Encrypt(path='/path/to/my.key', password='PassWorD')
+>>> encrypt.global_init()
+```
diff --git a/docs/user-guide/advanced/context_interface.md b/docs/user-guide/advanced/context_interface.md
index 4b9927e..b91f89e 100644
--- a/docs/user-guide/advanced/context_interface.md
+++ b/docs/user-guide/advanced/context_interface.md
@@ -1,24 +1,54 @@
-# Using q functions in a Pythonic way
+---
+title: Import existing q functions
+description: How to use q functions in a Pythonic way in PyKX
+date: October 2024
+author: KX Systems, Inc.,
+tags: interface, q, PyKX
+---
-For many users of PyKX the q programming language is not their language of choice when developing analytics and applications. However, when dealing with large volumes of kdb+ data or operations where vector analytic performance is of paramount importance they may desire or need to make use of q.
+# Import existing q functions
-Functionality within the PyKX library makes the access and use of this functionality easier, this is known as the Context Interface
+For you and many users of PyKX the q programming language may not be your primary language of choice when developing analytics and applications. There are a number of circumstances under which access to q functionality or the ability to use functions written in q may be critical to your use-case:
-## The Context Interface
+- When dealing with large volumes of kdb+ data or operations where vector analytic performance is of paramount importance.
+- When you wish to make use of existing q code/libraries in a Python first way.
+- When you need access to functions of the q language directly.
-The Context Interface provided with PyKX provides an easy to use way of accessing q contexts (also known as namespaces when at the top level.) For more information about contexts/namespaces in q please refer to [Chapter 12 of Q for Mortals](https://code.kx.com/q4m3/12_Workspace_Organization/).
+The below sections make use of what is known as "The Context Interface". In q, a context (known as a namespace when at the top level) is an organizational structure which is used to organize code into libraries/common utilities. For more information on contexts/namespaces in q please refer to [Chapter 12 of Q for Mortals](https://code.kx.com/q4m3/12_Workspace_Organization/). PyKX exposes these contexts as special [`kx.QContext`](../../api/pykx-execution/ctx.md#pykx.ctx.QContext) objects. These context objects have attributes for their members, which can either be sub-contexts or K objects. For example:
-This provides users who are unfamiliar with kdb+/q analytic development to gain from the expertise of domain experts in this area through the consumption of common libraries and utilities.
+* `#!python pykx.q.Q` is a QContext instance for the builtin `#!q .Q` context/namespace
+* `#!python pykx.q.ctxA.ctxB` is a QContext instance for the `#!q .ctxA.ctxB` context
+* `#!python pykx.q.ctxA.ctxB.kObject` is a pykx.K instance for the `#!q .ctxA.ctxB.kObject` K object
-Both the embedded q instance at pykx.q, and pykx.QConnection instances, have attributes for q namespaces, which are exposed in Python as pykx.QContext objects. These context objects have attributes for their members, which can either be sub-contexts or K objects. For example:
+## Use the in-built q functionality
-* pykx.q.Q is a KdbContext instance for the builtin .Q context/namespace
-* pykx.q.ctxA.ctxB is a KdbContext instance for the .ctxA.ctxB context
-* pykx.q.ctxA.ctxB.kObject is a pykx.K instance for the .ctxA.ctxB.kObject K object
+When you start a q process there are 4 namespaces loaded which provide useful functionality to users of PyKX.
-Just as in q, the .q context is accessible at the top-level, so for instance instead of accessing pykx.q.q.md5, you can access it as pykx.q.md5. Some q builtins cannot be accessed like this such as or and not as these result in Python syntax errors. Such functions can instead be accessed either with getattr, or by evaluating them as q code (e.g. pykx.q('not')).
+| **Namespace** | **Contents** | **Link** |
+| :------------ | :-------------------------------------------------------------- | :-------------------------------------- |
+| `#!q .q` | Fundamental keywords and operators of the q language. | [link](https://code.kx.com/q/ref/) |
+| `#!q .Q` | Miscellaneous tooling for database interactions, debugging etc. | [link](https://code.kx.com/q/ref/dotq/) |
+| `#!q .z` | Environment and callback configuration functionality | [link](https://code.kx.com/q/ref/dotz/) |
+| `#!q .j` | Functionality for the serialization/deserialization of json | [link](https://code.kx.com/q/ref/dotj/) |
-Accessing attributes of pykx.q (or a pykx.QConnection instance) which do not correspond to a context that has been loaded in memory in q will result in it trying to find a script with a matching name. This process is detailed in the flowchart below:
+These namespaces are loaded by default and accessible as follows
+
+```python
+>>> kx.q.max # Access the max native function
+>>> kx.q.Q.qt # Access the .Q.qt function
+>>> kx.q.z.p # Access the .z.p function
+>>> kx.q.j.k # Access the .j.k function
+```
+
+As can be seen above, just as in q, the .q context is accessible at the top-level, so for instance instead of accessing pykx.q.q.md5, you can access it as pykx.q.md5.
+
+!!! Note
+
+    Some q builtins, such as `or` and `not`, cannot be accessed like this because doing so results in Python syntax errors. Such functions can instead be accessed either with `getattr`, or by evaluating them as q code (e.g. `pykx.q('not')`).
+
+## Using executed q code Python first
+
+Much of the code you write or need to access will come from executed code locally in your process or will be contained in scripts which you have access to. The flow chart below shows the hierarchy of search/loading that happens when PyKX cannot find a requested context.
```mermaid
graph LR
@@ -43,158 +73,124 @@ graph LR
H --> C;
```
-The fact that the context might not be defined even after the context interface changes the context
-and executes the script might be confusing. This can happen because the script can switch into
-other contexts, which overrides the context switch done by the context interface. Additionally the
-script might use fully qualified names for its definitions, which can bypass the effect of
-switching contexts.
+As described in the flow chart if a context is found to exist within the `q` memory space this will be presented to the user as the `kx.QContext`. Take for example the below case where you have defined two q functions and wish to access them in a Python first manner:
+
+```python
+>>> import pykx as kx
+>>> kx.q('.test.function0:{x+y}')
+>>> kx.q('.test.function1:{x-y}')
+>>> kx.q.test
+<pykx.ctx.QContext of .test with [function0, function1]>
+```
+
+If the namespace/context you are requesting doesn't exist in the `q` memory space then a search is carried out for a script matching the supplied context which if found is executed. The search logic is outlined in the expandable section below.
+
+??? Note "File Search Path"
+
+ When the context interface cannot find a namespace (i.e. a top-level context) that is being accessed it attempts to find a q/k script that has a matching name. This process is done via a depth first search of a tree where each node corresponds to part of the path, and each leaf corresponds to a possible file. Only the first file found that exists is executed. If none of the files exist then an `AttributeError` is raised.
+
+ The layers of the tree are as follows:
+
+ - Each of the paths in `pykx.q.paths`/`pykx.ipc.Connection(...).paths` (which defaults to `pykx.ctx.default_paths`)
+ - `.` prefix or not
+ - The name of the attribute accessed (i.e. `pykx.q.script` -> `script`)
+ - `.q` or `.k`
+ - No trailing `_` or a trailing `_` ([n.b. why a q/k script path would end with an underscore](https://code.kx.com/q/basics/syscmds/#_-hide-q-code))
-Note that context switches persists across `pykx.q` calls (but not `pykx.QConnection(...)` calls).
-One should take care when switching contexts, as unexpectedly being in an different context can
-result in undesirable behavior. `QContext` objects are Python context managers, which means they
-can be used with the `with` statement like so:
+ So for example if `pykx.q.script` was accessed, the context `.script` was not defined in memory in q, and `paths` was set to `['.', pykx.qhome]` (where `pykx.qhome == pathlib.Path('/opt/kdb')`), then the following paths would be checked in order until one is found to exist, or they have all been checked:
+
+ 1. `./.script.q`
+ 2. `./.script.q_`
+ 3. `./.script.k`
+ 4. `./.script.k_`
+ 5. `./script.q`
+ 6. `./script.q_`
+ 7. `./script.k`
+ 8. `./script.k_`
+ 9. `/opt/kdb/.script.q`
+ 10. `/opt/kdb/.script.q_`
+ 11. `/opt/kdb/.script.k`
+ 12. `/opt/kdb/.script.k_`
+ 13. `/opt/kdb/script.q`
+ 14. `/opt/kdb/script.q_`
+ 15. `/opt/kdb/script.k`
+ 16. `/opt/kdb/script.k_`
+
+To show the script search logic in action you can first write a file to the `#!python kx.qhome` location used by PyKX containing a namespace matching the name of the script
```python
-# q code here executes in the global context
-with q.myctx:
- # q code here executes in the `.myctx` context
- pass
-# q code here executes in the global context
+>>> demo_extension_source = '''
+... \d .demo_extension
+... N:100
+... test_data:([]N?`a`b`c;N?1f;N?10;N?0b)
+... test_function:{[data]
+... analytic_keys :`max_x1`avg_x2`med_x3;
+... analytic_calcs:(
+... (max;`x1);
+... (avg;`x2);
+... (med;`x3));
+... ?[data;
+... ();
+...       k!k:enlist `x;
+... analytic_keys!analytic_calcs
+... ]
+... }
+... '''
+>>>
+>>> demo_extension_filename = kx.qhome/'demo_extension.q'
+>>> with open(demo_extension_filename, 'w') as f:
+... f.write(demo_extension_source)
```
-If you would like to switch into a q context using a string for the context name, use `getattr`
-like so:
+Now that your script is available as a file `demo_extension.q` you can access and use the functions as follows:
```python
-# q code here executes in the global context
-with getattr(q, 'myctx'):
- # q code here executes in the `.myctx` context
- pass
-# q code here executes in the global context
+>>> kx.q.demo_extension
+<pykx.ctx.QContext of .demo_extension with [N, test_data, test_function]>
+>>> kx.q.demo_extension.test_data
+pykx.Table(pykx.q('
+x x1 x2 x3
+------------------
+c 0.2086614 2 0
+a 0.9907116 1 1
+a 0.5794801 8 1
+b 0.9029713 8 0
+a 0.2011578 1 0
+..
+'))
+>>> kx.q.demo_extension.test_function
+pykx.SymbolicFunction(pykx.q('`.demo_extension.test_function'))
+>>> kx.q.demo_extension.test_function(kx.q.demo_extension.test_data)
+pykx.KeyedTable(pykx.q('
+x| max_x1 avg_x2 med_x3
+-| -------------------------
+a| 0.9907116 4.74359 1
+b| 0.9550901 4.580645 1
+c| 0.9830794 4.433333 0
+'))
```
-The following provides a number of examples of the context interface being used:
+## Extend where PyKX searches for scripts
-* Access a variety of functions for use from the `.q`, `.Q` and `.j` namespaces
+In addition to the default search locations, you can add further locations to be searched by appending paths to the `kx.q.paths` list which is used in the search.
- ```python
- >>> import pykx as kx
- >>> kx.q.max
- pykx.UnaryPrimitive(pykx.q('max'))
- >>> kx.q.max([1, 2, 3])
- pykx.LongAtom(pykx.q('3'))
- >>>
- >>> kx.q.mavg
- pykx.Lambda(pykx.q('k){msum[x;0.0^y]%mcount[x;y]}'))
- >>> kx.q.mavg(3, kx.q('til 10'))
- pykx.FloatVector(pykx.q('0 0.5 1 2 3 4 5 6 7 8'))
- >>>
- >>> kx.q.Q
-
- >>> kx.q.Q.ty(np.array([1, 2, 3]))
- pykx.CharAtom(pykx.q('"j"'))
- >>>
- >>> kx.q.j
-
- >>> kx.q.j.j({'test': 2, 'dict' : [1, 2, 3]})
- pykx.CharVector(pykx.q('"{\"test\":2,\"dict\":[1,2,3]}"'))
- ```
+The following shows a practical example of this, accessing a file `my_context.q` at a new location `/tmp/files`. In this example you can see the behavior if attempting to access a namespace without this location set for search:
-* Generate a script containing a context and make use of its functionality
- * Generate the script saving it to `QHOME`
- ```python
- >>> demo_extension_source = '''
- ... \d .demo_extension
- ... N:100
- ... test_data:([]N?`a`b`c;N?1f;N?10;N?0b)
- ... test_function:{[data]
- ... analytic_keys :`max_x1`avg_x2`med_x3;
- ... analytic_calcs:(
- ... (max;`x1);
- ... (avg;`x2);
- ... (med;`x3));
- ... ?[data;
- ... ();
- ... k!k:enlist `x;
- ... analytic_keys!analytic_calcs
- ... ]
- ... }
- ... '''
- >>>
- >>> demo_extension_filename = kx.qhome/'demo_extension.q'
- >>> with open(demo_extension_filename, 'w') as f:
- ... f.write(demo_extension_source)
- ```
- * Access the defined context using the variables and functions defined
- ```python
- >>> kx.q.demo_extension
-
- >>> kx.q.demo_extension.test_data
- pykx.Table(pykx.q('
- x x1 x2 x3
- ------------------
- c 0.2086614 2 0
- a 0.9907116 1 1
- a 0.5794801 8 1
- b 0.9029713 8 0
- a 0.2011578 1 0
- ..
- '))
- >>> kx.q.demo_extension.test_function
- pykx.SymbolicFunction(pykx.q('`.demo_extension.test_function'))
- >>> kx.q.demo_extension.test_function(kx.q.demo_extension.test_data)
- pykx.KeyedTable(pykx.q('
- x| max_x1 avg_x2 med_x3
- -| -------------------------
- a| 0.9907116 4.74359 1
- b| 0.9550901 4.580645 1
- c| 0.9830794 4.433333 0
- '))
- ```
-
-## Script Search Logic
-
-When the context interface cannot find a namespace (i.e. a top-level context) that is being
-accessed it attempts to find a q/k script that has a matching name. This process is done via a
-depth first search of a tree where each node corresponds to part of the path, and each leaf
-corresponds to a possible file. Only the first file found that exists is executed. If none of the
-files exist then an `AttributeError` is raised.
-
-The layers of the tree are as follows:
-
-- Each of the paths in `pykx.q.paths`/`pykx.ipc.Connection(...).paths` (which defaults to
- `pykx.ctx.default_paths`)
-- `.` prefix or not
-- The name of the attribute accessed (i.e. `pykx.q.script` -> `script`)
-- `.q` or `.k`
-- No trailing `_` or a trailing `_` ([n.b. why a q/k script path would end with an
- underscore](https://code.kx.com/q/basics/syscmds/#_-hide-q-code))
-
-So for example if `pykx.q.script` was accessed, the context `.script` was not defined in memory in
-q, and `paths` was set to `['.', pykx.qhome]`
-(where `pykx.qhome == pathlib.Path('/opt/kdb')`), then the following paths would be checked
-in order until one is found to exist, or they have all been checked:
-
-1. `./.script.q`
-2. `./.script.q_`
-3. `./.script.k`
-4. `./.script.k_`
-5. `./script.q`
-6. `./script.q_`
-7. `./script.k`
-8. `./script.k_`
-9. `/opt/kdb/.script.q`
-10. `/opt/kdb/.script.q_`
-11. `/opt/kdb/.script.k`
-12. `/opt/kdb/.script.k_`
-13. `/opt/kdb/script.q`
-14. `/opt/kdb/script.q_`
-15. `/opt/kdb/script.k`
-16. `/opt/kdb/script.k_`
-
-??? "Failure to find a script"
-
- Failure to find a script associated with your defined extension will result in an error being raised, the following error message provides a truncated example of this when attempting to retrieve a context `test_extension`:
+```python
+>>> import pykx as kx
+>>> from pathlib import Path
+>>> kx.q.my_context
+Traceback (most recent call last):
+ File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/__init__.py", line 132, in __getattr__
+..
+>>> kx.q.paths.append(Path('/tmp/files'))
+>>> kx.q.my_context
+
+```
+
+If PyKX fails to find a script, an `#!python AttributeError` is raised. The expandable section below provides an example of this:
+
+??? Note "Failed to find a script"
```python
>>> kx.q.test_extension
@@ -224,50 +220,11 @@ in order until one is found to exist, or they have all been checked:
AttributeError: 'pykx.ctx.QContext' object has no attribute 'test_extension'
```
-### Adding context interface search locations
-
-In addition to the default search locations users can add additional locations to be searched through appending of additional search paths to the `kx.q.paths` list which underpins the search. The following shows a practical example of this accessing a file `my_context.q` at a new location `/tmp/files`, in this example we attempt initially to access the namespace without this location set for search
-
-```python
->>> import pykx as kx
->>> from pathlib import Path
->>> kx.q.my_context
-Traceback (most recent call last):
- File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/__init__.py", line 132, in __getattr__
-..
->>> kx.q.paths.append(Path('/tmp/files'))
->>> kx.q.my_context
-
-```
-
-## Best Practices
-
-To take full advantage of the automatic script loading one should ensure that every q/k script
-defines at most one public context. Ideally every q/k script should define exactly one context,
-and the name of the context should be equivalent to the name of the file without the file
-extension. For instance, `script.q` should place its definitions within the `.script` namespace.
-This ensures that when the context interface executes a script to load a context, it doesn't load
-in more contexts than intended. Furthermore the context name matching the file name ensures that
-when that file is executed because its name matches the desired context, that context will actually
-be defined.
-
-When these best practices cannot be followed it may be impossible to use the automatic loading of
-scripts via the context interface. In that case we can resort to manually loading scripts either
-by executing the q code `system "l "`, or by calling `pykx.q._register` with the
-path to the script.
-
-When switching contexts within a script, one should always save the context they were in prior to
-their context switch, and then switch back into it afterwards, rather than explicitly switching
-into the global context.
-
-## Execution Contexts for Functions
+## Use functions retrieved with the Context Interface
Functions returned by the context interface are provided as [`pykx.SymbolicFunction`][] instances.
-These objects are symbol atoms whose symbol is a named function (with a fully-qualified name). They
-can be called like regular [`pykx.Function`][] objects, but unlike regular [`pykx.Function`][]
-objects, they will execute in the `pykx.Q` instance (also known as its "execution context") in
-which it was defined.
+These objects are symbol atoms whose symbol is a named function (with a fully-qualified name). They can be called like regular [`pykx.Function`][] objects, but unlike regular [`pykx.Function`][] objects, they will execute in the `pykx.Q` instance (also known as its "execution context") in which it was defined.
The following shows an example of the retrieval of a function from a context vs defining the function itself:
@@ -289,3 +246,34 @@ The following shows an example of the retrieval of a function from a context vs
>>> qfunc(2)
pykx.LongAtom(pykx.q('3'))
```
+
+## Use Contexts via IPC
+
+The context interface is also supported against remote processes thus allowing you to run analytic operations Python first against a remote kdb+/q server. The syntax and operational restrictions outlined in the previous sections also exist for the IPC instance which you can call as follows
+
+```python
+>>> with kx.SyncQConnection(port=5050) as conn:
+... print(conn.max([1, 2, 3, 4]))
+4
+>>> with kx.SyncQConnection(port=5050) as conn:
+... conn('.test.func:{x+1}')
+... print(conn.test.func(10))
+11
+```
+
+!!! Warning "Performance Impact"
+
+    The context interface adds overhead to remote queries as it requires the retrieval of information about the members of namespaces prior to execution. For optimal use of IPC connections, limit access to this functionality by setting `#!python no_ctx=True`.
+
+## Best practices for organizing scripts
+
+For efficient automatic script loading, each q/k script should only define at most one public context. The name of the context should be equivalent to the name of the file without the file extension. For instance, `script.q` should place its definitions within the `#!q .script` namespace. This ensures the context will be defined and ensures that when the context interface executes a script to load a context, it doesn't load in more contexts than intended.
+
+When these best practices cannot be followed it may be impossible to use the automatic loading of scripts via the context interface. In that case we can resort to manually loading scripts either by executing the q code `#!q system "l <path to script>"`, or by calling `#!python pykx.q._register` with the path to the script.
+
+When switching contexts within a script, one should always save the context they were in prior to their context switch, and then switch back into it afterwards, rather than explicitly switching into the global context.
+
+## Next Steps
+
+- [Learn how to interact via IPC](ipc.md)
+- [Query a database using Python](../fundamentals/query/pyquery.md)
diff --git a/docs/user-guide/advanced/database/db_gen.md b/docs/user-guide/advanced/database/db_gen.md
new file mode 100644
index 0000000..cf44417
--- /dev/null
+++ b/docs/user-guide/advanced/database/db_gen.md
@@ -0,0 +1,151 @@
+---
+title: Generating and extending a database
+description: Introduction to the PyKX database creation and management functionality
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, database, maintenance, management, generation
+---
+
+
+# Generate and extend a database
+
+_This page explains how to create and expand databases using PyKX._
+
+!!! tip "Tip: For the best experience, we recommend reading [Databases in PyKX](index.md) first. If you already have access to a database and only need to load it, you can skip this page and jump right to [load database](db_loading.md)."
+
+Before leveraging the performance of PyKX when querying on-disk data, you need to create a [persisted database](../../../extras/glossary.md#persisted-database). In the following sections we complete the following:
+
+1. [Create a new database](#1-create-database) containing a single table `#!python trade` and multiple days of data.
+1. [Add a new day worth of data](#2-add-new-database-partition) for `#!python today` to the database for the `#!python trade` table.
+1. [On-board a new table](#3-add-new-table-to-database) (`#!python quote`) which contains data from `#!python today`.
+1. Ensure that the [new table is queryable](#4-ensure-new-table-is-queryable).
+
+!!! note "Bring your own data"
+
+ The below example makes use of randomly-generated data using PyKX, where we use `#!python trade` or `#!python quote` tables generated in that manner. You can replace them with an equivalent Pandas/PyArrow table which will be converted to a PyKX table before being persisted.
+
+## 1. Create database
+
+For more information on database structures, see the linked section on [what is a database](index.md#whats-a-pykx-database). With PyKX, use the `#!python pykx.DB` class for all database interactions in Python. This class lets you create, expand, and maintain on-disk partitioned databases. First, we need to create a database.
+
+In the next cell, we create a `#!python trade` table with data spanning multiple days (the four days preceding today).
+
+```python
+>>> import pykx as kx
+>>> N = 10000000
+>>> trade = kx.Table(data={
+... 'date': kx.random.random(N, kx.DateAtom('today') - [1, 2, 3, 4]),
+... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))),
+... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
+... 'price': kx.random.random(N, 10.0)
+... })
+```
+
+Now that we have generated our trade table, we can persist it to disk at the location `#!python /tmp/db`.
+
+```python
+>>> db = kx.DB(path='/tmp/db')
+>>> db.create(trade, 'trade', 'date')
+```
+
+That's it, you now have a persisted database. To verify the availability of the database and its tables, we can examine the database object:
+
+```python
+>>> db.tables
+['trade']
+>>> type(db.trade)
+<class 'pykx.wrappers.PartitionedTable'>
+```
+
+The above database persistence uses the default parameters within the `#!python create` function. If you need to compress/encrypt the persisted database partitions or need to define a `#!python by` or specify the symbol enumeration name, you can follow the API documentation [here](../../../api/db.md#pykx.db.DB.create).
+
+## 2. Add new database partition
+
+Now that you have generated a database, you can add extra partitions using the same database class and the `#!python create` function. In this example we will add new data for the current day created in the below cell:
+
+```python
+>>> N = 2000000
+>>> trade = kx.Table(data={
+... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))),
+... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
+... 'price': kx.random.random(N, 10.0)
+... })
+```
+
+Note that in comparison to the original database creation logic, we do not have a `#!python date` column. Instead, we add a date at partition creation. Below we provide a variety of examples of adding new partitions under various conditions:
+
+=== "Generate default partition"
+
+ ```python
+ >>> db.create(trade, 'trade', kx.DateAtom('today'))
+ ```
+
+=== "Compress data in a partition"
+
+ In the below example, we compress data within the persisted partition using [`gzip`](https://en.wikipedia.org/wiki/Gzip). For further details on supported compression formats see [here](../compress-encrypt.md) or look at the API reference [here](../../../api/compress.md).
+
+ ```python
+ >>> gzip = kx.Compress(kx.CompressionAlgorithm.gzip, level=2)
+ >>> db.create(trade, 'trade', kx.DateAtom('today'), compress=gzip)
+ ```
+
+=== "Encrypt persisted data"
+
+ In the below example, we encrypt the data persisted for the added partition. For further details on how encryption works within PyKX see [here](../compress-encrypt.md) or look at the API reference [here](../../../api/compress.md).
+
+ ```python
+ >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword')
+ >>> db.create(trade, 'trade', kx.DateAtom('today'), encrypt=encrypt)
+ ```
+
+## 3. Add new table to database
+
+After onboarding your first table to a database, a common question is “How can I add a new table of related data?”. You can use the `#!python pykx.DB` class and its `#!python create` function to do this. For instance, let’s add a `#!python quote` table for the current day:
+
+```python
+>>> N = 1000000
+>>> quote = kx.Table(data={
+... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))),
+... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),
+... 'ask': kx.random.random(N, 100),
+... 'bid': kx.random.random(N, 100)
+... })
+```
+
+We can now add this as the data for the current day to the `#!python quote` table and see that the table is defined:
+
+```python
+>>> db.create(quote, 'quote', kx.DateAtom('today'))
+>>> db.tables
+['quote', 'trade']
+>>> type(db.quote)
+<class 'pykx.wrappers.PartitionedTable'>
+```
+
+## 4. Ensure new table is queryable
+
+You have now persisted another table to your database, however, you will notice if you access the `#!python quote` table that the return is surprising:
+
+```python
+>>> db.quote
+pykx.PartitionedTable(pykx.q('+`time`sym`ask`bid!`quote'))
+```
+
+The reason for this is that you currently do not have data in each partition of your database for the `#!python quote` table. To rectify this, run the `#!python fill_database` method of the `#!python pykx.DB` class, which adds empty `#!python quote` data to the partitions from which it is missing:
+
+```python
+>>> db.fill_database()
+```
+
+Now you should be able to access the `#!python quote` data for query:
+
+```python
+>>> db.quote
+```
+
+## Next Steps
+
+- [Load an existing database](db_loading.md).
+- [Modify the contents of your database](db_mgmt.md)
+- [Query your database with Python](../../fundamentals/query/pyquery.md)
+- [Compress/encrypt data](../compress-encrypt.md#persisting-database-partitions-with-various-configurations) for persisting database partitions.
diff --git a/docs/user-guide/advanced/database/db_loading.md b/docs/user-guide/advanced/database/db_loading.md
new file mode 100644
index 0000000..390e7cd
--- /dev/null
+++ b/docs/user-guide/advanced/database/db_loading.md
@@ -0,0 +1,50 @@
+---
+title: Load an existing database
+description: How to load an existing database into a Python process
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, database, loading
+---
+
+# Load an existing database
+
+_This page explains how to load an existing database into a Python process._
+
+!!! tip "Tip: For the best experience, we recommend reading [Databases in PyKX](index.md) and [generate a database](db_gen.md) first."
+
+By default, you can only load one database into a Python process when using PyKX. To automatically load a database when initializing the `#!python pykx.DB` class, set the database location as the path:
+
+```python
+>>> import pykx as kx
+>>> db = kx.DB(path='/tmp/db')
+>>> db.tables
+['quote', 'trade']
+```
+
+To load a database after initialization, use the `#!python load` command as shown below:
+
+```python
+>>> import pykx as kx
+>>> db = kx.DB()
+>>> db.tables
+>>> db.load('/tmp/db')
+>>> db.tables
+['quote', 'trade']
+```
+
+## Change the loaded database
+
+To overwrite the database loaded and use another database if needed, use the `#!python overwrite` keyword.
+
+In the below example, we are loading a new database `#!python /tmp/newdb` which in our case doesn't exist but mimics the act of loading a separate database:
+
+```python
+>>> db = kx.DB(path='/tmp/db')
+>>> db.load(path='/tmp/newdb', overwrite=True)
+```
+
+## Next Steps
+
+- [Modify the contents of your database](db_mgmt.md).
+- [Query your database with Python](../../fundamentals/query/pyquery.md).
+- [Compress/encrypt data](../compress-encrypt.md#persisting-database-partitions-with-various-configurations) for persisting database partitions.
diff --git a/docs/user-guide/advanced/database/db_mgmt.md b/docs/user-guide/advanced/database/db_mgmt.md
new file mode 100644
index 0000000..fbf3446
--- /dev/null
+++ b/docs/user-guide/advanced/database/db_mgmt.md
@@ -0,0 +1,69 @@
+---
+title: Manage a PyKX Database
+description: How to modify an existing database
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, database, loading
+---
+
+# Manage a database
+
+_This page explains how to modify databases generated in PyKX._
+
+!!! tip "Tip: For the best experience, we recommend reading [Databases in PyKX](index.md), [Generate a database](db_gen.md) and [Load a database](db_loading.md) first."
+
+With PyKX, you can use various methods to modify your on-disk database. These changes can take many forms:
+
+- Add new columns to the database
+- Apply functions to existing columns
+- Rename columns
+- Delete columns
+
+!!! Warning "A cautionary note"
+
+ Operations on persisted databases can lead to changes that are hard to undo. For instance, applying functions that modify row values in a column can result in updated values that make it impossible to retrieve the original data. Before using this functionality for complex tasks, ensure you understand the impact of your changes and have a backup of your data to mitigate any issues.
+
+The next section demonstrates how to edit the `#!python trade` table generated [here](db_gen.md) to extract information from the table columns, sanitize the data, and update the database schema.
+
+## Update your database
+
+Over time, the data you work with will change. This includes the names and types of columns, and even which columns are in the table. These changes can occur as new sensors are introduced in a manufacturing setting or when your data provider updates the information they supply in the financial sector.
+
+To that end, we can take the `#!python trade` table and make the following changes:
+
+1. Rename the column `#!python sym` to `#!python symbol`.
+1. Change the type of the `#!python price` column from a `#!python pykx.FloatAtom` to `#!python pykx.RealAtom` to reduce storage requirements.
+1. Add a new column `#!python exchange` which initially has an empty `#!python pykx.SymbolAtom` entry under the expectation that newly added partitions will have this column available.
+
+```python
+>>> import pykx as kx
+>>> db = kx.DB(path='/tmp/db')
+>>> db.rename_column('trade', 'sym', 'symbol')
+>>> db.set_column_type('trade', 'price', kx.RealAtom)
+>>> db.add_column('trade', 'exchange', kx.SymbolAtom.null)
+```
+
+Now that we’ve made some basic changes, we can proceed with more detailed modifications to the database. These changes can significantly impact the data since they involve free-form edits to individual columns and partitions. If you’re unsure about the changes or your ability to undo them, it’s a good idea to make a copy of the column first.
+
+In the below cell, we complete the following:
+
+1. Cache the order of columns prior to changes.
+1. Make a copy of the column `#!python price` named `#!python price_copy`.
+1. Adjust the value of the stock price on the copied column to account for a two-for-one stock split by multiplying the price by half.
+1. Delete the original `#!python price` column.
+1. Rename the copied column `#!python price_copy` to be `#!python price`.
+1. Reorder the columns.
+
+```python
+>>> col_order = db.trade.columns.py()
+>>> db.copy_column('trade', 'price', 'price_copy')
+>>> db.apply_function('trade', 'price_copy', lambda x: x * 0.5)
+>>> db.delete_column('trade', 'price')
+>>> db.rename_column('trade', 'price_copy', 'price')
+>>> db.reorder_columns('trade', col_order)
+```
+
+## Next Steps
+
+- [Query your database with Python](../../fundamentals/query/pyquery.md)
+- [Compress/encrypt data](../compress-encrypt.md#persisting-database-partitions-with-various-configurations) for persisting database partitions.
diff --git a/docs/user-guide/advanced/database/index.md b/docs/user-guide/advanced/database/index.md
new file mode 100644
index 0000000..4bb6d38
--- /dev/null
+++ b/docs/user-guide/advanced/database/index.md
@@ -0,0 +1,64 @@
+---
+title: Databases in PyKX
+description: PyKX database creation and management
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, database, maintenance, management, generation
+---
+
+# Databases in PyKX
+
+_This page explains the concept of databases in PyKX, including the creation and management of databases._
+
+## What's a PyKX database?
+
+In PyKX, the term database refers to [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). A partitioned kdb+ database consists of one or more tables saved on-disk, where they are split into separate folders called partitions. These partitions are most often based on a temporal field within the dataset, such as date or month. Each table within the database must follow the same partition structure.
+
+A visual representation of a database containing 2 tables (trades and quotes) partitioned by date would be as follows, where `#!python price`, `#!python sym`, `#!python time` in the quotes folder are columns within the table:
+
+```bash
+db
+├── 2020.10.04
+│ ├── quotes
+│ │ ├── .d
+│ │ ├── price
+│ │ ├── sym
+│ │ └── time
+│ └── trades
+│ ├── .d
+│ ├── price
+│ ├── sym
+│ ├── time
+│ └── vol
+├── 2020.10.06
+│ ├── quotes
+..
+└── sym
+```
+
+!!! note "More information on partitioned databases"
+
+ The partitioned database format used by PyKX has been used in production environments for decades in many of the world's best-performing tier-1 investment banks. Today, there is a significant amount of information available on the creation and maintenance of these databases. Below are some articles related to their creation and querying.
+
+ - [Blog: Partitioning data with kdb+](https://kx.com/blog/partitioning-data-in-kdb/)
+ - [Q for Mortals Partitioned Tables](https://code.kx.com/q4m3/14_Introduction_to_Kdb%2B/#143-partitioned-tables)
+ - [Partitioned Tables](https://thinqkdb.wordpress.com/partitioned-tables/)
+
+## How to use databases in PyKX
+
+Creating and managing databases is crucial for handling large amounts of data. The `#!python pykx.DB` module helps make these tasks easier, Pythonic, and more user-friendly.
+
+PyKX Database API supports the following operations:
+
+| **Operation** | **Description** |
+|:-------------------------|:--------------------------------------------------------------------------------------------------|
+| [Generate](db_gen.md) | Learn how to generate a new historical database using data from Python/q and expand it over time. |
+| [Load](db_loading.md) | Learn how to load existing databases and fix some common issues with databases. |
+| [Manage](db_mgmt.md) | Copy, change datatypes or names of columns, apply functions to columns, delete columns from a table, rename tables and backfill data. |
+
+Check out a full breakdown of the [database API](../../../api/db.md).
+
+## Next Steps
+
+- Learn how to create a new database or update an existing one [here](db_gen.md).
+- Learn how to load an existing database [here](db_loading.md).
diff --git a/docs/user-guide/advanced/examples/streamlit.py b/docs/user-guide/advanced/examples/streamlit.py
new file mode 100644
index 0000000..5b881da
--- /dev/null
+++ b/docs/user-guide/advanced/examples/streamlit.py
@@ -0,0 +1,39 @@
+# Set environment variables needed to run Streamlit integration
+import os
+os.environ['PYKX_BETA_FEATURES'] = 'true'
+
+# This is optional but suggested as, without its usage, caching
+# is not supported within streamlit
+os.environ['PYKX_THREADING'] = 'true'
+
+import streamlit as st
+import pykx as kx
+import matplotlib.pyplot as plt
+
+
+def main():
+ st.header('PyKX Demonstration')
+ connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ port=5050,
+ username='user',
+ password='password')
+ if connection.is_healthy():
+ tab = connection.query('select from tab where size<11')
+ else:
+ raise kx.QError('Connection object was not deemed to be healthy')
+ fig, x = plt.subplots()
+ x.scatter(tab['size'], tab['price'])
+
+ st.write('Queried kdb+ remote table')
+ st.write(tab)
+
+ st.write('Generated plot')
+ st.pyplot(fig)
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ kx.shutdown_thread()
diff --git a/docs/user-guide/advanced/images/chained-subscriber.png b/docs/user-guide/advanced/images/chained-subscriber.png
new file mode 100644
index 0000000..2a3b216
Binary files /dev/null and b/docs/user-guide/advanced/images/chained-subscriber.png differ
diff --git a/docs/user-guide/advanced/images/simple-no-feed.png b/docs/user-guide/advanced/images/simple-no-feed.png
new file mode 100644
index 0000000..1ff9f7c
Binary files /dev/null and b/docs/user-guide/advanced/images/simple-no-feed.png differ
diff --git a/docs/user-guide/advanced/ipc.md b/docs/user-guide/advanced/ipc.md
index a9258e1..7b450fa 100644
--- a/docs/user-guide/advanced/ipc.md
+++ b/docs/user-guide/advanced/ipc.md
@@ -1,124 +1,372 @@
-# Communicating via IPC
+---
+title: Communicate via IPC
+description: Use PyKX via IPC
+date: June 2024
+author: KX Systems, Inc.,
+tags: PyKX, IPC,
+---
-q IPC connections are often used to connect into a central server / gateway that contains large amounts
-of historical data. PyKX `QConnection` instances provide a way to connect into these servers and directly query
-the data within them. This allows users to access data within a running q process, optionally convert it into
-a Python object and then locally perform analysis / transformations to the data within python. For licensed users
-the local object can be used within embedded q, for unlicensed users they will first have to convert it to a
-python type with one of the helper methods (`.py()`/`.np()`/`.pd()`/`.pa()`). This allows users to get
-the best of both worlds where they can harness the power of q as well as the power of other existing python
-libraries to perform analysis and modifications to q data.
+# Communicate via IPC
-## Modalities of use for IPC
+_This page explains how to use PyKX to communicate with q processes via IPC._
-Using the IPC module is available to both `licensed` and `unlicensed` users. Using a QConnection instance
-is the only way for an unlicensed user to run `q` code directly within PyKX. When using a
-`QConnection` instance in unlicensed mode you must convert the resulting value back into a python
-type before it is usable. In licensed mode the resulting value can be directly modified and used
-within Embedded Q without first converting it. For both licensed and unlicensed users this module can be
-used to replace the functionality of [`qPython`](https://github.com/exxeleron/qPython).
+Interprocess Communication (IPC) forms a central mechanism by which you can connect to and query existing kdb+/q infrastructures.
+
+The processes to which users are connecting and running queries often connect into a central server/gateway that contains vast amounts of historical data.
+
+There are 4 main types of IPC connections in PyKX.
+
+| **Connection Name** | **When it's often used** |
+| :-------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------- |
+| [`kx.SyncQConnection`](../../api/ipc.md#pykx.ipc.SyncQConnection) | When you need to retrieve data from a server. |
+| [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection)   | When you need to integrate with Python's `asyncio` library or when running queries on an event loop.                                       |
+| [`kx.SecureQConnection`](../../api/ipc.md#pykx.ipc.SecureQConnection) | When you need to connect to a kdb+/q server which has TLS enabled. |
+| [`kx.RawQConnection`](../../api/ipc.md#pykx.ipc.RawQConnection) | Used when more fine-grained control is required by a user to handle when messages are read, also used if emulating a q server from Python. |
+
+In the below sections you will learn more about these connections and how to
+
+- Establish a connection to an existing kdb+/q process
+- Run analytics/queries on existing kdb+/q processes
+- Reconnect to a process
+- Execute a local file
+- Integrate with Python asynchronous frameworks
+- Create your own IPC Server using PyKX
+
+!!! Note "To run the examples"
+
+ Before we get started the following sections will make use of a q process running on port 5050.
+
+ To emulate this you can download [this file](scripts/server.py) and run it as follows:
+
+ ```python
+ >>> import pykx as kx
+ >>> import subprocess
+ >>> import time
+ >>> with kx.PyKXReimport():
+ ... server = subprocess.Popen(
+ ... ('python', 'server.py', '5050'),
+ ... stdin=subprocess.PIPE,
+ ... stdout=subprocess.DEVNULL,
+ ... stderr=subprocess.DEVNULL,
+ ... )
+ ... time.sleep(2)
+ ```
+
+!!!Warning
+
+ This emulated server is less flexible and performant than a typical q server and as such, for best results use a q process for testing.
+
+Once you're done you can shut down the server as follows
```python
-# Licensed mode
-with kx.SyncQConnection('localhost', 5001) as q:
- result = q.til(10)
- print(result)
- print(result.py())
+>>> server.stdin.close()
+>>> server.kill()
+```
+
+## Connect to an existing system
+
+You can connect to processes in two ways
+
+1. Direct connection creation and management
+2. Connections established within a `#!python with` statement
+
+The documentation below also shows you how to connect to servers with additional requirements for establishing a connection, such as requiring a username/password or only allowing TLS encrypted connections.
+
+### Connect directly
+
+!!! Tip "Close connections"
+
+ It is best practice to close connections to processes once you have finished with them.
+
+In the below examples you can connect to a process on port 5050 and run a query.
+
+- Establish a connection to the server on port 5050, run a query and close the connection
+
+ ```python
+ >>> conn = kx.SyncQConnection('localhost', 5050)
+ >>> print(conn('1+1').py())
+ 2
+ >>> conn.close()
+ ```
+
+- Establish a connection using a `#!python kx.AsyncQConnection`, run a query and close the connection
+
+ ```python
+ >>> conn = await kx.AsyncQConnection('localhost', 5050)
+ >>> print(await conn('til 10').py())
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ >>> conn.close()
+ ```
+
+### Connect using a with statement
+
+To reduce the need to manually open/close connections, use the `#!python with` statement. This will automatically close the connection following execution:
+
+- Query a server on port 5050, run a query and automatically close the connection
+
+ ```python
+ >>> with kx.SyncQConnection('localhost', 5050) as conn:
+ ... print(conn('1+1').py())
+ 2
+ ```
+
+- Establish a connection using a `#!python kx.AsyncQConnection`, run a query and automatically close the connection
+
+ ```python
+ >>> async with kx.AsyncQConnection('localhost', 5050) as conn:
+ ... print(await conn('1+1'))
+ 2
+ ```
-0 1 2 3 4 5 6 7 8 9
-[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+### Connect to a restricted server
+
+You can authenticate on protected servers during connection creation using the optional parameters `username` and `password`
+
+```python
+>>> with kx.SyncQConnection('localhost', 5050, username='user', password='pass') as conn:
+... print(conn('1+1').py())
+2
+```
+
+If establishing a connection to a server where TLS encryption is required you can either use the `tls` keyword when establishing your [`kx.SyncQConnection`](../../api/ipc.md#pykx.ipc.SyncQConnection)/[`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection) instances, or use an instance of [`kx.SecureQConnection`](../../api/ipc.md#pykx.ipc.SecureQConnection)
+
+=== "Using a sync connection"
+
+ ```python
+ >>> with kx.SyncQConnection('localhost', 5050, tls=True) as conn:
+ ... print(conn('1+1'))
+ 2
+ ```
+
+=== "Using a secure connection"
+
+ ```python
+ >>> with kx.SecureQConnection('localhost', 5050) as conn:
+ ... print(conn('1+1'))
+ 2
+ ```
+
+## Run analytics on an existing system
+
+Once you have established a connection to your existing system there are various ways that you can run analytics or pass data to the server. The following breaks down the most important approaches
+
+- Call the connection directly
+- Using the context interface to call server side functions directly
+
+### Call the connection directly
+
+The most basic method of doing this is through direct calls against the connection object as has been used in the previous section and can be seen as follows:
+
+```python
+>>> with kx.SyncQConnection('localhost', 5050) as conn:
+... print(conn('1+1').py())
+2
```
+In this case any `#!python q` code can be used, for example querying a table on the remote server using qSQL:
+
```python
-# Unlicensed mode
-with kx.SyncQConnection('localhost', 5001) as q:
- result = q.til(10)
- print(result)
- print(result.py())
+>>> with kx.SyncQConnection('localhost', 5050) as conn:
+... print(conn('select from tab where x=`a, x1>0.9995').pd())
+ x x1 x2
+0 a 0.999522 3
+1 a 0.999996 8
+2 a 0.999742 2
+3 a 0.999641 6
+4 a 0.999515 1
+5 a 0.999999 3
+```
+
+You can call the connection object with an associated function and supplied parameters, for example:
-kx.LongVector._from_addr(0x7fcab6800b80)
-[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+```python
+>>> with kx.SyncQConnection(port=5050) as conn:
+... print(conn('{x+y+z}', 1, 2, 3))
+6
```
-## Methods of Instantiating
+### Call a named function on the server
-There are two ways to create each subclass of [`pykx.QConnection`][], the first is to directly instantiate
-the connection instance and the second option is to create them in the form of a context interface. Using
-the context interface method of declaring these [`pykx.QConnection`][] instances should be preferred as it will
-ensure that the connection instance is properly closed automatically when leaving the scope of the context.
+Using the "Context Interface", you can call namespaced functions on a remote server. This sends a message before executing a function to validate whether the function being called exists.
-Manually creating a `QConnection`
+In the below examples we will make use of two functions registered on a server. To facilitate this testing you can first set these functions on the server explicitly as follows
```python
-q = kx.SyncQConnection('localhost', 5001) # Directly instantiate a QConnection instance
-q(...) # Make some queries
-q.close() # Must manually ensure it is closed when no longer needed
+>>> with kx.SyncQConnection(port=5050) as conn:
+... conn('.test.addition:{x+y}')
+... conn('.test.namespace.subtraction:{x-y}')
```
-Using a context interface to create and manage the `QConnection`
+Firstly you can call the function `#!python .test.addition` directly:
```python
-with kx.SyncQConnection('localhost', 5001) as q:
- q(...) # Make some queries
-# QConnection is automatically closed here
+>>> with kx.SyncQConnection(port=5050) as conn:
+... print(conn.test.addition(4, 2))
+6
```
-## Performance Considerations
+Next you can call the function `#!python .test.namespace.subtraction` which uses a nested namespace:
+
+```python
+>>> with kx.SyncQConnection(port=5050) as conn:
+... print(conn.test.namespace.subtraction(4, 2))
+2
+```
+
+In the case that you do not have access to a named function/namespace you will receive an `#!python AttributeError`:
+
+```python
+>>> with kx.SyncQConnection(port=5050) as conn:
+... print(conn.test.unavailable(4, 2))
+AttributeError: 'pykx.ctx.QContext' object has no attribute 'unavailable'
+QError: '.test.unavailable
+```
+
+For more information on the context interface and how to use your q code in a Python-first manner, see [here](context_interface.md).
+
+### Run a local Python function on a server
+
+While not explicitly part of the IPC module of PyKX the ability to run your local Python functions on remote servers makes use of the IPC logic provided by PyKX heavily. Outlined in full detail [here](remote-functions.md), this functionality works by sending across to your server instructions to import relevant libraries, evaluate the function being run and pass data to this function for execution.
-When querying [`pykx.Table`][] instances on the remote process you should avoid directly calling the table object as
-that will result in the entirety of the table being sent over IPC and then loaded within the `Python` process.
-You should ensure that when querying tables over IPC that you are applying sufficient filters to your query,
-so that you limit the amount of data being converted and transfered between processes.
+In the examples below we can see the registration and use of these functions in practice where the `#!python kx.remote.session` objects are a form of IPC connection. In each case the function is defined in your local session but executed remotely:
-## Execution Contexts
+=== "Zero argument function"
-Functions pulled in over IPC execute locally within PyKX by default using embedded q.
-[Symbolic functions][pykx.SymbolicFunction] can be used to execute in a different context instead,
-such as over IPC in the q instance where the function was originally defined. The
-[context interface](../../api/pykx-execution/ctx.md) provides symbolic functions for all functions accessed through it by
-default.
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(host='localhost', port=5050)
+ >>> @kx.remote.function(session)
+ ... def zero_arg_function():
+ ... return 10
+ >>> zero_arg_function()
+ pykx.LongAtom(pykx.q('10'))
+ ```
-In the following example, `q` is a [`pykx.QConnection`][] instance.
+=== "Single argument function"
-The following call to the q function [`save`](../../api/pykx-execution/q.md#save) executes locally using embedded q,
-because `q('save')` returns a regular [`pykx.Function`][] object.
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(host='localhost', port=5050)
+ >>> @kx.remote.function(session)
+ ... def single_arg_function(x):
+ ... return x+10
+ >>> single_arg_function(10)
+ pykx.LongAtom(pykx.q('20'))
+ ```
+
+=== "Multi argument function"
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(host='localhost', port=5050)
+ >>> @kx.remote.function(session)
+ ... def multi_arg_function(x, y):
+ ... return x+y
+ >>> multi_arg_function(10, 20)
+ pykx.LongAtom(pykx.q('30'))
+ ```
+
+## Reconnect to a kdb+ server
+
+When a server with active connections becomes unavailable, restarts, or suffers an outage, all active connections will need to reconnect whenever the server recovers. This could mean closing an existing stale connection and reconnecting using the same credentials.
+
+PyKX allows you to manually configure reconnection attempts for clients connecting to servers using the `#!python reconnection_attempts` keyword argument. The following example shows the output when attempting to make use of a connection which has been cancelled and is subsequently re-established:
+
+```python
+>>> conn = kx.SyncQConnection(port=5050, reconnection_attempts=5)
+>>> conn('1+1') # after this call the server on port 5050 is shutdown for 2 seconds
+pykx.LongAtom(pykx.q('2'))
+>>> conn('1+2')
+WARNING: Connection lost attempting to reconnect.
+Failed to reconnect, trying again in 0.5 seconds.
+Failed to reconnect, trying again in 1.0 seconds.
+Connection successfully reestablished.
+pykx.LongAtom(pykx.q('3'))
+```
+
+Configuring `reconnection_attempts` on its own performs an exponential backoff, starting with a delay of 0.5 seconds and multiplying the delay by 2 at each attempt. Users wishing to have more control over how reconnection attempts are processed can modify the following keywords:
+
+- `reconnection_delay`: The initial delay between the first and second reconnection attempts
+- `reconnection_function`: The function/lambda which is used to change the delay between reconnections
+
+As an example, take the following connection which, when created, sets a constant delay of 1 second between each reconnection attempt:
```python
-with kx.SyncQConnection('localhost', 5001) as q:
- q('save')('t') # Executes locally within Embedded q
+>>> conn = kx.SyncQConnection(port=5050, reconnection_attempts=5, reconnection_delay=1, reconnection_function=lambda x:x)
+>>> conn('1+1') # after this call the server on port 5050 is shutdown for 3 seconds
+pykx.LongAtom(pykx.q('2'))
+>>> conn('1+2')
+WARNING: Connection lost attempting to reconnect.
+Failed to reconnect, trying again in 1.0 seconds.
+Failed to reconnect, trying again in 1.0 seconds.
+Failed to reconnect, trying again in 1.0 seconds.
+Connection successfully reestablished.
+pykx.LongAtom(pykx.q('3'))
```
-When [`save`](../../api/pykx-execution/q.md#save) is accessed through the [context interface](../../api/pykx-execution/ctx.md), it is a
-[`pykx.SymbolicFunction`][] object instead, which means it is simultaneously an instance of
-[`pykx.Function`][] and [`pykx.SymbolAtom`][]. When it is executed, the function retrived within
-its execution context using its symbol value, and so it is executed in the q server where
-[`save`](../../api/pykx-execution/q.md#save) is defined.
+To read more about reconnection options see the parameters of the [`kx.SyncQConnection`](../../api/ipc.md#pykx.ipc.SyncQConnection) class in the API documentation [here](../../api/ipc.md#pykx.ipc.SyncQConnection).
+
+## Execute a file on a server
+
+In addition to executing code remotely via explicit calls to various [`kx.SyncQConnection`](../../api/ipc.md#pykx.ipc.SyncQConnection) instances, you can also pass the name of a locally available file to these instances for remote execution. This allows you to package larger code updates as q files for reuse/persistence locally while testing against a remote process.
+
+This is possible provided that the file contains all necessary logic for execution, or the server has the required libraries and associated files to support the execution. In the below examples we will use a file created locally called `file.q` which can be generated as follows:
```python
-with kx.SyncQConnection('localhost', 5001) as q:
- q.save('t') # Executes in the q server over IPC
+>>> with open('file.q', 'w') as file:
+... file.write('''
+... .test.namespace.variable:2;
+... .test.namespace.function:{x+y};
+... ''')
```
-Alternatively, one can simply access & use the function by name manually within a single query.
-This differs from the first case because the query includes the argument for [`save`](../../api/pykx-execution/q.md#save),
-and so what is returned is the result of calling [`save`](../../api/pykx-execution/q.md#save) with the argument `t`,
-rather than the [`save`](../../api/pykx-execution/q.md#save) function itself.
+Here's an example of how to use this functionality on both a synchronous and asynchronous use case.
+
+
+=== "Synchronous"
+
+ ```python
+ >>> with kx.SyncQConnection(port = 5050) as q:
+ ... q.file_execute('file.q')
+ ... ret = q('.test.namespace.variable')
+ >>> ret.py()
+ 2
+ ```
+=== "Asynchronous"
+
+ ```python
+ >>> async with kx.AsyncQConnection('localhost', 5050) as q:
+ ... q.file_execute('file.q')
+ ... ret = await q('.test.namespace.function')
+ >>> ret
+ pykx.Lambda(pykx.q('{x+y}'))
+ ```
+
+
+To read more about the file execution API functionality see [here](../../api/ipc.md#pykx.ipc.QConnection.file_execute).
+
+## Communicate asynchronously
+
+When talking about asynchronous communication between `#!python Python` and `#!python q` there are two ways this can be interpreted, we will deal with these cases separately.
+
+1. Sending asynchronous messages to a `#!python q` process without expecting a response
+2. Integrating IPC workflows with Python's `#!python asyncio` library
+
+### Send messages without expecting a response
+
+To send messages to a q process without a response you do _not_ need to use a [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection) instance. Sending messages to a q process without anticipation of a response is facilitated through the `#!python wait` keyword, which should be set to `#!python False` when you are not expecting a response from the q server. Calls made with this keyword set will return `#!python pykx.Identity` objects:
```python
-with kx.SyncQConnection('localhost', 5001) as q:
- q('save', 't') # Executes in the q server over IPC
+>>> with kx.SyncQConnection('localhost', 5050) as q:
+... ret = q('1+1', wait=False)
+>>> ret
+pykx.Identity(pykx.q('::'))
```
-## Asynchronous Execution
+### Integrate with Python Async libraries
-In order to make asynchronous queries to `q` with `PyKX` a [`pykx.AsyncQConnection`][] must be used. When an
-instance of an [`pykx.AsyncQConnection`][] is called the query will be sent to the `q` server and control
-will be immediately handed back to the running Python program. The `__call__` function returns a
-[`pykx.QFuture`][] instance that can later be awaited on to block until a result has been received.
+To integrate with Python's async libraries such as `#!python asyncio` with `#!python PyKX`, you must use a [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection). When calling an instance of a [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection), the query is sent to the `#!python q` server and control is immediately handed back to the running Python program. The `#!python __call__` function returns a [`kx.QFuture`](../../api/ipc.md#pykx.ipc.QFuture) instance that can later be awaited on to block until it receives a result.
-If you are using a third party library that runs an eventloop to manage asynchronous calls, you must ensure
-you use the `event_loop` keyword argument to pass the event loop into the [`pykx.AsyncQConnection`][] instance.
-This will allow the eventloop to properly manage the returned [`pykx.QFuture`][] objects.
+If you're using a third-party library that runs an event loop to manage asynchronous calls, ensure you use the `#!python event_loop` keyword argument to pass the event loop into the [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection) instance. This allows the event loop to properly manage the returned [`kx.QFuture`](../../api/ipc.md#pykx.ipc.QFuture) objects and their lifecycle.
```python
async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q:
@@ -126,9 +374,7 @@ async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_l
await fut # await the future object to get the result
```
-If you are using an [`pykx.AsyncQConnection`][] to make q queries that respond in a [deferred manner](https://code.kx.com/q/basics/ipc/#async-message-set)
-, you must make the call using the `reuse=False` parameter. By using this parameter the query will be made over
-a dedicated [`pykx.AsyncQConnection`][] instance that is closed upon the result being received.
+If you're using a [`kx.AsyncQConnection`](../../api/ipc.md#pykx.ipc.AsyncQConnection) to make q queries that respond in a [deferred manner](https://code.kx.com/q/basics/ipc/#async-message-set), you must make the call using the `#!python reuse=False` parameter. This parameter helps to make the query over a dedicated `#!python pykx.AsyncQConnection` instance that is closed upon the result being received.
```python
async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_loop()) as q:
@@ -136,36 +382,58 @@ async with kx.AsyncQConnection('localhost', 5001, event_loop=asyncio.get_event_l
await fut # await the future object to get the result
```
-## File Execution
+## Create your own IPC Server using PyKX
-In addition to the ability to execute code remotely using explicit calls to the various [`pykx.QConnection`][] instances, it is also possible to pass to these instances the name of a file available locally which can be executed on the remote server. This is supported under the condition that the file being executed remotely contains all of the required logic to be executed, or the server contains sufficient libraries and associated files to allow execution to occur.
+There are several cases where providing the ability for users to open IPC connections to Python processes via the q native IPC protocol provides advantages. This is particularly useful if you are looking to manage infrastructure in Python which kdb+ users are likely to communicate with using q.
-The following provide and example of the usage of this functionality on both a syncronous and asyncronous use-case.
+The [`server.py`](scripts/server.py) file that you may have called at the start of this page makes use of this functionality and specifically uses a [`kx.RawQConnection`](../../api/ipc.md#pykx.ipc.RawQConnection) to allow connections to be made, this script is defined in plain text as follows:
```python
-with kx.SyncQConnection(port = 5000) as q:
- q.file_execute('/absolute/path/to/file.q')
- ret = q('.test.variable.set.in.file.q', return_all=True)
-```
+import asyncio
+import sys
-```python
-async with kx.AsyncQConnection('localhost', 5001) as q:
- q.file_execute('../relative/path/to/file.q')
- ret = await q('.test.variable.set.in.file.q')
-```
-## Reconnecting to a kdb+ server
+import pykx as kx
-When generating a client-server architecture it is often the case that for short periods of time your server may be inaccessible due to network issues or planned outages. At such times clients connected to these servers will need to reconnect, this may require them to manually 'close' their existing stale connection and reconnect using the same credentials to the now restarted server. From PyKX 2.4+ the ability to manually configure reconnection attempts for clients connecting to servers has been added via the addition of the `reconnection_attempts` keyword argument. The following example shows the output of when attempting to make use of a connection which has been cancelled and is subsequently re-established:
+port = 5010
+if len(sys.argv)>1:
+ port = int(sys.argv[1])
-```python
->>> conn = kx.SyncQconnection(port=5050, reconnection_attempts=5)
->>> conn('1+1') # after this call the server on port 5050 is shutdown for 2 seconds
-pykx.LongAtom(pykx.q('2')
->>> conn('1+2')
-WARNING: Connection lost attempting to reconnect.
-Failed to reconnect, trying again in 0.5 seconds.
-Failed to reconnect, trying again in 1.0 seconds.
-Connection successfully reestablished.
-pykx.LongAtom(pykx.q('3'))
+
+def qval_sync(query):
+ res = kx.q.value(query)
+ print("sync")
+ print(f'{query}\n{res}\n')
+ return res
+
+
+def qval_async(query):
+ res = kx.q.value(query)
+ print("async")
+ print(f'{query}\n{res}\n')
+
+
+async def main():
+ kx.q.z.pg = qval_sync
+ kx.q.z.ps = qval_async
+ kx.q('@[system"l ",;"s.k_";{show "Failed to load SQL"}]')
+ kx.q('tab:([]1000?`a`b`c;1000?1f;1000?10)')
+ async with kx.RawQConnection(port=port, as_server=True, conn_gc_time=20.0) as q:
+ print('Server Initialized')
+ while True:
+ q.poll_recv()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
```
+
+Notably the definition of [`kx.RawQConnection`](../../api/ipc.md#pykx.ipc.RawQConnection) uses the keyword `#!python as_server=True` to indicate that it should anticipate external connections, and the tight while loop running `#!python q.poll_recv` will manage the execution of incoming queries. It is also worth noting that in the definition of the `#!python main` function that you can set and specify both the `#!python kx.q.z.pg` and `#!python kx.q.z.ps` functions which manage how messages are handled in synchronous and asynchronous cases.
+
+For a full breakdown on `#!python kx.RawQConnection` type connections see [here](../../api/ipc.md#pykx.ipc.RawQConnection)
+
+## Next Steps
+
+- [Deep dive into how to execute Python functions remotely](remote-functions.md)
+- [Create your first database](database/db_gen.md)
+- [Query data using Python](../fundamentals/query/pyquery.md)
diff --git a/docs/user-guide/advanced/license.md b/docs/user-guide/advanced/license.md
new file mode 100644
index 0000000..0cfb9db
--- /dev/null
+++ b/docs/user-guide/advanced/license.md
@@ -0,0 +1,98 @@
+---
+title: Manage your license
+description: Tips and tricks for managing licenses
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, license, licenses
+---
+
+# Manage your license
+
+_Tips and tricks for managing licenses_
+
+In the [install page](../../getting-started/installing.md) you can follow along with how to install your first license. This is an important first step for you to get the most out of the functionality provided by PyKX. However, there are a number of cases where upgrading this license will be necessary:
+
+1. Your license has expired
+2. You need to upgrade from a personal to enterprise/commercial license
+
+## When will your license expire?
+
+For personal edition licenses your license will expire one year after initial download; for enterprise/commercial licenses the expiry date will vary. To provide some forewarning of when your license will expire PyKX provides the following:
+
+- For the 10 days before expiry PyKX will print a message at start up indicating the license will expire in N days
+- The utility function `#!python kx.license.expires` provides a programmatic method of finding days to expiry.
+
+As an example the following shows you both in action:
+
+=== "Expiring soon"
+
+ ```python
+ >>> import pykx as kx
+ WARNING:root:PyKX license set to expire in 8 days, please consider installing an updated license
+ ```
+
+=== "Checking expiry"
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.expires()
+ 8
+ ```
+
+## Update a license
+
+When your license is expired/expiring you will need to update it to ensure you can continue to use the software.
+
+There are two methods by which updating your license is possible with PyKX.
+
+- You have allowed your license to expire and on restart of PyKX you will be presented with a new license install walkthrough similar to [installing](../../getting-started/installing.md).
+- You pre-emptively install a newly downloaded license using `#!python kx.license.install`.
+
+=== "After Expiry"
+
+ Now that your license has expired importing PyKX will result in the following walkthrough being presented, following this will allow you to install a new license.
+
+ ```python
+ >>> import pykx as kx
+ Your PyKX license has now expired.
+
+ Captured output from initialization attempt:
+ '2023.10.18T13:27:59.719 licence error: exp
+
+ License location used:
+ /usr/local/anaconda3/pykx/kc.lic
+
+ Would you like to renew your license? [Y/n]: Y
+
+ Do you have access to an existing license for PyKX that you would like to use? [N/y]:
+ ```
+
+=== "Pre-emptive install"
+
+ If you have downloaded your new license prior to expiry you can install it with `#!python kx.license.install`.
+
+ - Install an updated `kc.lic` license from a file
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.install('/tmp/new/location/kc.lic', force=True)
+ ```
+
+ - Install an updated `k4.lic` license from the base64 license key
+
+ ```python
+ >>> import pykx as kx
+ # String truncated for display purposes
+ >>> b64key = 'dajsi8d9asnhda8sld..'
+ >>> kx.license.install(b64key,
+ ... format='STRING',
+ ... license_type='k4.lic',
+ ... force=True)
+ ```
+
+## Upgrade to a commercial license
+
+If you are currently using a `kc.lic` personal license and need to upgrade to a `k4.lic` license the following steps allow you to ensure this can be done effectively.
+
+1. Delete your existing `kc.lic` license.
+2. Install your new license following the [license installation](../../getting-started/installing.md) guide.
diff --git a/docs/user-guide/advanced/modes.md b/docs/user-guide/advanced/modes.md
index bd2eb7d..69c70ec 100644
--- a/docs/user-guide/advanced/modes.md
+++ b/docs/user-guide/advanced/modes.md
@@ -1,52 +1,66 @@
-# Modes of Operation
+---
+title: PyKX modes of operation
+description: Operating PyKX in Python and q
+date: June 2024
+author: KX Systems, Inc.,
+tags: PyKX, q language, Python, licensed, unlicensed,
+---
-PyKX exists to supersede all previous interfaces between q and Python, this document outlines the various conditions under which PyKX can operate and the limitations/requirements which are imposed under these distinct operating modalities, specifically this document breaks down the following:
+# Modes of operation
-- PyKX within a Python session
- - Operating with a valid KX License
- - Operating in the absence of a valid KX License
-- PyKX within a q session with a valid KX License
+_This page explains how to operate PyKX in Python and q, with or without a kdb Insights license._
-## PyKX within a Python session
+PyKX can operate in different modes. Each mode has its limitations and requirements, so we're breaking them down into the following:
-PyKX operating within a Python session is intended to offer a replacement for [qPython](https://github.com/exxeleron/qPython) and [PyQ](https://github.com/kxsystems/pyq). In order to facilitate replacement of qPython PyKX provides a mode of operation for IPC based communication which allows for the creation of IPC connections and the conversion of data from Pythonic representations to kx objects, this IPC only modality is referred to as `"Unlicensed mode"` within the documentation. The following outline the differences between `"Licensed"` and `"Unlicensed"` operation.
+1. PyKX within Python
+ - 1.a Unlicensed
+ - 1.b Licensed
+2. PyKX within q with a kdb Insights license
-The following table outlines some of the key differences between the two operating modes
+## 1. PyKX within Python
-| Feature | With a PyKX Enabled License | Without a PyKX Enabled License |
-|------------------------------------------------------------------------------|-----------------------------|--------------------------------|
-| Convert objects from q to Pythonic types and vice-versa | :material-check: | :material-check: |
-| Query synchronously and asynchronously an existing q server via IPC | :material-check: | :material-check: |
-| Query synchronously and asynchronously an existing q server with TLS enabled | :material-check: | :material-close: |
-| Interact with PyKX tables via a Pandas like API | :material-check: | :material-close: |
-| Can run arbitrary q code within a Python session | :material-check: | :material-close: |
-| Display PyKX/q objects within a Python session | :material-check: | :material-close: |
-| Load kdb+ Databases within a Python session | :material-check: | :material-close: |
-| Can read/write JSON, CSV and q formats to/from disk | :material-check: | :material-close: |
-| Access to Python classes for SQL, schema creation custom data conversion | :material-check: | :material-close: |
-| Run Python within a q session using PyKX under q | :material-check: | :material-close: |
-| Full support for nulls, infinities, data slicing and casting | :material-check: | :material-close: |
-| Production Support | :material-check: | :material-close: |
+The purpose of operating PyKX within a Python session is to replace [qPython](https://github.com/exxeleron/qPython) and [PyQ](https://github.com/kxsystems/pyq). Within Python, PyKX has two modes of operation:
-### Operating in the absence of a KX License
+- `#!python Licensed` (this means you have a kdb Insights license with PyKX enabled)
+- `#!python Unlicensed` (this means you either don't have a kdb Insights license, or your license does not have PyKX enabled)
-Unlicensed mode is a feature-limited mode of operation for PyKX which aims to replace qPython, which has the benefit of not requiring a valid q license (except for the q license required to run the remote q process that PyKX will connect to in this mode).
+The main difference between the two is that the `#!python Unlicensed` mode is for IPC-based communication. This means that it allows you to create IPC connections and convert data from Pythonic representations to PyKX objects.
-This mode cannot run q embedded within it, and so it lacks the ability to run q code within the local Python process, and also every feature that depends on running q code. Despite this limitation, it provides the following features (which are all also available in licensed mode):
+The following table outlines more key differences:
-- Conversions from Python to q
- - With the exception of Python callable objects
+| **Feature** | **Licensed** | **Unlicensed** |
+| :--------------------------------------------------------------------------- | :-------------------- | :--------------------- |
+| Convert objects from q to Pythonic types and vice-versa | :material-check: | :material-check: |
+| Query synchronously and asynchronously a q server via IPC | :material-check: | :material-check: |
+| Query synchronously and asynchronously a q server with TLS enabled | :material-check: | :material-close: |
+| Interact with PyKX tables via a Pandas like API | :material-check: | :material-close: |
+| Run arbitrary q code within a Python session | :material-check: | :material-close: |
+| Display PyKX/q objects within a Python session | :material-check: | :material-close: |
+| Load kdb+ Databases within a Python session | :material-check: | :material-close: |
+| Read/write JSON, CSV and q formats to/from disk | :material-check: | :material-close: |
+| Access to Python classes for SQL, schema creation, custom data conversion | :material-check: | :material-close: |
+| Run Python within a q session using PyKX under q | :material-check: | :material-close: |
+| Full support for nulls, infinities, data slicing and casting | :material-check: | :material-close: |
+| Production support | :material-check: | :material-close: |
+
+### 1.a Running in Unlicensed mode
+
+Unlicensed mode is a feature-limited mode of operation for PyKX. Its aim is to replace qPython, and it has the benefit of not requiring a valid q license (except for the q license required to run the remote q process that PyKX connects to in this mode).
+
+This mode cannot run q embedded within it. Also, it lacks the ability to run q code within the local Python process or any functionality that depends on running q code. Despite this limitation, it provides the following features (which are all also available in licensed mode):
+
+- Conversions from Python to q, except Python-callable objects
- Conversions from q to Python
- [A q IPC interface](../../api/ipc.md)
-### Operating with a valid KX License
+### 1.b Running in Licensed mode
-Licensed mode is the standard mode of operation of PyKX, wherein it is running under a Python process [with a valid q license](../../getting-started/installing.md#licensing-code-execution-for-pykx). This modality aims to replace PyQ as the Python first library for KX. All PyKX features are available in this mode.
+Licensed mode is the standard way to operate PyKX, wherein it's running under a Python process [with a valid q license](../../getting-started/installing.md#licensing-code-execution-for-pykx). This modality aims to replace PyQ as the Python-first library for KX. All PyKX features are available in this mode.
-The following are the differences provided through operation with a valid KX License
+The differences provided through operating with a valid kdb Insights license are:
-1. Users can execute PyKX/q functionality directly within a Python session
-2. PyKX objects can be represented in a human readable format rather than as a memory address, namely
+1. You can execute PyKX/q functionalities directly within a Python session.
+2. PyKX objects can be represented in a human readable format rather than as a memory address, namely:
=== "Licensed mode"
@@ -58,6 +72,7 @@ The following are the differences provided through operation with a valid KX Lic
0 8c6b8b64-6815-6084-0a3e-178401251b68
1 5ae7962d-49f2-404d-5aec-f7c8abbae288
2 5a580
+ '))
```
=== "Unlicensed mode"
@@ -67,7 +82,7 @@ The following are the differences provided through operation with a valid KX Lic
pykx.Table._from_addr(0x7f5b72ef8860)
```
-3. PyKX objects can be introspected through indexing
+3. You can analyze PyKX objects through indexing:
=== "Licensed mode"
@@ -92,7 +107,7 @@ The following are the differences provided through operation with a valid KX Lic
pykx.exceptions.LicenseException: A valid q license must be in a known location (e.g. `$QLIC`) to index into K object.
```
-4. Users can cast between kx object types explicitly
+4. Licensed mode allows users to cast between PyKX object types. Unlicensed mode doesn't support this, showing an error as below:
=== "Licensed mode"
@@ -117,37 +132,41 @@ The following are the differences provided through operation with a valid KX Lic
return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags
File "pykx/toq.pyx", line 2543, in pykx.toq.ToqModule.__call__
File "pykx/toq.pyx", line 470, in pykx.toq.from_pykx_k
- pykx.exceptions.LicenseException: A valid q license must be in a known location (e.g. `$QLIC`) to directly convert between K types..
+ pykx.exceptions.LicenseException: A valid q license must be in a known location (e.g. `$QLIC`) to directly convert between K types.
```
-5. Access to the following classes/functionality are supported when running in the licensed modality but not unlicensed, note this is not an exhaustive list
+5. Only licensed mode supports the classes/functionalities below. This is not an exhaustive list:
1. kx.q.sql
2. kx.q.read
3. kx.q.write
4. kx.q.schema
5. kx.q.console
-6. [Pandas API](Pandas_API.ipynb) functionality for interactions with and PyKX Table objects
-6. Keyed tables can be converted to equivalent Numpy types
+6. [Pandas API](Pandas_API.ipynb) functionality for interactions with PyKX Table objects.
+6. You can convert keyed tables to equivalent Numpy types.
7. All types can be disambiguated, generic null can be discerned from a projection null, and similar for regular vs splayed tables.
-8. Numpy list object conversion when operating with a valid PyKX license are optimized relative to unlicensed mode.
-9. The `is_null`, `is_inf`, `has_nulls`, and `has_infs` methods of `K` objects are only supported when using a license.
+8. Numpy list object conversion is optimized only in licensed mode.
+9. Only licensed mode grants users access to the `#!python is_null`, `#!python is_inf`, `#!python has_nulls`, and `#!python has_infs` methods of `#!python K` objects.
+
+### How to choose between Licensed and Unlicensed
-### Choosing to run with/without a license
+You can choose to initialise PyKX under one of these modes through the use of the `#!python QARGS` environment variable as follows:
-Users can choose to initialise PyKX under one of these modalities explicitly through the use of the `QARGS` environment variable as follows:
+| **Mode argument** | **Description** |
+| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- |
+| `--unlicensed` | Starts PyKX in unlicensed mode. No license check is performed, and no warning is emitted at start-up if embedded q initialization fails. |
+| `--licensed` | Raises a `PyKXException` (as opposed to emitting a `PyKXWarning`) if embedded q initialization fails. |
-| Modality argument| Description|
-|------------------|----------|
-| `--unlicensed` | Starts PyKX in unlicensed mode. No license check will be performed, and no warning will be emitted at start-up if embedded q initialization fails. |
-| `--licensed` | Raise a `PyKXException` (as opposed to emitting a `PyKXWarning`) if embedded q initialization fails.
+In addition to the PyKX specific start-up arguments, you can also use `#!python QARGS` to set the standard [q command-line arguments](https://code.kx.com/q/basics/cmdline/).
-In addition to the PyKX specific start-up arguments `QARGS` also can be used to set the standard [q command-line arguments](https://code.kx.com/q/basics/cmdline/).
+Alternatively, if you wish to access PyKX in unlicensed mode, you can set the environment variable `#!python PYKX_UNLICENSED="true"` or define this in your `#!python .pykx-config` file as outlined [here](../configuration.md).
-Alternatively for users who wish to make use of PyKX in unlicensed mode they can set the environment variable `PYKX_UNLICENSED="true"` or define this in their `.pykx-config` file as outlined [here](../configuration.md).
+## 2. PyKX within q
-## PyKX within a q session
+Fully described [here](../../pykx-under-q/intro.md), the ability to use PyKX within a q session allows you to achieve the following:
-Fully described [here](../../pykx-under-q/intro.md) the ability to use PyKX within a q session directly is intended to provide the ability to replace [embedPy](https://github.com/kxsystems/embedpy) functionally with an updated and more flexible interface. Additionally it provides the ability to use Python functionality within a q environment which does not have the central limitations that exist for PyKX as outlined [here](limitations.md), namely Python code can be used in conjunction with timers and subscriptions within a q/kdb+ ecosystem upon which are reliant on these features of the language.
+- Replace [embedPy](https://github.com/kxsystems/embedpy) functionally with an updated, more flexible interface.
+- Use Python within a q environment without the [limitations for PyKX](../../help/issues.md).
+- Use Python code in conjunction with timers and subscriptions within a q/kdb+ ecosystem.
-Similar to the use of PyKX in it's licensed modality PyKX running under q requires a user to have access to an appropriate license containing the `insights.lib.pykx` and `insights.lib.embedq` licensing flags.
+Similar to the use of PyKX in licensed mode, PyKX running under q requires a user to have access to an appropriate license containing the `#!python insights.lib.pykx` and `#!python insights.lib.embedq` licensing flags.
diff --git a/docs/user-guide/advanced/numpy.md b/docs/user-guide/advanced/numpy.md
index cea734a..cf49338 100644
--- a/docs/user-guide/advanced/numpy.md
+++ b/docs/user-guide/advanced/numpy.md
@@ -1,56 +1,69 @@
-# Numpy Integration
+---
+title: NumPy Integration
+description: Integrate PyKX with NumPy
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, NumPy
+---
-PyKX is designed for advanced integration with Numpy.
-This integration is built on three pillars: [NEP-49](https://numpy.org/neps/nep-0049.html), the Numpy [array interface](https://numpy.org/doc/stable/reference/arrays.interface.html), and [universal functions](https://numpy.org/doc/stable/reference/ufuncs.html).
+# NumPy Integration
+_This page explains how to integrate PyKX with NumPy._
+
+PyKX is designed for advanced integration with NumPy. This integration is built on three pillars:
+
+- [NEP-49](https://numpy.org/neps/nep-0049.html)
+- the NumPy [array interface](https://numpy.org/doc/stable/reference/arrays.interface.html)
+- [universal functions](https://numpy.org/doc/stable/reference/ufuncs.html)
## Support for NEP-49 and 0-copy data transfer from Numpy to q (when possible)
-To use NEP-49 and benefit from 0-copy data transfers from Numpy to q, the following environment variable needs to be set before importing PyKX: `PYKX_ALLOCATOR=1`.
-Once enabled, PyKX leverages NEP-49 to replace Numpy's memory allocator with the q/k memory allocator. This makes Numpy arrays directly available to q (by passing only a pointer) and accelerates the conversion time from Numpy arrays to q significantly.
+To use NEP-49 and benefit from 0-copy data transfers from NumPy to q, you need to set the `#!python PYKX_ALLOCATOR=1` environment variable before importing PyKX.
+Once enabled, PyKX leverages NEP-49 to replace NumPy's memory allocator with the q/k memory allocator. This makes NumPy arrays directly available to q (by passing only a pointer) and accelerates the conversion time from NumPy arrays to q significantly.
-Without NEP-49 (`PYKX_ALLOCATOR=0`):
+Without NEP-49 (`#!python PYKX_ALLOCATOR=0`):
```python
In [1]: arr = np.random.rand(1000000)
In [2]: %timeit kx.toq(arr)
421 µs ± 9.42 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
```
-With NEP-49 (`PYKX_ALLOCATOR=1`):
+With NEP-49 (`#!python PYKX_ALLOCATOR=1`):
```python
In [1]: arr = np.random.rand(1000000)
In [2]: %timeit kx.toq(arr)
5.4 µs ± 150 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
```
-In the example above, transferring a Numpy array of one million `float64` numbers runs 80x faster with NEP-49 enabled (`PYKX_ALLOCATOR=1`).
+In the example above, transferring a NumPy array of one million `#!python float64` numbers runs 80x faster with NEP-49 enabled (`#!python PYKX_ALLOCATOR=1`).
!!! Note
- With NEP-49 enabled, 0-copy data transfer will happen for the following target q types: booleans (`1h`), bytes (`4h`), shorts (`5h`), integers (`6h`), longs (`7h`), reals (`8h`), floats (`9h`), timespan (`16h`), minutes (`17h`), seconds (`18h`) and times (`19h`).
+ With NEP-49 enabled, 0-copy data transfer happens for the following target q types: booleans (`1h`), bytes (`4h`), shorts (`5h`), integers (`6h`), longs (`7h`), reals (`8h`), floats (`9h`), timespan (`16h`), minutes (`17h`), seconds (`18h`) and times (`19h`).
- A data copy will happen for the following target q types: guids (`2h`), chars (`10h`), symbols (`11h`), timestamps (`12h`), months (`13h`) and dates (`14h`).
+ A data copy happens for the following target q types: guids (`2h`), chars (`10h`), symbols (`11h`), timestamps (`12h`), months (`13h`) and dates (`14h`).
+
+## Support for NumPy array interface and universal functions on pykx/q vectors
-## Support for Numpy array interface and universal functions on pykx/q vectors
+PyKX vectors implement the NumPy array interface and are compatible with universal functions. This means all those NumPy functions (and more) can be used directly on PyKX vectors and hence, on q vectors.
-PyKX vectors implement the Numpy array interface and are compatible with universal functions. This means all those Numpy functions (and more) can be used directly on PyKX vectors and hence, on q vectors.
-The following are a number of useful links relating to universal functions which can be leveraged using this:
+Here are several helpful links related to universal functions that you can use with this:
-* [Numpy universal functions](https://numpy.org/doc/stable/reference/ufuncs.html#available-ufuncs)
+* [NumPy universal functions](https://numpy.org/doc/stable/reference/ufuncs.html#available-ufuncs)
* [Scipy universal functions](https://docs.scipy.org/doc/scipy/reference/special.html#available-functions)
-* [CuPy universal functions (GPU)](https://docs.cupy.dev/en/stable/reference/ufunc.html) (q vectors will be transferred to GPU)
+* [CuPy universal functions (GPU)](https://docs.cupy.dev/en/stable/reference/ufunc.html) (transfers q vectors to GPU)
* [Custom universal functions with Numba](https://numba.readthedocs.io/en/stable/user/vectorize.html)
* [Custom universal functions with C++ and the Boost library](https://www.boost.org/doc/libs/1_65_1/libs/python/doc/html/numpy/tutorial/ufunc.html)
-## A little experiment with universal functions
+## Experiment with universal functions
-Let's take the Greater Common Divisor problem (GCD) to compare different implementations using Python, q, and some custom universal functions.
+Let's take the Greatest Common Divisor problem (GCD) to compare different implementations using Python, q, and custom universal functions.
The script below implements 5 different solutions for GCD calculation:
-* `qgcd`: Naive q implementation, process one pair of integers at a time.
-* `qgcd2`: q vectorized implementation.
-* `gcd`: Naive python implementation, process one pair of integers at a time.
-* `gcd2`: Custom `ufunc` vectorized and compiled JIT with Numba.
-* `gcd3`: Custom `ufunc` vectorized, parallelized on all cores and compiled JIT with Numba.
+* `#!python qgcd`: Naive q implementation, processes one pair of integers at a time.
+* `#!python qgcd2`: q vectorized implementation.
+* `#!python gcd`: Naive Python implementation, processes one pair of integers at a time.
+* `#!python gcd2`: Custom `#!python ufunc` vectorized and compiled JIT with Numba.
+* `#!python gcd3`: Custom `#!python ufunc` vectorized, parallelized on all cores and compiled JIT with Numba.
```python
import numpy as np
@@ -94,7 +107,7 @@ qa = kx.toq(a)
qb = kx.toq(b)
```
-We can use IPython to load this script and benchmark the different implementations with `%timeit`. We will also compare to `np.gcd`, the Numpy ufunc for GCD calculation.
+We can use IPython to load this script and benchmark the different implementations with `#!python %timeit`. We will also compare to `#!python np.gcd`, the NumPy ufunc for GCD calculation.
```bash
$ PYKX_ALLOCATOR=1 ipython -i test_numpy_ufuncs.py
diff --git a/docs/user-guide/advanced/performance.md b/docs/user-guide/advanced/performance.md
index 535bedb..ea38f75 100644
--- a/docs/user-guide/advanced/performance.md
+++ b/docs/user-guide/advanced/performance.md
@@ -1,24 +1,47 @@
-# Performance considerations
-
-To get the best performance out of PyKX, follow the guidelines explained on this page. Note that this page doesn't concern itself with getting the best performance out of Python itself, or out of q itself. Rather this page is focused on how to interface between the two most efficiently.
-
-- Avoid converting K objects with their `.py`/`.np`/`.pd`/etc. methods. Oftentimes the K object itself is sufficient for the task at hand.
-- **Do as little work as necessary:**
- - When conversion is necessary, only convert what is really needed. For instance, instead of converting an entire q table to a dataframe, perhaps only a subset of the columns need to be converted into Numpy arrays. You could get these columns by indexing into the [`pykx.Table`][pykx.Table], then calling `.np` on the columns returned.
- - When using an IPC connection, make use of select statements and indexing to only send the subset of the data you want to process in Python over the IPC connection.
-- Prefer using `.np` and `.pd` over `.py`. If a conversion must happen, try to stick to the Numpy/Pandas conversions which avoid copying data where possible. Converting objects with `.py` will always incur a data copy (if the conversion is possible at all - n.b. some K objects return themselves when `.py` is called on them, such as [`pykx.Function`][pykx.Function]) instances.
-- Convert with the keyword argument `raw=True` when performance is more important than the richness of the output. Using a raw conversion can be much more efficient in many cases by not doing some work, such as adjusting the temporal epoch from 2000-01-01 to 1970-01-01, turning q GUIDs into Python `UUID` objects (instead they will come through as complex numbers, as that is the only widely available 128 bit type), converting bytes into strings, and more.
-- Avoid nested columns when converting q tables into Pandas dataframes, as this currently incurs a data copy.
-- **Let q do the heavy lifting:**
- - When running in licensed mode, make use of q code and q functions (e.g. `q.avg`, `q.sdev`, etc.) instead of pure Python code. This is similar to how you should use Numpy functions to operate on Numpy arrays instead of pure Python code. Note that the performance of Numpy functions on K vectors that have been converted to Numpy arrays is often comparable, even when including the conversion overhead.
- - When using an IPC connection to a remote q process, consider using q code to offload some of the work to the q process by pre-processing the data in q.
-- Avoid converting large amounts of data from Python to q. Conversions from q to Python (via Numpy) can often avoid copying data, but conversions from Python to q always incur a copy of the data.
+---
+title: PyKX Performance
+description: How to optimize PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, performance, parallelization, secondary q threads, multithreading, peach
+---
+
+# Performance tips
+
+_This page includes PyKX performance optimization tips, including insights on parallelization, secondary q threads, multithreading, and peach._
+
+To get the best performance out of PyKX, follow these guidelines. Note that this page focuses on efficiently interfacing between Python and q, rather than optimizing Python or q individually.
+
+## General guidelines
+
+1. **Avoid Unnecessary Conversions**. Avoid converting K objects with their `#!python .py`/`#!python .np`/`#!python .pd` methods unless necessary. Often, the K object itself is sufficient.
+
+1. **Avoid Nested Columns** when converting q tables into Pandas dataframes, as this currently incurs a data copy.
+
+1. **Do as Little Work as Necessary**. Convert only what is needed. For example, instead of converting an entire q table to a dataframe, convert only the required columns into Numpy arrays by indexing into the [`#!python pykx.Table`][pykx.Table] and calling `#!python .np` on the columns. Use select statements and indexing to send only the necessary subset of data over an IPC connection.
+
+1. **Prefer `#!python .np` and `#!python .pd` Over `#!python .py`**. Use Numpy/Pandas conversions to avoid data copying where possible. Converting objects with `.py` always incurs a data copy and may not always be possible (for example, some K objects, such as [`pykx.Function`][pykx.Function] instances, return themselves when `.py` is called).
+
+1. **Use `#!python raw=True` for Performance**. When performance is more important than the richness of the output, use the `#!python raw=True` keyword argument. This can be more efficient by skipping certain adjustments, such as:
+
+ - Temporal epoch adjustments from `#!python 2000-01-01` to `#!python 1970-01-01`.
+ - Converting q `#!python GUIDs` to Python `#!python UUID` objects (they will come through as complex numbers instead).
+ - Converting bytes into strings.
+
+1. **Let q do the heavy lifting.** When using licensed mode, prefer q code and functions (like `#!python q.avg`, `#!python q.sdev`) over pure Python code. This is similar to using Numpy functions for Numpy arrays instead of pure Python.
+
+ - Numpy functions on K vectors converted to Numpy arrays perform well, even with conversion overhead.
+ - When using an IPC connection to a remote q process, use q code to pre-process data and reduce the workload on Python.
+ - Avoid converting large data from Python to q. Conversions from q to Python (via Numpy) often avoid data copying, but Python to q conversions always copy the data.
## Parallelization
+Parallelization involves distributing computational tasks across multiple threads to improve performance and efficiency.
+Use the following methods if you want to allow PyKX to handle large-scale data processing tasks efficiently by utilizing the available computational resources: secondary q threads, multithreading, or `#!python peach`.
+
### Secondary q threads
-PyKX starts embedded q with as many secondary q threads enabled as are available. These threads are automatically used by q to parallelize some computations as it deems appropriate. The `QARGS` environment variable can be used to provide command-line arguments and other startup flags to q/PyKX, including the number of secondary threads:
+PyKX starts embedded q with as many secondary q threads enabled as are available. q automatically uses these threads to parallelize some computations as it deems appropriate. You can use the `#!python QARGS` environment variable to provide command-line arguments and other startup flags to q/PyKX, including the number of secondary threads:
```sh
QARGS='-s 0' python # disable secondary threads
@@ -28,22 +51,18 @@ QARGS='-s 0' python # disable secondary threads
QARGS='-s 12' python # use 12 secondary threads by default
```
-The value set using `-s` provides both the default, and the maximum available to the process - it cannot be changed after PyKX has been imported.
-
-PyKX exposes this maximum value as `pykx.q.system.max_num_threads`, which cannot be assigned to. The current number of secondary threads being used by q is exposed as `pykx.q.system.num_threads`. It is initially equal to `pykx.q.system.max_num_threads`, but can be assigned to a lower value.
+- The value set using `#!python -s` sets both the default and the maximum available to the process; you can't change it after importing PyKX.
+- `#!python pykx.q.system.max_num_threads` shows the maximum number of threads and cannot be changed.
+- `#!python pykx.q.system.num_threads` shows the current number of threads in use. It starts at the maximum value but can be set to a lower number.
-### Multi-threading
-By default PyKX does not currently support calling into q from multiple threads within a Python process simultaneously.
-The [GIL](https://wiki.python.org/moin/GlobalInterpreterLock) generally prevents this from occurring.
+### Multithreading
-However enabling the `PYKX_RELEASE_GIL` environment variable will cause the Python Global Interpreter Lock to be dropped when calling into `q`.
-Caution must be used when calling into q from multiple threads if this environment variable is set as it will no longer be thread safe, you can optionally also
-enable the `PYKX_Q_LOCK` environment variable as well which will add an extra re-entrant lock around embedded q to ensure two threads cannot access `q`'s memory in an unsafe manner.
+By default, PyKX doesn’t support calling q from multiple threads in a Python process due to the Global Interpreter Lock ([GIL](https://wiki.python.org/moin/GlobalInterpreterLock)). Enabling the `#!python PYKX_RELEASE_GIL` environment variable drops the GIL when calling q, making it unsafe to call q from multiple threads. To ensure thread safety, you can also enable the `#!python PYKX_Q_LOCK` environment variable, which adds a re-entrant lock around q. Learn [how to enable multithreaded execution](threading.md) and set up a Python process using PyKX to [call into EmbeddedQ from multiple threads](../../examples/threaded_execution/threading.md).
-## Peach
+### Peach
-Having q use [`peach`](../../api/pykx-execution/q.md#peach) to call into Python is not supported unless `PYKX_RELEASE_GIL` is enabled, and will hang indefinitely.
+Using the [`#!python peach`](../../api/pykx-execution/q.md#peach) function in q to call Python is not supported unless you enable the `#!python PYKX_RELEASE_GIL` setting. Without enabling this setting, the process will hang indefinitely.
For example, calling from Python into q into Python works normally:
@@ -55,14 +74,13 @@ pykx.List(pykx.q('
0 1 2
'))
```
-
-But by default calling from Python into q into Python using `peach` hangs:
+But, by default, using `#!python peach` to call from Python into q and back into Python hangs:
```python
>>> kx.q('{x peach 1 2 3}', lambda x: range(x)) # Warning: will hang indefinitely
```
-However if `PYKX_RELEASE_GIL` is enabled this will work:
+However, if you enable `#!python PYKX_RELEASE_GIL`, it works:
```python
>>> import os
diff --git a/docs/user-guide/advanced/remote-functions.md b/docs/user-guide/advanced/remote-functions.md
new file mode 100644
index 0000000..10c3118
--- /dev/null
+++ b/docs/user-guide/advanced/remote-functions.md
@@ -0,0 +1,150 @@
+---
+title: PyKX Remote Functions
+description: How to execute Python functions on q servers in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, performance, parallelization, secondary q threads, multithreading, peach
+---
+
+# Remote Function Execution
+
+_This page explains how to execute Python functions on q servers in PyKX._
+
+Remote Functions let you define Python functions within your Python environment which can interact with kdb+ data on a q process. Once defined, these functions are registered to a [remote session object](../../api/remote.md) along with any Python dependencies which need to be imported. The [remote session object](../../api/remote.md) establishes and manages the remote connection to the kdb+/q server.
+
+To execute kdb+/q functions using PyKX, go to [PyKX under q](../../pykx-under-q/intro.md)
+
+## Requirements and limitations
+
+Before you start:
+
+- Make sure all necessary Python requirements are installed on the client server. For this functionality you need `#!python dill>=0.2`.
+- Confirm that the kdb+/q server you connect to can load PyKX under q.
+- Ensure that you have the correct versions of Python library dependencies in your kdb+/q environment at runtime.
+- Run the following command:
+
+```bash
+pip install pykx[remote]
+```
+
+## Functional walkthrough
+
+This walkthrough demonstrates the following steps:
+
+1. Initialize a q/kdb+ server loading PyKX under q on a specified port.
+1. Import PyKX and generate a remote session object which denotes the process against which the Python functions will be executed.
+1. Define a number of Python functions which will be executed on the remote q/kdb+ server.
+
+### Initialize a q/kdb+ server with PyKX under q
+
+This step ensures you have a q process running with PyKX under q, as well as having a kdb+ table available to query. If you have this already, proceed to the next step.
+
+Ensure that you have q installed. If you do not have this installed please follow the guide provided [here](https://code.kx.com/q/learn/install/), retrieving your license following the instructions provided [here](https://kx.com/kdb-insights-personal-edition-license-download).
+
+Install PyKX under q using the following command.
+
+```bash
+python -c "import pykx;pykx.install_into_QHOME()"
+```
+
+Start the q process on which you will execute your functions.
+
+```bash
+q pykx.q -p 5050
+```
+
+Create a table which you will use within your Python analytics defined below.
+
+```q
+q)N:1000
+q)tab:([]sym:N?`AAPL`MSFT`GOOG`FDP;price:100+N?100f;size:10+N?100)
+```
+
+Set a requirement for users to provide a username/password if you wish to add security to your q process.
+
+```q
+.z.pw:{[u;p]$[(u~`user)&p~`password;1b;0b]}
+```
+
+### Import PyKX and create a session
+
+Create a session object from a Python environment of your choice, which establishes and manages the remote connection to the kdb+/q server.
+
+```python
+>>> import pykx as kx
+>>> session = kx.remote.session(host='localhost', port=5050, username='user', password='password')
+```
+
+### Define and execute Python functions using a session
+
+Tag the Python functions you want to run on the remote server using the `#!python kx.remote.function` decorator. This registers the functions on the `#!python session` object you have just created.
+
+=== "Zero argument function"
+
+ ```python
+ >>> @kx.remote.function(session)
+ ... def zero_arg_function():
+ ... return 10
+ >>> zero_arg_function()
+ pykx.LongAtom(pykx.q('10'))
+ ```
+
+=== "Single argument function"
+
+ ```python
+ >>> @kx.remote.function(session)
+ ... def single_arg_function(x):
+ ... return x+10
+ >>> single_arg_function(10)
+ pykx.LongAtom(pykx.q('20'))
+ ```
+
+=== "Multi argument function"
+
+ ```python
+ >>> @kx.remote.function(session)
+ ... def multi_arg_function(x, y):
+ ... return x+y
+ >>> multi_arg_function(10, 20)
+ pykx.LongAtom(pykx.q('30'))
+ ```
+
+Add any Python libraries which need to be available when executing the function(s) you have just defined. You can achieve this in three ways:
+
+1. Adding the `#!python libraries` keyword when generating your session object
+1. Using `#!python session.libraries` on an existing session to import required libraries before defining your function
+1. Importing libraries within the body of the function being executed
+
+Examples of each of these methods can be seen below:
+
+=== "Libraries being defined at initialisation"
+
+ ```python
+ >>> import pykx as kx
+    >>> session = kx.remote.session(port=5050, libraries={'kx': 'pykx'})
+ ```
+
+=== "Library addition functionality"
+
+ ```python
+ >>> session.libraries({'np': 'numpy', 'kx': 'pykx'})
+    >>> @kx.remote.function(session)
+ ... def dependent_function(x, y, z):
+ ... return kx.q.mavg(4, np.linspace(x, y, z))
+ >>> dependent_function(0, 10, 10)
+ pykx.FloatVector(pykx.q('0 0.5555556 1.111111 2.222222 3...'))
+ ```
+
+=== "Defining imports within function body"
+
+ ```python
+    >>> @kx.remote.function(session)
+ ... def dependent_function(x, y, z):
+ ... import pykx as kx
+ ... import numpy as np
+ ... return kx.q.mavg(4, np.linspace(x, y, z))
+ >>> dependent_function(0, 10, 10)
+ pykx.FloatVector(pykx.q('0 0.5555556 1.111111 2.222222 3...'))
+ ```
+
+While all three approaches are valid, we suggest using `#!python libraries` as a method or keyword, as it allows the libraries to be pre-checked before the function is defined and will be expanded over time to include additional validation.
diff --git a/docs/user-guide/advanced/serialization.md b/docs/user-guide/advanced/serialization.md
index 05a3d3b..105b8bd 100644
--- a/docs/user-guide/advanced/serialization.md
+++ b/docs/user-guide/advanced/serialization.md
@@ -1,29 +1,30 @@
-# Serialization and de-serialization
+---
+title: Serialization and De-serialization
+description: Learn how to serialize and de-serialize in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, python, serialize, de-serialize
+---
-PyKX allows users to serialize and de-serialize kdb+/q data structures directly to and from Python byte objects. Interoperating with Pythons [`pickle`](https://docs.python.org/3/library/pickle.html) library this allows users to persist and retrieve objects generated or accessed via PyKX into entities which can be saved to disk or sent via IPC to another process.
+# Serialize and de-serialize data
-While the application of serialization and de-serialization can be completed using q code directly within PyKX it is advised that users leverage [`pickle.dumps`](https://docs.python.org/3/library/pickle.html#pickle.dumps) and [`pickle.loads`](https://docs.python.org/3/library/pickle.html#pickle.loads) when attempting to interact with serialized representations of kdb+/q data for usage within a Python only environment.
+_This page explains how to use PyKX to serialize and de-serialize kdb+/q data structures directly to and from Python byte objects._
-!!! Warning
+There are two main ways to serialize/de-serialize data with PyKX:
- De-serialization of data is not inherently secure, if you are de-serializing data please only do so if retrieved from a trusted source.
+- By interfacing with Python's [`pickle`](https://docs.python.org/3/library/pickle.html) library to persist data to disk in a Python friendly format.
+- By using the [`kx.serialize`](../../api/serialize.md) module to prepare data in q IPC data format.
-## Limitations
+!!! Warning
-Serialization of PyKX objects is limited to objects which are purely generated from kdb+/q data. Serialization of `pykx.Foreign` objects, for example, is not supported as these represent underlying objects defined in C of arbitrary complexity.
+ In all cases de-serializing data can be risky. Proceed only if you’re sure the data comes from a trusted source.
-```python
->>> import pykx as kx
->>> import pickle
->>> pickle.dumps(kx.Foreign(1))
-TypeError: Unable to serialize pykx.Foreign objects
-```
-Similarly on-disk representations of tabular data such as `pykx.SplayedTable` and `pykx.PartitionedTable` cannot be serialized.
+## Serialization using pickle
-## Examples
+Serializing data is extremely useful in cases where you need to convert a data object into a format that is easily transmittable, such as storing data or transferring it to a remote process. When serializing your PyKX data in most cases it is suggested that you make use of the integration between PyKX and Pickle.
-The following are examples showing the serialization and de-serialization of PyKX objects with
+In the following three examples you can see the serialization and de-serialization of various PyKX objects:
1. PyKX Table
@@ -35,10 +36,13 @@ The following are examples showing the serialization and de-serialization of PyK
x x1 x2
-------
1 2 3
- >>> print(pickle.loads(pickle.dumps(table)))
+ >>> pdump = pickle.dumps(table)
+ >>> print(pdump)
+ b'\x80\x04\x95\xf5\x00..'
+ >>> print(pickle.loads(pdump))
x x1 x2
- -------
- 1 2 3
+ -------
+ 1 2 3
```
2. PyKX Float Vector
@@ -49,7 +53,10 @@ The following are examples showing the serialization and de-serialization of PyK
>>> qvec = kx.random.random(10, 2.0)
>>> print(qvec)
0.7855048 1.034182 1.031959 0.8133284 0.3561677 0.6035445 1.570066 1.069419 1..
- >>> print(pickle.loads(pickle.dumps(qvec)))
+ >>> pdump = pickle.dumps(qvec)
+ >>> print(pdump)
+ b'\x80\x04\x95\n\x..'
+ >>> print(pickle.loads(pdump))
0.7855048 1.034182 1.031959 0.8133284 0.3561677 0.6035445 1.570066 1.069419 1..
```
@@ -63,9 +70,107 @@ The following are examples showing the serialization and de-serialization of PyK
>>> print(qlist)
1
`b
- 540bad66-0838-46ca-b5eb-b4bab5e32228
+ 7c667128-4ebd-45da-971c-38d5c54e36e1
+ >>> pdump = pickle.dumps(qlist)
+ >>> print(pdump)
+ b'\x80\x04\x95\xd7..'
>>> print(pickle.loads(pickle.dumps(qlist)))
1
`b
- 540bad66-0838-46ca-b5eb-b4bab5e32228
+ 7c667128-4ebd-45da-971c-38d5c54e36e1
+ ```
+
+## Serialization using `kx.serialize`
+
+While using `#!python pickle` will be sufficient in most cases, there will be times where you are required to convert data to or from the q IPC format byte representation. Using the `#!python kx.serialize` and `#!python kx.deserialize` functions will provide better performance in these situations.
+
+Unlike with `#!python pickle`, which returns the byte representation immediately on serialization, PyKX allows the generation of this byte object to be deferred by creating a [`memoryview`](https://docs.python.org/3/library/stdtypes.html#memoryview). Deserialization can be completed directly from this `#!python memoryview` or from the raw byte objects.
+
+Similar to the examples in the previous section, the examples below serialize and deserialize various PyKX objects:
+
+1. PyKX Table
+
+ ```python
+ >>> import pykx as kx
+ >>> table = kx.Table([[1, 2, 3]])
+ >>> print(table)
+ x x1 x2
+ -------
+ 1 2 3
+ >>> sertab = kx.serialize(table)
+ >>> sertab
+
+ >>> sertab.copy()
+ b'\x01\x00\x00\x00I\..'
+ >>> print(kx.deserialize(sertab))
+ x x1 x2
+ -------
+ 1 2 3
+    >>> print(kx.deserialize(sertab.copy()))
+ x x1 x2
+ -------
+ 1 2 3
+ ```
+
+2. PyKX Float Vector
+
+ ```python
+ >>> import pykx as kx
+ >>> import pickle
+ >>> qvec = kx.random.random(10, 2.0)
+ >>> print(qvec)
+ 0.7855048 1.034182 1.031959 0.8133284 0.3561677 0.6035445 1.570066 1.069419 1..
+ >>> servec = kx.serialize(qvec)
+ >>> print(servec)
+
+ >>> print(servec.copy())
+ b'\x01\x00\x00\x00^..'
+ >>> print(kx.deserialize(servec))
+ 0.7855048 1.034182 1.031959 0.8133284 0.3561677 0.6035445 1.570066 1.069419 1..
+ >>> print(kx.deserialize(servec.copy()))
+ 0.7855048 1.034182 1.031959 0.8133284 0.3561677 0.6035445 1.570066 1.069419 1..
+ ```
+
+3. PyKX List
+
+ ```python
+ >>> import pykx as kx
+ >>> import pickle
+ >>> import uuid
+ >>> qlist = kx.toq([1, 'b', uuid.uuid4()])
+ >>> print(qlist)
+ 1
+ `b
+ 7c667128-4ebd-45da-971c-38d5c54e36e1
+ >>> serlist = kx.serialize(qlist)
+ >>> print(serlist)
+
+ >>> print(serlist.copy())
+ b'\x01\x00\x00\x00..'
+ >>> print(kx.deserialize(serlist))
+ 1
+ `b
+ 7c667128-4ebd-45da-971c-38d5c54e36e1
+ >>> print(kx.deserialize(serlist.copy()))
+ 1
+ `b
+ 7c667128-4ebd-45da-971c-38d5c54e36e1
```
+
+## What are the limitations?
+
+Serialization of PyKX objects is limited to objects which are purely generated from kdb+/q data. Serialization of `pykx.Foreign` objects, for example, is not supported as these represent underlying objects defined in C of arbitrary complexity.
+
+```python
+>>> import pykx as kx
+>>> import pickle
+>>> pickle.dumps(kx.Foreign(1))
+TypeError: Unable to serialize pykx.Foreign objects
+```
+
+Similarly, you cannot serialize on-disk representations of tabular data such as `pykx.SplayedTable` and `pykx.PartitionedTable`.
+
+## Next Steps
+
+- [Learn how to interact via IPC](ipc.md)
+- [Learn how to call q functions in a Python first way](context_interface.md)
diff --git a/docs/user-guide/advanced/streaming/basic.md b/docs/user-guide/advanced/streaming/basic.md
new file mode 100644
index 0000000..b66138d
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/basic.md
@@ -0,0 +1,119 @@
+---
+title: Basic Streaming Ingest
+description: How to start basic data ingest with PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, simple
+---
+
+# Basic streaming ingest
+
+_This page outlines the steps required to create a high-velocity data ingest infrastructure using PyKX._
+
+!!! warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+## Introduction
+
+The most fundamental task in managing high-velocity data is consuming and persisting this data in the order it arrives. The core components of any workflow that aims to achieve this must manage the following coordinated actions:
+
+1. Capture and log the ingested data allowing for data replay in failure scenarios.
+2. Maintain a record of the most recent data available and allow users/analytics to access it.
+3. Persist and make available data from previous days to facilitate more complex analytics.
+
+These steps are managed by three separate processes known as the Tickerplant, Real-Time Database (RDB), and Historical Database (HDB). In combination, these processes form the 'basic' building block of any ingest workflow using PyKX. We will break these processes down in more depth later. For now, let's visualize one of their simplest arrangements:
+
+![basic](../images/simple-no-feed.png)
+
+## Get started
+
+Start this basic infrastructure by running the following commands:
+
+```python
+import pykx as kx
+trade = kx.schema.builder({
+ 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ 'price': kx.FloatAtom, 'volume': kx.LongAtom})
+agg = kx.schema.builder({
+ 'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,
+ 'max_price': kx.FloatAtom , 'median_volume': kx.FloatAtom})
+basic = kx.tick.BASIC(
+ tables = {'trade': trade, 'aggregate': agg},
+ log_directory = 'log',
+ database = 'db')
+
+basic.start()
+```
+
+Congratulations, you now have a fully functional high-volume ingestion pipeline running! But how did we configure it?
+
+The call to `#!python kx.tick.BASIC` above provides several keyword arguments which help to manage the initialization of the various processes:
+
+1. `#!python tables`: Provides a dictionary mapping the names of tables to be ingested and persisted to their schema. This is the only parameter required for initializing the basic infrastructure.
+2. `#!python log_directory`: Generates a directory (if not currently available) and persists a log of each incoming message to a log-file associated with today's date.
+3. `#!python database`: If omitted, the HDB process outlined above will not be initialized. The `#!python database` denoted here should point to a directory containing a partitioned kdb+ database, for more information on this form of database see [here](../database/index.md).
+
+For a full breakdown of the BASIC API functionality go [here](../../../api/tick.md#pykx.tick.BASIC).
+
+
+If you want to know more about the individual components of the basic streaming infrastructure, read through the next section, otherwise jump to [Next Steps](#next-steps) where you can continue to develop your system.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.BASIC`](../../../api/tick.md#pykx.tick.BASIC)
+ - [`basic.start`](../../../api/tick.md#pykx.tick.BASIC.start)
+
+## Component breakdown
+
+To understand how the basic infrastructure operates, it's helpful to understand the role that Tickerplant, RDB and HDB processes play in the data lifecycle. The following sections describe each of these components.
+
+### Tickerplant
+
+The role of a tickerplant is to coordinate the following actions within the lifecycle of streaming data:
+
+- Write all incoming messages to a central log file which can be used to replay data in situations where the Tickerplant process is unexpectedly killed.
+- Push all data received to a downstream RDB to maintain a record of all data.
+- Push all or a subset of data to more complex processes or downstream subscribers.
+- Send a message to all subscribing processes at the end of day (EOD) to execute their end-of-day function.
+
+!!! Warning
+
+ Tickerplants operate as a central source of truth when building streaming applications. Their constant operation is important, particularly in cases where data volumes are extreme.
+
+Should the Tickerplant process be killed and need to recover or data be replayed to repopulate the Database at a later point, the log file is centrally important. When the tickerplant receives messages, it persists each of them to the disk, in a file named `#!python log` within a user-specified directory.
+
+Data is logged as a list with the first argument being the function which should be executed on log replay and the remaining elements being the arguments to this function. Within the PyKX streaming workflows, the function called is `#!python .u.upd` and it takes two arguments: `#!python table_name` and `#!python message`. As such, our log file may consist of the following information (in human readable form below).
+
+```q
+(`.u.upd;`trade;(0D11:31:30.758665000;`AAPL;42.0))
+(`.u.upd;`trade;(0D11:31:30.758666000;`MSFT;40.2))
+(`.u.upd;`trade;(0D11:31:30.758667000;`AAPL;42.1))
+```
+
+Tickerplants can run into issues when subscribers consume their data slower than it can be produced. To explore this problem in more depth, go to the [Real-Time Analytic](rta.md) development page.
+
+### Real-Time Database
+
+A Real-Time Database (RDB) stores today’s data in-memory and writes it to the HDB at the end of day. For persistence of raw data in high-throughput scenarios, avoid applying analytics to data along the critical path. At a minimum, it's recommended to have RAM of at least 4× the expected data size. Therefore, for 5 GB data per day, the machine should have at least 20 GB RAM. In practice, you can use much larger RAM.
+
+### Historical Databases
+
+The Historical Database (HDB) contains data for all available days of processed data before the current day. This data is stored on-disk and loaded into the process as a memory-mapped dataset. Interactions with this data and its management are more complex than in-memory data. You can manage this data in the [Database API for PyKX](../database/index.md) and you can also [query](../../fundamentals/query/index.md) it.
+
+!!! info "Important"
+
+    PyKX allows you to load only one database at a time on an HDB. The HDB connecting to an RDB/RTE should have the same tables defined to avoid issues with missing partitions.
+
+## Next steps
+
+Now that you have your basic infrastructure up and running, you might be interested in some of the following:
+
+- Learn how to publish data to your streaming infrastructure [here](publish.md).
+- Learn how to subscribe to data from your streaming infrastructure [here](subscribe.md).
+- If you need more fine-grained control over your infrastructure, see [here](complex.md)
+
+For some further reading, here are some related topics:
+
+- Learn how to generate a Historical Database [here](../database/index.md).
diff --git a/docs/user-guide/advanced/streaming/complex.md b/docs/user-guide/advanced/streaming/complex.md
new file mode 100644
index 0000000..7e4657b
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/complex.md
@@ -0,0 +1,173 @@
+---
+title: Complex Streaming Control
+description: How to edit/manage your streaming workflows with PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, simple
+---
+
+# Complex streaming control
+
+_This page outlines some of the more complex and fine-grained controls that are possible for your streaming workflows._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+The [Basic infrastructure](basic.md), [Analysing streaming data](rta.md) and [Custom query API development](custom_apis.md) sections deal with the simplest interactions supported by PyKX. Let's explore additional keyword arguments/functionalities that can provide significant value in building your infrastructure.
+
+The sections below discuss in detail why it's important and how to update the examples used throughout the other sections of the Real-Time Data Capture documentation. The following highlights the topics covered:
+
+| Topic | Description |
+|:---------------------------------|:------------|
+| Fine-grained ingest control | Instead of relying on the packaged [basic](basic.md) logic to generate your tickerplant, RDB and HDB, control these processes more explicitly and learn why this is useful. |
+| Process logs | Learn how to modify startup of your processes to save output to files or print to your process. |
+| How to stop processes | You already know how to start and restart processes. This section shows you how to stop them. |
+
+
+## Fine-grained ingest control
+
+In the [basic infrastructure](basic.md) section we made use of the function [`#!python kx.tick.BASIC`](../../../api/tick.md#pykx.tick.BASIC) to start the component parts of a PyKX streaming workflow namely:
+
+- [Tickerplant](basic.md#tickerplant): The ingestion point which logs incoming messages and publishes messages to down-stream subscribers.
+- [Real-Time Database(RDB)](basic.md#real-time-database): A process which contains the current day's data in-memory and writes the data to disk at end-of-day.
+- [Historical Database(HDB)](basic.md#historical-databases): A process on which data for days prior to the current day has been loaded as a memory-mapped on-disk dataset.
+
+While the single-call basic infrastructure is useful, you might want to load these process types on separate virtual/physical machines. For example, you might consider loading your RDB on a process with significantly higher RAM requirements to your HDB, where user queries are limited in expected RAM by well-controlled APIs.
+
+A full breakdown of the APIs for each of these process types is provided in the dropdown for the API documentation below.
+
+To manually generate a [basic infrastructure](basic.md) using the individual APIs, follow the steps below:
+
+1. Start the Tickerplant process by defining the `#!python trade` and `#!python aggregate` tables:
+
+ ```python
+ import pykx as kx
+ trade = kx.schema.builder({
+ 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ 'price': kx.FloatAtom, 'volume': kx.LongAtom})
+ agg = kx.schema.builder({
+ 'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,
+ 'max_price': kx.FloatAtom , 'median_volume': kx.FloatAtom})
+
+ tick = kx.tick.TICK(
+ port=5010,
+ tables = {'trade': trade, 'aggregate': agg},
+ log_directory = 'log'
+ )
+ tick.start()
+ ```
+
+2. Next, generate the Historical Database process on port 5012 by loading historical data (if it exists) from a database at `#!python db`. The RDB will connect to this process on initialization and trigger end-of-day operations:
+
+ ```python
+ hdb = kx.tick.HDB(port=5012)
+ hdb.start(database='db')
+ ```
+
+3. Now that you have initialized the tickerplant and HDB, start the RDB process on port 5011. Connect to the tickerplant on port 5010 as follows:
+
+ ```python
+ rdb = kx.tick.RTP(port=5011)
+ rdb.start({
+ 'tickerplant': 'localhost:5010',
+ 'hdb': 'localhost:5012',
+ 'database': 'db'})
+ ```
+
+This workflow is equivalent to the [basic infrastructure](basic.md) walkthrough.
+
+??? "API documentation"
+ Links to the functions used in the above section:
+
+ - [`kx.tick.TICK`](../../../api/tick.md#pykx.tick.TICK)
+ - [`tick.start`](../../../api/tick.md#pykx.tick.TICK.start)
+ - [`kx.tick.RTP`](../../../api/tick.md#pykx.tick.RTP)
+ - [`rtp.start`](../../../api/tick.md#pykx.tick.RTP.start)
+ - [`kx.tick.HDB`](../../../api/tick.md#pykx.tick.HDB)
+ - [`hdb.start`](../../../api/tick.md#pykx.tick.HDB.start)
+
+## Process logs
+
+Each of the process types covered within the documentation for Real-Time Data Capture is a sub-process which runs a separate executable to the Python process which initialized it. The benefit is that you can build complex workflows from a single Python process. However, it can make lifecycle management and tracking of these processes difficult.
+
+By default, the initialization of `#!python TICK`, `#!python RTP`, `#!python HDB` and `#!python GATEWAY` processes prints information from `#!python stdout` and `#!python stderr` to the parent process which started the sub-processes. While this is useful in providing a user with up-to-date information about these processes, it makes separating logs from different processes difficult.
+
+Each process type supports a keyword argument `#!python process_logs` which can have the following input types:
+
+| **Input type** | **Description** |
+|:-----------|:---------------------------------------------------------------------------|
+| `#!python True` | Logs should be printed to `#!python stdout`/`#!python stderr` of the parent Python process |
+| `#!python False` | Logs from the child process are suppressed and redirected to `#!python /dev/null` |
+| string | Logs are redirected to the file location specified by the `#!python str` |
+
+1. Here's an example of redirecting logs to a file:
+
+ - Define a query API which prints timing information relating to the query execution.
+ - Register this query API to an `#!python RTP` process which logs data to a file `#!python process_logs.txt`.
+ - Call the query API with a function which sleeps for 5 seconds and read the content of `#!python process_logs.txt`.
+ - Define the query API, using [`#!python datetime`](https://docs.python.org/3/library/datetime.html) to time the query.
+
+ ```python
+ def time_api(query, *parameters):
+ init_time = datetime.datetime.now()
+ result = kx.q(query, *parameters)
+ print(f'query time: {datetime.datetime.now() - init_time}')
+ return result
+ ```
+
+2. Create your RTP process, logging output to `#!python process_logs.txt` and ensuring access to the `#!python datetime` and `#!python pykx` libraries used by the API:
+
+ ```python
+ rtp = kx.tick.RTP(
+ port=5011,
+ libraries={'datetime': 'datetime', 'kx': 'pykx'},
+ process_logs='process_logs.txt',
+ apis={'time_api': time_api}
+ )
+ ```
+
+3. Call the query API and read the content of `#!python process_logs.txt`. Note that to call this API you do not need to `#!python start` the process as we are not attempting to connect to the Tickerplant/HDB processes:
+
+ ```python
+ rtp('time_api', b'{system"sleep 5";x+10}', 10)
+ with open('process_logs.txt') as f:
+ print(f.read())
+ ```
+
+## How to stop processes
+
+While we hope that we will always generate the perfect code, there can be times when being able to stop processing of our system is a requirement. As the streaming infrastructure for PyKX operates by starting sub-processes from Python, the control of these processes is more complex than it would be, should the parent process be in full control.
+
+For each of the `#!python BASIC`, `#!python TICK`, `#!python RTP`, `#!python HDB` and `#!python GATEWAY` classes, the initialized class objects have an associated `#!python stop` function. Call this function if you want to gracefully shut down processing and kill the underlying process. You can invoke it using the `#!python rtp` process started in the previous section as an example:
+
+```python
+rtp.stop()
+```
+
+While graceful process closure is always advised, it may not always be possible. In case your parent process has been shut down and you no longer have access to the `#!python .stop()` functionality, use `#!python kx.util.kill_q_process`. This takes the port number that your sub-process was started on and kills it. Caution should be taken when invoking this function.
+
+```python
+kx.util.kill_q_process(5010)
+```
+
+??? "API documentation"
+ Links to the functions used in this section:
+
+ - [`rtp.stop`](../../../api/tick.md#pykx.tick.RTP.stop)
+ - [`kx.tick.BASIC.stop`](../../../api/tick.md#pykx.tick.BASIC.stop)
+ - [`kx.tick.TICK.stop`](../../../api/tick.md#pykx.tick.TICK.stop)
+ - [`kx.tick.HDB.stop`](../../../api/tick.md#pykx.tick.HDB.stop)
+ - [`kx.tick.GATEWAY.stop`](../../../api/tick.md#pykx.tick.GATEWAY.stop)
+    - [`kx.util.kill_q_process`](../../../api/util.md#pykxutilkill_q_process)
+
+## Next steps
+
+Now that you have your basic infrastructure up and running you might be interested in some of the following:
+
+- Learn how to publish data to your streaming infrastructure [here](publish.md).
+- Learn how to subscribe to data from your streaming infrastructure [here](subscribe.md).
+
+For some further reading, here are some related topics:
+
+- Learn how to generate a Historical Database [here](../database/index.md).
diff --git a/docs/user-guide/advanced/streaming/custom_apis.md b/docs/user-guide/advanced/streaming/custom_apis.md
new file mode 100644
index 0000000..055e34d
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/custom_apis.md
@@ -0,0 +1,123 @@
+---
+title: Custom Query API Development
+description: How to generate a custom query API
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, publishing
+---
+
+# Custom query API development
+
+_This page outlines how you can augment your streaming process with accessible named query APIs._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+The addition and use of custom query APIs is often crucial for making your data accessible to users. Users connected to your process via IPC or by a querying Gateway process call these APIs. You can place custom query APIs on any process type discussed in the [basic](basic.md), [analysis](rta.md) and [subscription](subscribe.md) sections.
+
+In each case, you can add a query API by calling the `#!python register_api` method on each of the process types or during the configuration of an [`#!python RTP`](../../../api/tick.md#pykx.tick.RTP) , [`#!python HDB`](../../../api/tick.md#pykx.tick.HDB) or [`#!python GATEWAY`](../../../api/tick.md#pykx.tick.GATEWAY) process in your system. A breakdown of gateway processes follows this section [here](gateways.md). In the examples below we add query APIs to the historical database created when configuring the [basic](basic.md) infrastructure and the RTP processing the aggregate dataset.
+
+## Configure an API for your Real-Time Processor
+
+You can add APIs to your process at configuration time or while the process is in operation, to allow iterative development. The following sections show how both approaches can be achieved to create a Python function which takes multiple parameters:
+
+1. The `#!python table` which is being queried
+2. The `#!python symbol` which a user is interested in
+
+And returns the number of instances of that symbol:
+
+```python
+def custom_api(table, symbol):
+ return kx.q.sql(f'select count(*) from {table} where sym like $1', symbol)['xcol'][0]
+```
+
+### Add an API to an existing RTP and HDB
+
+Now that you have the function definition, use the `#!python register_api` function to augment the `#!python rtp` class created [here](rta.md#start-your-rtp).
+
+```python
+rtp.register_api('symbol_count', custom_api)
+```
+
+Similarly, you can add the equivalent API to your `#!python HDB` process generated [here](basic.md) by accessing the `#!python hdb` class as follows:
+
+```python
+basic.hdb.register_api('symbol_count', custom_api)
+```
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`rtp.register_api`](../../../api/tick.md#pykx.tick.STREAMING.register_api)
+
+### Add an API when configuring your system
+
+In the previous section you added custom APIs to a running system. To make APIs available on restart, you can add them at the configuration time for the processes. For instance, let's modify the example [here](rta.md#run-all-setup-at-once) to include an API.
+
+If we're adding an API at configuration, it's supplied as a dictionary mapping the name of the API to the API code:
+
+```python
+def preprocessor(table, data):
+ if table == 'trade':
+ return data
+ else:
+ return None
+
+def postprocessor(table, data):
+ agg = kx.q[table].select(
+ columns = {'min_px':'min price',
+ 'max_px': 'max price',
+ 'spread_px': 'max[price] - min price'},
+ by = {'symbol': 'symbol'})
+ kx.q['agg'] = agg # Make the table accessible from q
+ with kx.SyncQConnection(port=5010, wait=False, no_ctx=True) as q:
+ q('.u.upd', 'aggregate', agg._values)
+ return None
+
+def custom_api(table, symbol):
+ return kx.q.sql(f'select count(*) from {table} where sym like $1', symbol)['xcol'][0]
+
+rtp = kx.tick.RTP(port=5014,
+ subscriptions = ['trade'],
+ libraries={'kx': 'pykx'},
+ pre_processor=preprocessor,
+ post_processor=postprocessor,
+ apis={'symbol_count': custom_api},
+ vanilla=False)
+rtp.start({'tickerplant': 'localhost:5013'})
+```
+
+Currently we don't support the addition of APIs to the components of the [basic infrastructure](basic.md) at startup. To configure a historical database at startup with more fine-grained control, configure it manually as outlined [here](complex.md).
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.RTP`](../../../api/tick.md#pykx.tick.RTP)
+ - [`rtp.start`](../../../api/tick.md#pykx.tick.RTP.start)
+
+### Test an API
+In the above examples, the API is registered under the name `#!python symbol_count`, which is the name users call it by. Once registered, you can test it directly as follows:
+
+```python
+rtp('symbol_count', 'trade', 'AAPL')
+```
+
+Alternatively, you can test this using IPC:
+
+```python
+with kx.SyncQConnection(port=5014, no_ctx=True) as q:
+ q('symbol_count', 'trade', 'AAPL')
+```
+
+## Next steps
+
+Now that you have custom query APIs available on your processes you may be interested in the following:
+
+- Generate a query routing gateway to allow queries across multiple processes [here](gateways.md).
+- Manually configure the [basic infrastructure](basic.md) as outlined [here](complex.md).
+
+For some further reading, here are some related topics:
+
+- Learn more about Interprocess Communication (IPC) [here](../ipc.md).
+- Learn more about how you can query your data [here](../../fundamentals/query/index.md).
diff --git a/docs/user-guide/advanced/streaming/gateways.md b/docs/user-guide/advanced/streaming/gateways.md
new file mode 100644
index 0000000..afbc41d
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/gateways.md
@@ -0,0 +1,178 @@
+---
+title: Managing query routing
+description: How to manage what and how users can query data
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, routing, analytics
+---
+
+# Manage query routing
+
+_This page outlines how to provide a central, password-protected query location for users._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+When providing users with access to data within your system, you must consider the following priorities:
+
+1. How can you provide a central point for users to query your data?
+2. How do you regulate the users who can query your system?
+3. How do you route queries to multiple processes containing different datasets and aggregate this data?
+
+A `#!python Gateway` process can handle each of these. The gateway is responsible for defining the processes that can be queried within your system and regulates via user-configured logic what is required for a user to establish a connection to the gateway.
+
+PyKX provides a simplistic gateway which allows connections to multiple processes and supports synchronous queries against your processes. Although it doesn't scale to large numbers of high traffic use-cases, it provides a starting infrastructure suitable for small teams of developers.
+
+!!! note "Have your say"
+
+ The above usage patterns provide a basic gateway design but do not cover all cases/usage patterns. If there is functionality that you would like to see, let us know by opening an issue [here](https://github.com/KxSystems/pykx/issues).
+
+## Create a gateway
+
+In the following sections we will generate a Gateway process to help you to complete the following:
+
+1. Limit users permitted to query your APIs to those with a username and password provided in a supplied text file.
+2. Generate a custom function which queries the APIs generated on your `#!python RTP` and `#!python HDB` processes [here](custom_apis.md#add-an-api-to-an-existing-rtp-and-hdb) aggregating the results.
+
+### Configure your gateway
+
+Before adding custom gateway APIs and secure login to the process, configure the gateway to operate on port 5015 with established connections against two processes:
+
+1. `#!python 'rtp'`: The Real-Time Processor established [here](rta.md) on port 5014
+2. `#!python 'hdb'`: The Historical Database established [here](basic.md) on port 5012
+
+```python
+gateway = kx.tick.GATEWAY(port=5015, connections={'rtp': 'localhost:5014', 'hdb': 'localhost:5012'})
+```
+
+If you need to add additional connections once you have initialized the `#!python GATEWAY`, use the `#!python add_connections` function as shown below:
+
+```python
+gateway.add_connections({'rtp': 'localhost:5014'})
+```
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.GATEWAY`](../../../api/tick.md#pykx.tick.GATEWAY)
+ - [`gateway.add_connections`](../../../api/tick.md#pykx.tick.GATEWAY.add_connections)
+
+### Add a custom username/password check
+
+Once you have an initialized Gateway process, define a custom username/password check which any user connecting to the gateway will be validated against. In the example below, the validation function checks that a user is named `#!python test_user` and has a password matching the regex `#!python password.*`.
+
+```python
+def validation_function(username, password):
+ if username == 'test_user':
+ pattern = re.compile("password.*")
+ if bool(pattern.match(password)):
+ return True
+ return False
+```
+
+Now that you have specified the validation function, set this function on the `#!python Gateway` process. For this to operate, you need to ensure the library `#!python re` is available:
+
+```python
+gateway.libraries({'re': 're'})
+gateway.connection_validation(validation_function)
+```
+
+Users attempting to interact with this gateway will now need to adhere to the above conditions, providing the username `#!python test_user` and a password matching `#!python password.*`.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`gateway.libraries`](../../../api/tick.md#pykx.tick.STREAMING.libraries)
+ - [`gateway.connection_validation`](../../../api/tick.md#pykx.tick.GATEWAY.connection_validation)
+
+### Define a custom API for users to call
+
+After establishing the gateway and defining a validation function for connecting processes, add a Custom Gateway API.
+
+Within the Gateway process, there is a Python class defined `#!python gateway` which contains a function `#!python call_port`. This function takes the name given to a port when establishing remote connections [here](#configure-your-gateway) and the parameters required to call this function.
+
+When we developed our custom query APIs [here](custom_apis.md#add-an-api-to-an-existing-rtp-and-hdb) we registered an API `#!python symbol_count` on both the `#!python rtp` and `#!python hdb` processes. The following function definition makes use of the `#!python call_port` function to invoke these functions for a specified table and symbol combination.
+
+```python
+def gateway_function(table, symbol):
+ rtp = gateway.call_port('rtp', table, symbol)
+ try:
+ hdb = gateway.call_port('hdb', table, symbol)
+ except BaseException:
+ print('Failed to retrieve data from HDB')
+ hdb = 0
+ return rtp + hdb
+
+gateway.register_api('sum_of_symbols', gateway_function)
+```
+
+Now that your gateway function has been registered, start the gateway:
+
+```python
+gateway.start()
+```
+
+Users should now be in a position to query the `#!python sum_of_symbols` API on the Gateway process as follows:
+
+```python
+with kx.SyncQConnection(port=5015, username='test_user', password='password123') as q:
+ ret = q('sum_of_symbols', 'trade', 'AAPL')
+ret
+```
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`gateway.register_api`](../../../api/tick.md#pykx.tick.STREAMING.register_api)
+
+### Run all setup at once
+
+To help with restart and to simplify the configuration of your system, you can complete each of the sections above at configuration time for your initialized class. The following code block contains all the code used to configure the gateway:
+
+```python
+def validation_function(username, password):
+ if username == 'test_user':
+ pattern = re.compile("password.*")
+ if bool(pattern.match(password)):
+ return True
+ return False
+
+def gateway_function(table, symbol):
+ rtp = gateway.call_port('rtp', table, symbol)
+ try:
+ hdb = gateway.call_port('hdb', table, symbol)
+ except BaseException:
+ print('Failed to retrieve data from HDB')
+ hdb = 0
+ return rtp + hdb
+
+gateway = kx.tick.GATEWAY(
+ port=5015,
+ connections={'rtp': 'localhost:5014', 'hdb': 'localhost:5012'},
+ libraries={'re':'re'},
+ apis={'sum_of_symbols': gateway_function},
+ connection_validator=validation_function
+ )
+gateway.start()
+```
+
+The advantage of this approach is that it allows process/workflow restart, for example, in case you lose connection to a downstream process. As all definitions are cached in configuration, you can easily restart them.
+
+```python
+gateway.restart()
+```
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.GATEWAY`](../../../api/tick.md#pykx.tick.GATEWAY)
+ - [`gateway.start`](../../../api/tick.md#pykx.tick.GATEWAY.start)
+ - [`gateway.restart`](../../../api/tick.md#pykx.tick.GATEWAY.restart)
+
+## Next steps
+
+For some further reading, here are some related topics you may find interesting:
+
+- Learn more about Interprocess Communication (IPC) [here](../ipc.md).
+- Create a Historical Database from static datasets [here](../database/index.md).
diff --git a/docs/user-guide/advanced/streaming/index.md b/docs/user-guide/advanced/streaming/index.md
new file mode 100644
index 0000000..a8a106a
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/index.md
@@ -0,0 +1,52 @@
+---
+title: PyKX Real-Time Data Capture
+description: Introduction to the PyKX Real-Time Data Capture functionality
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, publishing, real-time data
+---
+
+
+# Real-Time Data Capture
+
+_This page is an introduction to the PyKX Real-Time Data Capture functionality._
+
+The capture, persistence and presentation of high velocity real-time data presents significant challenges to users at all levels, from new users attempting to capture this form of data for the first time to seasoned data-engineers building complex ingestion workflows.
+
+!!! Note "Install q"
+
+ The Real-Time Data Capture functionality provided by PyKX requires you to have access to a `q` executable. A workflow is provided by PyKX to install q as outlined [here](../../../getting-started/installing.md). Alternatively, you will be prompted to install q if not detected when initializing the Real-Time Capture functionality.
+
+The PyKX Real-Time Data Capture functionality described in this documentation provides a framework for users at all levels of their journey to build highly performant real-time systems which can quickly provide users with the following:
+
+- Capture and logging of raw ingested data to facilitate data replay in failure events.
+- A Real-Time Database maintaining reference to data from the current day and persisting this data at end of day.
+- A Historical Database containing data for all days prior to the current day.
+
+Once you're happy with how your data is being captured and persisted, you can build complex workflows and access more advanced features, such as:
+
+- Add real-time streaming analytics to collect valuable insights from your data and alert on issues in mission critical use-cases.
+- Generate real-time and historical query analytics which allow you to derive insights into vast quantities of historical data.
+- Control how users query your captured data through centralized gateways which keep users away from mission critical data-ingest.
+
+Below we're breaking down the documentation sections to guide you through the process of generating these systems using PyKX and what to consider while building up your infrastructure.
+
+## Sections
+
+|*#*| **Title** | **Description** |
+|---|----------------------------------------|------------------|
+|1. |[Start basic ingest](basic.md) | Build your first data ingestion infrastructure covering the logging of incoming messages, creation of a real-time database and loading of a historical database. |
+|2. |[Publish data](publish.md) | Learn how to publish data to your real-time capture system using Python, q and C. |
+|3. |[Subscribe to data](subscribe.md) | Now that data is flowing to your system, how do you subscribe to new updates? |
+|4. |[Real-Time analytics](rta.md) | Analysing real-time data allows for insights to be derived as you need them. Generate insights into your real-time data and account for common problems. |
+|5. |[Custom query APIs](custom_apis.md) | Querying historical and real-time data using custom Python APIs allows you and the consumers of your data to gain complex insights into your data. |
+|6. |[Query access gateways](gateways.md) | Not all users will have free-form access to query your data. They will instead query via authenticated gateway processes. We will outline why this is useful and how to configure this.|
+|7. |[Complex streaming control](complex.md) |Learn how to further edit/manage your streaming workflows with PyKX. Methods include: fine-grained ingest control, process logs and stopping processes. |
+
+!!! Warning "Disclaimer"
+
+ The Real-Time Data Capture functionality provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+## Next steps
+
+- [Start](basic.md) your basic ingest infrastructure.
diff --git a/docs/user-guide/advanced/streaming/publish.md b/docs/user-guide/advanced/streaming/publish.md
new file mode 100644
index 0000000..b418ae1
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/publish.md
@@ -0,0 +1,169 @@
+---
+title: Publish Data
+description: How to publish data to your streaming infrastructure
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, publishing
+---
+
+# Publish Data
+
+_This page outlines how you can publish new data to your streaming infrastructure._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+Publishing data to a PyKX streaming workflow is completed by publishing messages to a [tickerplant process](basic.md#tickerplant) using [Interprocess Communication (IPC)](../ipc.md). The sections below show how to achieve this with Python and q in a [basic streaming infrastructure](basic.md). Commonly in KX literature and whitepapers, processes which publish data to a tickerplant are described as Feedhandlers.
+
+Any message published to a tickerplant is a triplet list with the following structure `#!python [Function;Table;Data]`, where:
+
+- Function: The name of the function to be called on all downstream subscribers. In this case, it's `#!python .u.upd`. This function takes two arguments: table and data.
+- Table: The name of the table to be passed as the first argument to the Function above.
+- Data: The data which is to be passed as the second argument to the Function above.
+
+## Basic examples
+
+The below sections provide examples in Python and q showing the publishing of 10 messages to the `#!python trade` table defined in the basic infrastructure [here](basic.md#get-started). Data will be randomly generated in each case.
+
+!!! Note
+
+ In each example, supply of the `timespan` object is optional. If omitted, data will be tagged with arrival time and persisted by the database using this time information.
+
+In a later section of this page, we provide a more complex example which emulates a continuous data feed from Python and which you can use in the remaining pages relating to streaming data.
+
+### Python
+
+The following Python code allows you to publish 10 messages to the streaming infrastructure created [here](basic.md):
+
+```python
+import pykx as kx
+import numpy as np
+
+ticker_list = ['AAPL', 'GOOG', 'IBM', 'BRK']
+
+for i in range(1, 10):
+ with kx.SyncQConnection(port=5010, wait=False) as q:
+ msg = [kx.TimespanAtom('now'),
+ np.random.choice(ticker_list),
+ np.random.random(10) * 10 * i,
+ np.random.randint(100) * i]
+ q('.u.upd', 'trade', msg)
+```
+
+In the above code we create a Synchronous Connection against the Tickerplant process on port 5010, sending messages with no expectation of a response denoted through setting `#!python wait=False`. We create a message (`#!python msg`) containing 4 elements:
+
+1. The current time as a `#!python kx.TimespanAtom` type object
+1. Name of the trade symbol (`#!python ticker`) randomly generated from a pre-determined list
+1. The price of the stock randomly generated
+1. The volume of the stock that was traded.
+
+Finally, this message is sent to the tickerplant alongside the name of the table `#!python trade` and the function which is to be called `#!python .u.upd`.
+
+### q
+
+The following q code allows you to publish 10 messages to the streaming infrastructure created [here](basic.md):
+
+```q
+h:hopen 5010
+
+// Function for sending updates to trade table
+upd_trades:{neg[x](".u.upd";y;z)}[h;`trade]
+
+// Function for generating a sample message
+msg:{
+ (.z.N;
+ rand `AAPL`GOOG`IBM`BRK;
+ x*rand 10.0;
+ x*rand 100)
+ }
+
+// Send 10 messages using the values 1-10 to update the price/volume values
+(upd_trades msg@)each 1+til 10
+```
+
+In the above code we open a connection to the Tickerplant process on port 5010 and send 10 messages created using the functions `msg` and `upd_trades`. The message generated contains 4 elements:
+
+1. The current time generated using `#!python .z.N`
+1. Name of the trade symbol (`#!python ticker`) randomly generated from a pre-determined list
+1. The price of the stock randomly generated
+1. The volume of the stock that was traded.
+
+### Other languages
+
+It's possible to publish data to PyKX streaming infrastructures using other languages, such as C and Java:
+
+- [Publishing to kdb+ using Java](https://www.timestored.com/kdb-guides/kdb-java-api#feedhandling)
+- [Publishing to kdb+ tickerplant using C](https://code.kx.com/q/wp/capi/#publishing-to-a-kdb-tickerplant)
+
+## Continuous streaming example
+
+In the below section we generate a script which completes the following:
+
+1. Takes a parameter at startup which indicates how many messages should be published per update.
+1. Generates a random trade message using `#!python kx.random.random`.
+1. Publishes this message to the [basic infrastructure](basic.md) tickerplant on port 5010.
+1. Repeats until a user stops the data feed.
+
+You can view this script below or [download](scripts/feed.py) and run it following the instructions outlined below.
+
+```python
+import pykx as kx
+
+import sys
+
+try:
+ args = sys.argv[1]
+except BaseException:
+ args=''
+n = 1 if args=='' else int(args)
+
+print('Starting Data Feed ...')
+init = False
+
+def main():
+ global init
+ symlist = ['AAPL', 'JPM', 'GOOG', 'BRK', 'WPO', 'IBM']
+ while True:
+ trade = [kx.random.random(n, symlist),
+ 10 * kx.random.random(n, 10.0),
+ 10 * kx.random.random(n, 100)
+ ]
+ with kx.SyncQConnection(port=5010, wait=False, no_ctx=True) as q:
+ q('.u.upd', 'trade', trade)
+ if not init:
+ print('First message(s) sent, data-feed publishing ...')
+ init=True
+
+if __name__ == '__main__':
+ try:
+ main()
+ except KeyboardInterrupt:
+ print('Data feed stopped')
+```
+
+Before you start, ensure you have the basic infrastructure running with default values. To use the above `#!python feed.py` script, run it as follows:
+
+- Publish one message per update
+
+ ```bash
+ python feed.py
+ ```
+
+- Publish ten messages per update
+
+ ```bash
+ python feed.py 10
+ ```
+
+## Next steps
+
+Now that you have data being published to your system you may be interested in the following:
+
+- Subscribe to real-time updates following the instructions [here](subscribe.md).
+- Query your real-time and historical data using custom APIs [here](custom_apis.md).
+- Perform complex analysis on your real-time data following the instructions [here](rta.md).
+
+For some further reading, here are some related topics:
+
+- Learn more about Interprocess Communication (IPC) [here](../ipc.md).
diff --git a/docs/user-guide/advanced/streaming/rta.md b/docs/user-guide/advanced/streaming/rta.md
new file mode 100644
index 0000000..8748122
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/rta.md
@@ -0,0 +1,228 @@
+---
+title: Analyze streaming data
+description: How to apply real-time analytics to streaming data
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, analytics
+---
+
+# Analyze streaming data
+
+_This page outlines how you can apply analytics to your streaming data._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+In previous sections we have walked through how to build a [basic streaming infrastructure](basic.md), [publish data](publish.md) and [subscribe to updates](subscribe.md). In more complex scenarios users may wish to apply analytics to the real-time data that is being consumed by their infrastructure. This can be completed in a subscribing process or, more formally, in a Real-Time Processor (RTP).
+
+With PyKX, an RTP subscribes to data from a tickerplant and completes one of the following:
+
+1. Operates as a Real-Time Database (RDB) ingesting data as quickly as possible, making it available for query during the day and coordinating data persistence at end of day.
+1. Allows analytic application on data prior to and post insertion into the in-memory database (RDB).
+
+!!! note "Have your say"
+
+ The above usage patterns provide flexibility in application development but are not the only approaches to real-time processing possible in data-streaming. If there is functionality that you'd like to see, let us know by opening an issue [here](https://github.com/KxSystems/pykx/issues).
+
+
+In the following sections we will walk through how to limit the impact of slow-running analytics, subscribe to specific tables on which our analytics will be built, and develop Pythonic pre- and post-processing analytics to filter data and derive insights.
+
+The steps discussed below depend on the [basic infrastructure](basic.md) and [publishing](publish.md) sections, so if you don't have a running system with flowing data, revisit these sections.
+
+## Protect data ingest
+
+The following explanation relating to slow subscribers was previously covered [here](subscribe.md#protect-data-ingest). If you are comfortable with the techniques used to limit the impact of slow-running analytics, you can skip to the [next section](#build-real-time-processors).
+
+As mentioned [here](basic.md#tickerplant) when describing issues that can impact tickerplants, subscribers who process messages too slowly can cause significant issues which can critically impact your streaming infrastructure. Typically, a zero-latency tickerplant (which the basic infrastructure tickerplant is one of) will publish too much data for complex analytics to process.
+
+To reduce the impact of slow subscribers, a chained tickerplant can subscribe to all messages from the zero-latency TP and publish messages to down-stream subscribers at a slower rate. An important characteristic of a chained tickerplant is that, while it can provide down-stream subscribers with data, it does not maintain a log of processed data. As such, a chained tickerplant should always connect to a zero-latency tickerplant to ensure data processing in failure events.
+
+An example of such a setup can be seen below and is what we will generate throughout the following sections:
+
+![chained-subscriber](../images/chained-subscriber.png)
+
+Before providing a demonstration of subscribing to new messages, we first must generate the chained tickerplant to which users will subscribe. This is achieved using the `kx.tick.TICK` class and setting the `chained` keyword to `True`.
+
+```python
+>>> chained_tp = kx.tick.TICK(port=5013, chained=True)
+>>> chained_tp.start({'tickerplant': 'localhost:5010'})
+```
+
+In the above code we are setting the chained tickerplant on port 5013 to provide access to external processes. Additionally, on start of the chained tickerplant, we state that it should subscribe to messages from the zero-latency tickerplant on port 5010.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.TICK`](../../../api/tick.md#pykx.tick.TICK)
+ - [`chained_tp.start`](../../../api/tick.md#pykx.tick.TICK.start)
+
+## Build real-time processors
+
+Now that a chained tickerplant has been established which can be used for slow subscribers, we can generate some analytics from the data supplied by this process. To achieve this, we use a Real-Time Processor (RTP) which runs pre-processing and post-processing analytics. We generate Python analytics that achieve the following:
+
+1. Pre-process all messages to remove any data which is sourced from a table other than the `#!python trade` table.
+2. A post-processing function written in Python which uses PyKX and numpy to calculate the `#!python min`, `#!python max` and `#!python spread` of prices per symbol from the processed trade data.
+3. Publishes the results of your aggregated information back to the primary tickerplant to ensure the aggregate data is persisted for future inspection.
+
+The reference architecture used for this is as follows:
+
+![chained-subscriber](../images/chained-subscriber.png)
+
+### Start your RTP
+
+To start building and iterating on the development of your RTP, you first need to configure and start your RTP instance. The basic requirements, based on the steps outlined above, are:
+
+1. RTP will be started on port 5014
+2. RTP should subscribe to `#!python trade` table messages only from port 5013
+3. The RTP should facilitate complex analytics denoted by setting the keyword `#!python vanilla=False`
+
+```python
+rtp = kx.tick.RTP(port=5014,
+ subscriptions = ['trade'],
+ vanilla=False)
+```
+
+Now that you have initialized your RTP process, you can begin to add pre- and post-processing analytics to it.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.RTP`](../../../api/tick.md#pykx.tick.RTP)
+
+### Add pre/post processors
+
+To add a set of Python analytics we first need to define the Python libraries that will be required to run your analytics. Use the `#!python libraries` method for the RTP instance. This method takes a dictionary as its input parameter which maps the alias name of the library to the true name of the library. For example, if you require access to both `#!python numpy` and `#!python pykx` named as `#!python np` and `#!python kx` explicitly within your function you would add this:
+
+```python
+rtp.libraries({'np': 'numpy', 'kx': 'pykx'})
+```
+
+The above example is equivalent to running `#!python import numpy as np` and `#!python import pykx as kx` on the RTP.
+
+Now that the RTP process has access to PyKX and Numpy, we can define the pre-processor function. If this pre-processor returns a `#!python None` object, the data received will not be inserted into the RTP's in-memory database. This function must take two parameters:
+
+1. Name of the table whose data is being processed
+2. The data that is being processed
+
+```python
+def preprocessor(table, data):
+ if table == 'trade':
+ return data
+ else:
+ return None
+```
+
+Now that you defined the function, you can register the pre-processor:
+
+```python
+rtp.pre_processor(preprocessor)
+```
+
+Next, we can add the final function which calculates aggregate data information on the `#!python trade` table following data insertion into the in-memory table. This function takes two parameters:
+
+1. Name of the table whose data is being processed
+2. The data that was processed
+
+In the below function we calculate the `#!python min`, `#!python max` and `#!python spread` of the trade price by symbol and finally publish this data back to the primary tickerplant on port 5010.
+
+```python
+def postprocessor(table, data):
+ agg = kx.q[table].select(
+ columns = {'min_px':'min price',
+ 'max_px': 'max price',
+ 'spread_px': 'max[price] - min price'},
+ by = {'symbol': 'symbol'})
+ kx.q['agg'] = agg # Make the table accessible from q
+ with kx.SyncQConnection(port=5010, wait=False, no_ctx=True) as q:
+ q('.u.upd', 'aggregate', agg._values)
+ return None
+```
+
+Now that you have defined this function, set the post-processor function on the RTP process:
+
+```python
+rtp.post_processor(postprocessor)
+```
+
+On successful application of this function, start your RTP to begin processing:
+
+```python
+rtp.start({'tickerplant': 'localhost:5013'})
+```
+
+Now that you have started your RTP, validate that data is available by calling the `#!python rtp` process directly and requesting the `#!python aggregate` data:
+
+```python
+rtp('aggregate')
+```
+
+Aggregate messages will be available to any subscribers to all data that you may have configured during the [subscriber generation](subscribe.md).
+
+!!! info "Important"
+
+ When developing your system for the first time, the above workflow allows you to iteratively develop your processing functions quickly before and after the ingestion of data begins. In scenarios where you need to restart your RTP or configuration is cached, running your configuration and starting as single-shot calls makes management of the system easier.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`rtp.start`](../../../api/tick.md#pykx.tick.RTP.start)
+ - [`rtp.libraries`](../../../api/tick.md#pykx.tick.RTP.libraries)
+ - [`rtp.pre_processor`](../../../api/tick.md#pykx.tick.RTP.pre_processor)
+ - [`rtp.post_processor`](../../../api/tick.md#pykx.tick.RTP.post_processor)
+
+
+### Run all setup at once
+
+The following code configures end-to-end the RTP outlined in the above sections:
+
+```python
+def preprocessor(table, data):
+ if table == 'trade':
+ return data
+ else:
+ return None
+
+def postprocessor(table, data):
+ agg = kx.q[table].select(
+ columns = {'min_px':'min price',
+ 'max_px': 'max price',
+ 'spread_px': 'max[price] - min price'},
+ by = {'symbol': 'symbol'})
+ kx.q['agg'] = agg # Make the table accessible from q
+ with kx.SyncQConnection(port=5010, wait=False, no_ctx=True) as q:
+ q('.u.upd', 'aggregate', agg._values)
+ return None
+
+rtp = kx.tick.RTP(port=5014,
+ subscriptions = ['trade'],
+ libraries={'kx': 'pykx'},
+ pre_processor=preprocessor,
+ post_processor=postprocessor,
+ vanilla=False)
+rtp.start({'tickerplant': 'localhost:5013'})
+```
+
+The advantage of this approach is that it allows for process/workflow restart in the scenario that you lose connection to a downstream process, for example. As all definitions are cached in configuration, they can easily be restarted.
+
+```python
+rtp.restart()
+```
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.RTP`](../../../api/tick.md#pykx.tick.RTP)
+ - [`rtp.start`](../../../api/tick.md#pykx.tick.RTP.start)
+ - [`rtp.restart`](../../../api/tick.md#pykx.tick.RTP.restart)
+
+## Next steps
+
+Now that you have applied analytics to your streaming data you may be interested in the following:
+
+- Subscribe to real-time updates following the instructions [here](subscribe.md).
+- Query your real-time and historical data using custom APIs [here](custom_apis.md).
+
+For some further reading, here are some related topics:
+
+- Learn more about Interprocess Communication (IPC) [here](../ipc.md).
diff --git a/docs/user-guide/advanced/streaming/subscribe.md b/docs/user-guide/advanced/streaming/subscribe.md
new file mode 100644
index 0000000..b7f2954
--- /dev/null
+++ b/docs/user-guide/advanced/streaming/subscribe.md
@@ -0,0 +1,175 @@
+---
+title: Subscribing to data
+description: How to subscribe to real-time updates from your streaming infrastructure
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streaming, subscribing
+---
+
+# Subscribe to data
+
+_This page outlines how you can get real-time updates from your streaming infrastructure._
+
+!!! Warning "Disclaimer"
+
+ The functionality outlined below provides the necessary tools for users to build complex streaming infrastructures. The generation and management of such workflows rest solely with the users. KX supports only individual elements used to create these workflows, not the end-to-end applications.
+
+In previous sections we have walked through how to build a [basic streaming infrastructure](basic.md) and [publish data](publish.md). As this data is being consumed users may wish to access it in real-time. This is achieved by subscribing to updates from the tickerplant and using Python/q as in the examples below.
+
+Subscribing to data gives you the ability to derive analytic insights into your data. However, caution should be taken when creating subscriptions in high-throughput scenarios. More details in the next section.
+
+## Protect data ingest
+
+As mentioned [here](basic.md#tickerplant) when describing issues that can impact tickerplants, subscribers who process messages too slowly can cause significant issues. This can critically impact your streaming infrastructure. Typically, a zero-latency tickerplant (which the basic infrastructure tickerplant is one of) will publish too much data for complex analytics to process.
+
+To reduce the impact of slow subscribers, a chained tickerplant can subscribe to all messages from the zero-latency TP and publish messages to down-stream subscribers at a slower rate. An important characteristic of a chained tickerplant is that, while it can provide down-stream subscribers with data, it does not maintain a log of processed data. As such, a chained tickerplant should always connect to a zero-latency tickerplant to ensure data processing in failure events.
+
+An example of such a setup can be seen below and is what we will generate throughout the following sections:
+
+![chained-subscriber](../images/chained-subscriber.png)
+
+Before providing a demonstration of subscribing to new messages, we first must generate the chained tickerplant to which users will subscribe. This is achieved using the `kx.tick.TICK` class and setting the `chained` keyword to `True`.
+
+```python
+>>> chained_tp = kx.tick.TICK(port=5013, chained=True)
+>>> chained_tp.start({'tickerplant': 'localhost:5010'})
+```
+
+In the above code we are setting the chained tickerplant on port 5013 to provide access to external processes. Additionally, on start of the chained tickerplant, we state that it should subscribe to messages from the zero-latency tickerplant on port 5010.
+
+??? "API documentation"
+ The following bullet-points provide links to the various functions used within the above section
+
+ - [`kx.tick.TICK`](../../../api/tick.md#pykx.tick.TICK)
+ - [`chained_tp.start`](../../../api/tick.md#pykx.tick.TICK.start)
+
+## Receive updates
+
+The below sections provide examples in Python and q showing how users can subscribe for all updates or can be more specific to get access to specific tables or symbols within their data.
+
+In all cases subscription is achieved through execution of the `#!python .u.sub` function on the tickerplant process which is being connected to. This function takes two parameters:
+
+1. The name(s) of the table(s) to which you are subscribing. If you are subscribing to all data, this argument should be an empty str in Python or `` ` `` in q.
+2. The name(s) of the symbol(s) to which you are subscribing. If you are subscribing to all symbols within the tables, this argument should be an empty str in Python or `` ` `` in q.
+
+### Python
+
+The code blocks provided in the below tabs show how a user can subscribe to all updates from all tables or selectively choose to get updates from a single table and symbol.
+
+=== "Subscribing to all data"
+
+ ```python
+ import pykx as kx
+
+ import sys
+ import asyncio
+
+ trade = kx.schema.builder({
+ 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ 'price': kx.FloatAtom, 'volume': kx.LongAtom})
+
+
+ async def main_loop(q, trade):
+ while True:
+ await asyncio.sleep(0.005)
+ result = q.poll_recv()
+ if result is None:
+ continue
+ table = result[1]
+ if table == 'trade':
+ trade.upsert(result[2], inplace=True)
+ sys.stdout.write(f"Trade count: {len(trade)}\r")
+ sys.stdout.flush()
+
+
+ async def main():
+ global trade
+ async with kx.RawQConnection(port=5013) as q:
+ await q('.u.sub', '', '')
+ await main_loop(q, trade)
+
+
+ if __name__ == '__main__':
+ try:
+ asyncio.run(main())
+ except KeyboardInterrupt:
+ print('Subscriber suspended')
+ ```
+
+=== "Subscribing to AAPL within the trade table"
+
+ ```python
+ import pykx as kx
+
+ import sys
+ import asyncio
+
+ trade = kx.schema.builder({
+ 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ 'price': kx.FloatAtom, 'volume': kx.LongAtom})
+
+
+ async def main_loop(q, trade):
+ while True:
+ await asyncio.sleep(0.005)
+ result = q.poll_recv()
+ if result is None:
+ continue
+ table = result[1]
+ if table == 'trade':
+ trade.upsert(result[2], inplace=True)
+ sys.stdout.write(f"Trade count: {len(trade)}\r")
+ sys.stdout.flush()
+
+
+ async def main():
+ global trade
+ async with kx.RawQConnection(port=5013) as q:
+ await q('.u.sub', 'trade', 'AAPL')
+ await main_loop(q, trade)
+
+
+ if __name__ == '__main__':
+ try:
+ asyncio.run(main())
+ except KeyboardInterrupt:
+ print('Subscriber suspended')
+ ```
+
+### q
+
+The code blocks provided in the below tabs show how a user can subscribe to all updates from all tables or selectively choose to get updates from a single table and symbol.
+
+=== "Subscribing to all data"
+
+ ```q
+ h:hopen 5013
+ upd:insert
+ h(`.u.sub;`;`)
+
+ \t 1000
+ .z.ts:{show count trade;}
+ ```
+
+=== "Subscribing to AAPL within the trade table"
+
+ ```q
+ h:hopen 5013
+ upd:insert
+ h(`.u.sub;`trade;`AAPL)
+
+ \t 1000
+ .z.ts:{show count trade;}
+ ```
+
+
+## Next steps
+
+Now that you have created a data subscriber you may be interested in the following:
+
+- Perform complex analysis on your real-time data following the instructions [here](rta.md).
+- Query your real-time and historical data using custom APIs [here](custom_apis.md).
+
+For some further reading, here are some related topics:
+
+- Learn more about Interprocess Communication (IPC) [here](../ipc.md).
diff --git a/docs/user-guide/advanced/streamlit.md b/docs/user-guide/advanced/streamlit.md
new file mode 100644
index 0000000..10bcd74
--- /dev/null
+++ b/docs/user-guide/advanced/streamlit.md
@@ -0,0 +1,244 @@
+---
+title: Streamlit Integration
+description: Integrate PyKX Connections into your Streamlit application
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, streamlit, visualisation, query, web, application
+---
+
+# Streamlit Integration
+
+!!! Warning
+
+ Streamlit uses a caching mechanism that relies on multiple threads. To use PyKX under these conditions, we suggest that you set `PYKX_THREADING` to `True`. For more information on the threading feature see [here](threading.md), and for information on setting configuration see [here](../configuration.md).
+
+[Streamlit](https://streamlit.io) provides an open source framework allowing users to turn Python scripts into sharable web applications. Functionally, Streamlit provides access to external data-sources using the concept of `connections`, which allow users to develop conforming APIs that integrate directly with streamlit applications as extension connection types.
+
+The integration outlined below makes use of this by generating a new `pykx.streamlit.PyKXConnection` connection type which provides the ability to create synchronous connections to existing q/kdb+ sessions.
+
+A full breakdown of the API documentation of this class can be found [here](../../api/streamlit.md).
+
+## Requirements and limitations
+
+To run this functionality, users must have `streamlit>=1.28` installed local to their Python session.
+
+This can be installed using the following command when installing PyKX:
+
+```bash
+pip install pykx[streamlit]
+```
+
+## Using PyKX with Streamlit
+
+The PyKX Streamlit integration provides users with the ability to do the following:
+
+1. Establish a Streamlit compliant connection to a q/kdb+ process
+1. Check health of a connection and restart connection as necessary
+1. Query the remote process using `q`, `SQL` and `qSQL`
+
+As mentioned above PyKX provides a streamlit connection type `pykx.streamlit.PyKXConnection` which can be used with the streamlit [`st.connection`](https://docs.streamlit.io/develop/api-reference/connections/st.connection) functionality to integrate your streamlit application with PyKX.
+
+In the below section we will discuss how these connections are established, maintained and used for query.
+
+### Connecting to kdb+
+
+In the below example we connect to a variety of kdb+ processes on port 5050 with a streamlit connection. In each case we name the connection `'pykx'`, but this name is arbitrary and is open to a user to modify.
+
+=== "Basic Connection generation"
+
+ Connect to a process which does not require username/password
+
+ ```python
+ import streamlit as st
+ import pykx as kx
+ connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ host='localhost',
+ port=5050)
+ ```
+
+=== "User - Password protected connection"
+
+ Connect to a process requiring a username/password to be provided
+
+ ```python
+ import streamlit as st
+ import pykx as kx
+ connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ host='localhost',
+ port=5050,
+ username='user',
+ password='password')
+ ```
+
+=== "Connection to automatically reconnect if dropped"
+
+ Attempt to reconnect to the process up to 5 times, using an exponential backoff, if the connection is lost
+
+ ```python
+ import streamlit as st
+ import pykx as kx
+ connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ host='localhost',
+ port=5050,
+ reconnection_attempts=5)
+ ```
+
+### Checking and restoring the health of your connections
+
+In streamlit, your application may be running for a significant period of time. In such situations it is not uncommon for your original connection to a server to drop.
+
+To help with such cases there are a number of methods provided by PyKX to recover your environment:
+
+- The addition of an `is_healthy` method to facilitate checking if the remote server can be interacted with.
+- The availability of a `reset` method to allow a connection which is deemed not to be healthy to be re-established.
+
+The following provides an example code block showing use of these methods
+
+```python
+import streamlit as st
+import pykx as kx
+connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ host='localhost',
+ port=5050)
+
+if not connection.is_healthy():
+ connection.reset()
+```
+
+### Querying using a connection
+
+Process query is available in three formats
+
+1. SQL
+1. Pythonic qSQL
+1. q
+
+The following code blocks show use of each of these query types.
+
+In each case we assume that a healthy connection has been established and the user is attempting to retrieve the maximum value of data in column 'price' by symbol ('sym') from a table named 'trade'
+
+=== "Pythonic qSQL"
+
+ ```python
+ >>> conn.query('trade',
+ ... columns=kx.Column('price').max(),
+ ... by=kx.Column('sym'),
+ ... format='qsql')
+ pykx.KeyedTable(pykx.q('
+ sym | price
+ ----| ---------
+ AAPL| 0.9877844
+ GOOG| 0.9598964
+ IBM | 0.9785
+ '))
+ ```
+
+=== "SQL"
+
+ SQL querying requires the [SQL interface to kdb+](https://code.kx.com/insights/core/sql.html) to be loaded on the server
+
+ ```python
+ >>> conn.query('select sym, max(price) from trade GROUP BY sym', format='sql')
+ pykx.Table(pykx.q('
+ sym price
+ --------------
+ AAPL 0.9877844
+ GOOG 0.9598964
+ IBM 0.9785
+ '))
+ ```
+
+=== "q"
+
+ ```python
+ >>> conn.query('select max price by sym from trade', format='q')
+ pykx.KeyedTable(pykx.q('
+ sym | price
+ ----| ---------
+ AAPL| 0.9877844
+ GOOG| 0.9598964
+ IBM | 0.9785
+ '))
+ ```
+
+## Example
+
+Now that you have seen some of the functions in action you can generate a streamlit script to read data from a table and generate a graph.
+
+### Pre-requisites
+
+You must have available to you a q session running on port 5050 and which has available the following table
+
+```q
+\p 5050
+N:1000
+tab:([]sym:N?`AAPL`MSFT`GOOG`FDP;price:100+N?100f;size:10+N?100)
+```
+
+### Script
+
+The following script generates a simple streamlit application which
+
+1. Sets environment variables and imports required libraries
+1. Defines a function to run for generation of the streamlit application completing the following
+ 1. Name the streamlit application
+ 1. Create a connection to the q process initialised on port 5050
+ 1. Query the q process retrieving a small tabular subset of data using the Pythonic Query API
+ 1. Generates a Matplotlib graph directly using the PyKX table
+ 1. Displays both the table and graph
+
+The script which follows can be downloaded [here](examples/streamlit.py)
+
+??? Note "Expand here to view the script text"
+
+ ```python
+ # Set environment variables needed to run Streamlit integration
+ import os
+
+ # This is optional but suggested as without its usage caching
+ # is not supported within streamlit
+ os.environ['PYKX_THREADING'] = 'true'
+
+ import streamlit as st
+ import pykx as kx
+ import matplotlib.pyplot as plt
+
+ def main():
+ st.header('PyKX Demonstration')
+ connection = st.connection('pykx',
+ type=kx.streamlit.PyKXConnection,
+ port=5050)
+ if connection.is_healthy():
+ tab = connection.query(
+ 'tab',
+ where = kx.Column('size') < 11
+ )
+ else:
+ try:
+ connection.reset()
+ except BaseException:
+ raise kx.QError('Connection object was not deemed to be healthy')
+ fig, x = plt.subplots()
+ x.scatter(tab['size'], tab['price'])
+
+ st.write('Queried kdb+ remote table')
+ st.write(tab)
+
+ st.write('Generated plot')
+ st.pyplot(fig)
+
+ if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ kx.shutdown_thread()
+ ```
+
+## Next Steps
+
+- Learn more about querying your data [here](../fundamentals/query/index.md)
+- Learn more about Interprocess Communication [here](ipc.md)
diff --git a/docs/user-guide/advanced/subprocess.md b/docs/user-guide/advanced/subprocess.md
new file mode 100644
index 0000000..d954ab9
--- /dev/null
+++ b/docs/user-guide/advanced/subprocess.md
@@ -0,0 +1,22 @@
+---
+title: PyKX in Subprocesses
+description: Outline using pykx in python subprocesses
+date: August 2024
+author: KX Systems, Inc.
+tags: PyKX
+---
+
+# Using PyKX in python subprocesses
+
+_This page outlines using PyKX in a Python subprocess._
+
+To use PyKX in a python subprocess you should spawn the process using the `#!python kx.PyKXReimport` function as follows:
+
+```python
+import pykx as kx
+import subprocess
+with kx.PyKXReimport():
+ subprocess.Popen(['python', 'file.py']) #_Run a python subprocess that loads a python script containing a PyKX import
+```
+
+Failing to reimport the PyKX package running in the parent process can cause the subprocess to crash with a segmentation fault. The `#!python PyKXReimport` function and possible causes of segmentation faults are covered in more detail [here](../../api/reimporting.md).
diff --git a/docs/user-guide/advanced/threading.md b/docs/user-guide/advanced/threading.md
new file mode 100644
index 0000000..2d53e04
--- /dev/null
+++ b/docs/user-guide/advanced/threading.md
@@ -0,0 +1,58 @@
+---
+title: Multithreaded Execution
+description: Learn how multithreaded integration for PyKX works
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, threading, python, asyncio, multithreaded, noupdate
+---
+
+# Multithreaded execution
+
+_This page explains how to execute q code on multiple threads._
+
+When used in its default configuration, PyKX does not support multithreaded execution of q code on Python threads. This limitation arises because only the main thread (the one importing PyKX and loading `#!python libq`) can modify the state in PyKX’s assigned memory.
+
+As a result, PyKX’s integration with Python’s multithreading libraries, such as [`#!python threading`](https://docs.python.org/3/library/threading.html) and [`#!python asyncio`](https://docs.python.org/3/library/asyncio.html), is restricted. This also affects other Python libraries that utilize multiple threads simultaneously, including [`#!python streamlit`](https://streamlit.io/), which uses multiple threads to manage data caching. Read more information about [PyKX’s integration with Streamlit](streamlit.md).
+
+Use cases for multithreading with PyKX:
+
+- **Upserting Data**: Insert or update data in a global table from multiple sources.
+- **Querying Multiple Processes**: Open `#!python QConnection` instances to query several processes simultaneously and combine their results.
+
+If you don’t configure PyKX for multithreading, you might encounter a `#!python noupdate` error. To avoid this, consider enabling the feature described here. This feature allows multithreading by creating a background thread that loads `#!python libq`.
+
+All calls to q from other threads are run on this background thread, created using `#!python libpthread` for minimal overhead. This setup enables safe state modification in multithreaded programs with minimal performance impact.
+
+## Before enabling
+
+Before globally enabling this functionality, consider the following:
+
+- **Concurrency Cost**: While the overhead for offloading calls onto a secondary thread is low, there will always be a cost in forcing a thread context switch. As such single-threaded performance is faster at the cost of concurrency.
+- **Memory-Safe Use**: While using `#!python PYKX_THREADING` it's neither possible nor memory safe to have `#!python q` call back into Python; this could result in memory corruption or side-effects which may not be immediately obvious.
+- **Shutdown**: When using `#!python PYKX_THREADING`, it creates a background thread for running queries to `#!python q`. Make sure to call `#!python kx.shutdown_thread()` at the end of your script to properly close this thread. If you don’t, the thread will remain running in the background after the script finishes. To avoid this, it’s best to start your `#!python main` function within a `#!python try` - `#!python finally` block.
+
+## How to enable multithreaded execution
+
+By default, PyKX doesn't start with multithreading support enabled. To enable this feature, you must set `#!python PYKX_THREADING=True` during [configuration](../configuration.md). You can do this either as an environment variable or by adding this configuration to a `#!python .pykx-config` file as outlined [here](../configuration.md#configuration-file).
+
+## Example usage
+
+The following example shows the basic structure suggested for using this functionality:
+
+```Python
+import os
+import asyncio
+os.environ['PYKX_THREADING'] = '1'
+import pykx as kx
+
+def main(): # Your scripts entry point
+ ...
+
+if __name__ == '__main__':
+ try:
+ main()
+ finally:
+ kx.shutdown_thread() # This will be called if the script completes normally or errors early
+```
+
+- A more complete worked example can be found [here](../../examples/threaded_execution/threading.md).
diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md
index 1f07f44..40e6aba 100644
--- a/docs/user-guide/configuration.md
+++ b/docs/user-guide/configuration.md
@@ -1,12 +1,22 @@
-# PyKX Configurable Behavior
+---
+title: Configure PyKX
+description: How to configure PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, data, convert
+---
-The following document outlines how users can modify the underlying behavior of PyKX based on their specific use-case. The [options](#options) presented are provided for use-case/performance tuned optimisations of the library itself.
+# Configure PyKX
-Setting of these configuration options is supported via a [configuration file](#configuration-file) or [environment variables](#environment-variables) as described below. In all cases environment variable definitions will take precedence over definitions within the configuration file.
+_This page provides details on how to configure PyKX using a configuration file and/or environment variables._
-## Configuration File
+To modify the underlying behavior of PyKX based on your specific use-case, check out your [options](#options) for use-case/performance tuned optimizations of the library. You can configure them using a [configuration file](#configuration-file) or [environment variables](#environment-variables) as described below.
-Users can use a configuration file `.pykx-config` to define configuration options for PyKX initialization. The following provides an example of a `.pykx-config` file which operates according to `*.toml` syntax:
+!!! warning "Important: In all cases, environment variable definitions take precedence over definitions within the configuration file."
+
+## Configuration file
+
+If you choose to use a configuration file `#!python .pykx-config` to define your options for PyKX initialization, here's an example of a `#!python .pykx-config` file which operates according to `#!python *.toml` syntax:
```bash
[default]
@@ -21,15 +31,15 @@ PYKX_RELEASE_GIL="true"
PYKX_BETA_FEATURES="true"
```
-On import of PyKX the file `.pykx-config` will be searched for according to the following path ordering, the first location containing a `.pykx-config` file will be used for definition of the :
+On import of PyKX, the file `#!python .pykx-config` is searched for according to the following path ordering. The first location containing a `#!python .pykx-config` file is used for definition of the PyKX configuration:
-| Order | Location |
-|-------|---------------|
-| 1. | `Path('.')` |
-| 2. | `Path(os.getenv('PYKX_CONFIGURATION_LOCATION'))` |
-| 3. | `Path.home()` |
+| **Order** | **Location** |
+|-----------|--------------------------------------------------|
+| 1. | `Path('.')` |
+| 2. | `Path(os.getenv('PYKX_CONFIGURATION_LOCATION'))` |
+| 3. | `Path.home()` |
-When loading this file unless otherwise specified PyKX will use the profile `default`. Use of non default profiles from within this file can be configured through the setting of an environment variable `PYKX_PROFILE` prior to loading of PyKX, for example using the above configuration file.
+When loading this file, unless otherwise specified, PyKX uses the profile `#!python default`. To configure non-default profiles from within this file, set an environment variable `#!python PYKX_PROFILE` prior to loading of PyKX, for example using the above configuration file.
=== "default"
@@ -49,9 +59,22 @@ When loading this file unless otherwise specified PyKX will use the profile `def
True
```
+To add values to your configuration file, you can either modify the file directly or use the helper function `#!python kx.util.add_to_config`, for example:
+
+```python
+>>> import pykx as kx
+>>> kx.util.add_to_config({'PYKX_GC': 'True', 'PYKX_BETA_FEATURES': 'True'})
+
+Configuration updated at: /Users/conormccarthy/.pykx-config.
+Profile updated: default.
+Successfully added:
+ - PYKX_GC = True
+ - PYKX_BETA_FEATURES = True
+```
+
## Environment variables
-For users wishing to make use of the provided [options](#options) as environment variables this is also supported, for example a user can define the environment variables to use before import of PyKX as follows.
+If you wish to configure the [options](#options) using environment variables, define them before importing PyKX, for example:
```python
>>> import os
@@ -64,86 +87,87 @@ True
## Options
-The options can be used to tune PyKX behavior at run time. These variables need to be set before attempting to import PyKX and will take effect for the duration of the execution of the PyKX process.
+You have various options to tune PyKX behavior at run time. You must set these variables before importing PyKX. They remain effective throughout the execution of the PyKX process.
### General
-The following variables can be used to enable or disable advanced features of PyKX across all modes of operation:
-
-| Option | Default | Values | Description | Status |
-|---------------------------------|---------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------|
-| `PYKX_BETA_FEATURES` | `False` | `1` or `true` | Enable all Beta features supplied with PyKX allowing users to test and prototype code slated for later releases. | |
-| `PYKX_QDEBUG` | `False` | `1` or `true` | Enable retrieval of backtrace information on error being raised when executing q functions, this can alternatively be enabled by setting `debug=True` as a keyword in calls to `kx.q`. | |
-| `PYKX_IGNORE_QHOME` | `False` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | |
-| `PYKX_KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. | |
-| `PYKX_ALLOCATOR` | `False` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. | |
-| `PYKX_GC` | `False` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the Numpy array at the cost of a small overhead. | |
-| `PYKX_LOAD_PYARROW_UNSAFE` | `False` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. | |
-| `PYKX_MAX_ERROR_LENGTH` | `256` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. | |
-| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from [Insights Core](https://code.kx.com/insights/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. | |
-| `PYKX_Q_LIB_LOCATION` | `UNSET` | Path to a directory containing q libraries necessary for loading PyKX | See [here](../release-notes/changelog.md#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. | |
-| `PYKX_RELEASE_GIL` | `False` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. | |
-| `PYKX_Q_LOCK` | `False` | `1` or `true` | When PYKX_Q_LOCK is enabled a re-entrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be thread safe even when using PYKX_RELEASE_GIL. | |
-| `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `False` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. | |
-| `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. | |
-| `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. | |
-| `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. | |
-| `PYKX_NO_SIGNAL` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions with are reset by PyKX on initialisation in licensed modality. | |
-| `PYKX_4_1_ENABLED` | `False` | `1` or `true` | Load version 4.1 of `libq` when starting `PyKX` in licensed mode, this environment variable does not work without a valid `q` license. | |
-| `PYKX_NO_SIGINT` | `False` | `1` or `true` | Avoid setting `signal.signal(signal.SIGINT)` once PyKX is loaded, these are presently set to the Python default values once PyKX is loaded to ensure that PyKX licensed modality does not block their use by Python. | `DEPRECATED`, please use `PYKX_NO_SIGNAL` |
-| `IGNORE_QHOME` | `True` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | `DEPRECATED`, please use `PYKX_IGNORE_QHOME` |
-| `KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. | `DEPRECATED`, please use `PYKX_KEEP_LOCAL_TIMES` |
-
-
-The variables below can be used to set the environment for q (embedded in PyKX, in licensed mode):
-
-| Variable | Values | Description |
-|----------|----------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------|
-| `QARGS` | See link | Command-line flags to pass to q, see [here](https://code.kx.com/q/basics/cmdline/) for more information. |
-| `QHOME` | Path to the users q installation folder | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. |
-| `QLIC` | Path to the folder where the q license should be found | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. |
-| `QINIT` | Path to an additional `*.q` file loaded after `PyKX` has initialized | See [here](https://code.kx.com/q4m3/14_Introduction_to_Kdb%2B/#1481-the-environment-variables) for more information. |
-
-The following variables can be set in configuration or as environment variables to define the `kc.lic` or `k4.lic` license used by PyKX if no license is found
-
-| Variable | Description |
+To enable or disable advanced features of PyKX across all modes of operation, use the following variables:
+
+| **Option** | **Default** | **Values** | **Description** |
+|---------------------------------|-------------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `PYKX_BETA_FEATURES` | `False` | `1` or `true` | Enable all Beta features supplied with PyKX allowing users to test and prototype code slated for later releases. |
+| `PYKX_QDEBUG` | `False` | `1` or `true` | Enable retrieval of backtrace information on error being raised when executing q functions, this can alternatively be enabled by setting `debug=True` as a keyword in calls to `kx.q`. |
+| `PYKX_IGNORE_QHOME` | `False` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts and their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. |
+| `PYKX_KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. |
+| `PYKX_ALLOCATOR` | `False` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. |
+| `PYKX_GC` | `False` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the Numpy array at the cost of a small overhead. |
+| `PYKX_LOAD_PYARROW_UNSAFE` | `False` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. |
+| `PYKX_MAX_ERROR_LENGTH` | `256` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. |
+| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from [Insights Core](https://code.kx.com/insights/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. |
+| `PYKX_Q_LIB_LOCATION` | `UNSET` | Path to a directory containing q libraries necessary for loading PyKX | See [here](../release-notes/changelog.md#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. |
+| `PYKX_RELEASE_GIL` | `False` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. |
+| `PYKX_Q_LOCK` | `False` | `1` or `true` | When PYKX_Q_LOCK is enabled a re-entrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be thread safe even when using PYKX_RELEASE_GIL. |
+| `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `False` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. |
+| `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. |
+| `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. |
+| `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. |
+| `PYKX_NO_SIGNAL` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions which are reset by PyKX on initialisation in licensed modality. |
+| `PYKX_4_1_ENABLED` | `False` | `1` or `true` | Load version 4.1 of `libq` when starting `PyKX` in licensed mode, this environment variable does not work without a valid `q` license. |
+| `PYKX_JUPYTERQ` | `False` | `1` or `true` | When enabled, any Jupyter Notebook will start in q first mode by default when PyKX is imported. |
+| `PYKX_Q_EXECUTABLE` | `q` | string denoting path to q executable | This allows users to specify the location of the q executable which should be called when making use of the `tick` module for defining streaming infrastructures. |
+| `PYKX_SUPPRESS_WARNINGS` | `False` | `1` or `true` | This allows the user to suppress warnings that have been suggested as sensible to be raised by users for PyKX in situations where edge cases can result in unexpected behaviour. Warnings in scenarios where a decision has been made to not support behaviour explicitly rather than where user discretion is required are still maintained. |
+| `PYKX_CONFIGURATION_LOCATION` | `.` | The path to the folder containing the `.pykx-config` file. | This allows users to specify a location other than the `.` or a user's `home` directory to store their configuration file outlined [here](#configuration-file). |
+| `PYKX_CONFIGURATION_PROFILE` | `default` | The "profile" defined in `.pykx-config` file to be used. | Users can specify which set of configuration variables are to be used by modifying the `PYKX_CONFIGURATION_PROFILE` variable see [here](#configuration-file) for more details. Note that this configuration can only be used as an environment variable. |
+
+
+To set the environment for q (embedded in PyKX, in licensed mode), use the variables below:
+
+
+| **Variable** | **Values** | **Description** |
+|--------------|--------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------|
+| `QARGS` | See link | Command-line flags to pass to q, see [here](https://code.kx.com/q/basics/cmdline/) for more information. |
+| `QHOME` | Path to the user's q installation folder | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. |
+| `QLIC` | Path to the folder where the q license should be found | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. |
+| `QINIT` | Path to an additional `*.q` file loaded after `PyKX` has initialized | See [here](https://code.kx.com/q4m3/14_Introduction_to_Kdb%2B/#1481-the-environment-variables) for more information. |
+
+If no license is found, set the following variables either in configuration or as environment variables to define the `kc.lic` or `k4.lic` license used by PyKX:
+
+| **Variable** | **Description** |
|---------------------|------------------------------------------------------------------------------------------------|
| `KDB_LICENSE_B64` | This should contain the base-64 encoded contents of a valid `kc.lic` file with `pykx` enabled. |
| `KDB_K4LICENSE_B64` | This should contain the base-64 encoded contents of a valid `k4.lic` file with `pykx` enabled. |
-#### PyKX QARGS Supported Additions
+#### PyKX QARGS supported additions
-When using PyKX users can use the following values when defining `QARGS` to modify the behaviour of PyKX at initialisation when running within a Linux environment.
+When using PyKX, you can define `#!python QARGS` to modify its behavior during initialization in a Linux environment. Here are some of the values you can use for `#!python QARGS`:
-| Input | Description |
+| **Input** | **Description** |
|----------------|---------------------------------------------------------------------------------|
-| `--no-qce` | Ensure that no kdb Insights libraries are loaded at initialisation of PyKX. |
-| `--no-kurl` | Ensure that the kdb Insights `kurl` library is not loaded at initialisation. |
-| `--no-objstor` | Ensure that the kdb Insights `objstor` library is not loaded at initialisation. |
-| `--no-qlog` | Ensure that the kdb Insights `qlog` library is not loaded at initialisation. |
-| `--no-sql` | Ensure that the kdb Insights `sql` library is not loaded at initialisation. |
+| `--no-qce` | Ensure that no kdb Insights libraries are loaded at initialization of PyKX. |
+| `--no-kurl` | Ensure that the kdb Insights `kurl` library is not loaded at initialization. |
+| `--no-objstor` | Ensure that the kdb Insights `objstor` library is not loaded at initialization. |
+| `--no-qlog` | Ensure that the kdb Insights `qlog` library is not loaded at initialization. |
+| `--no-sql` | Ensure that the kdb Insights `sql` library is not loaded at initialization. |
### PyKX under q
-PyKX can be loaded and used from a q session (see [here](../pykx-under-q/intro.md) for more information). The following variables are specific to this mode of operation.
+You can load PyKX and [use it from a q session](../pykx-under-q/intro.md). The following variables are specific to this mode of operation:
-| Variable | Values | Description | Status |
-|---------------------------|-------------------------------|-------------|--------|
-| `PYKX_DEFAULT_CONVERSION` | `py`, `np`, `pd`, `pa` or `k` | Default conversion to apply when passing q objects to Python. Converting to Numpy (`np`) by default. | |
-| `PYKX_SKIP_UNDERQ` | `1` or `true` | When importing PyKX from Python, PyKX will also load `pykx.q` under its embedded q. This variable skips this step. | |
-| `PYKX_UNSET_GLOBALS` | `1` or `true` | By default "PyKX under q" will load some utility functions into the global namespace (eg. `print`). This variable prevents this. | |
-| `PYKX_EXECUTABLE` | File path | The path to use for the Python executable | |
-| `PYKX_PYTHON_LIB_PATH` | File path | The path to use for loading libpython. | |
-| `PYKX_PYTHON_BASE_PATH` | File path | The path to use for the base directory of your Python installation. | |
-| `PYKX_PYTHON_HOME_PATH` | File path | The path to use for the base Python home directory (used to find site packages). | |
-| `SKIP_UNDERQ` | `1` or `true` | When importing PyKX from Python, PyKX will also load `pykx.q` under its embedded q. This variable skips this step. | `DEPRECATED`, please use `PYKX_SKIP_UNDERQ` |
-| `UNSET_PYKX_GLOBALS` | `1` or `true` | By default "PyKX under q" will load some utility functions into the global namespace (eg. `print`). This variable prevents this. | `DEPRECATED`, please use `PYKX_UNSET_GLOBALS` |
+| **Variable** | **Values** | **Description** |
+|---------------------------|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `PYKX_DEFAULT_CONVERSION` | `py`, `np`, `pd`, `pa` or `k` | Default conversion to apply when passing q objects to Python. Converting to Numpy (`np`) by default. |
+| `PYKX_SKIP_UNDERQ` | `1` or `true` | When importing PyKX from Python, PyKX also loads `pykx.q` under its embedded q. This variable skips this step. |
+| `PYKX_EXECUTABLE` | File path | The path to use for the Python executable |
+| `PYKX_USE_FIND_LIBPYTHON` | `1` or `true` | Whether the Python package [`find-libpython`](https://pypi.org/project/find-libpython/) should be used to determine the location of `libpython.[so|dll]`. Alternatively, the location can be set manually using `PYKX_PYTHON_LIB_PATH`. |
+| `PYKX_PYTHON_LIB_PATH` | File path | The path to use for loading libpython. |
+| `PYKX_PYTHON_BASE_PATH` | File path | The path to use for the base directory of your Python installation. |
+| `PYKX_PYTHON_HOME_PATH` | File path | The path to use for the base Python home directory (used to find site packages). |
### q Cloud Edition features with Insights Core (Linux only)
-On Linux, the q Cloud Edition features, coming with Insights Core, can be used to read data from Cloud Storage (AWS S3, Google Cloud Storage, Azure Blob Storage). Credentials to access the Cloud Storage can be passed using specific environment variables. For more information, see the two following links:
+On Linux, the q Cloud Edition features coming with Insights Core can be used to read data from Cloud Storage (AWS S3, Google Cloud Storage, Azure Blob Storage). Credentials to access the Cloud Storage can be passed using specific environment variables. For more information, go to:
-- https://code.kx.com/insights/core/objstor/main.html#environment-variables
-- https://code.kx.com/insights/core/kurl/kurl.html#automatic-registration-using-credential-discovery
+- [kdb Insights SDK environment variables](https://code.kx.com/insights/core/objstor/main.html#environment-variables)
+- [kdb Insights SDK automatic registration using credential discovery](https://code.kx.com/insights/core/kurl/kurl.html#automatic-registration-using-credential-discovery)
diff --git a/docs/user-guide/fundamentals/conversion_considerations.md b/docs/user-guide/fundamentals/conversion_considerations.md
index 1fa75f4..5870c17 100644
--- a/docs/user-guide/fundamentals/conversion_considerations.md
+++ b/docs/user-guide/fundamentals/conversion_considerations.md
@@ -1,4 +1,14 @@
-# PyKX Conversion Considerations
+---
+title: Convert data types in PyKX
+description: Converting data types in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, data, convert
+---
+
+# PyKX conversion considerations
+
+_This page provides details on data types and conversions in PyKX._
PyKX attempts to make conversions between q and Python as seamless as possible.
However due to differences in their underlying implementations there are cases where 1 to 1 mappings are not possible.
@@ -14,134 +24,12 @@ The key PyKX APIs around data types and conversions are outlined under:
## Text representation in PyKX
-[Text representation in PyKX](../fundamentals/text.md) requires consideration as there are some key differences between the `Symbol` and `Char` data types.
+Handling and converting [text in PyKX](./text.md) requires consideration as there are some key differences between the `Symbol` and `Char` data types.
## Nulls and Infinites
Most q datatypes have the concepts of null, negative infinity, and infinity. Python does not have the concept of infinites and it's null behaviour differs in implementation. The page [handling nulls and infinities](./nulls_and_infinities.md) details the needed considerations when dealing with these special values.
-## Temporal types
-
-### Timestamp/Datetime types
-
-Particular care is needed when converting temporal types as Python and q use different [epoch](https://en.wikipedia.org/wiki/Epoch_(computing)) values:
-
-* q 2000
-* Python 1970
-
-__Note:__ The following details focus on `NumPy` but similar considerations should be taken in to account when converting Python, Pandas, and PyArrow objects.
-
-The 30 year epoch offset means there are times which are unreachable in one or the other language:
-
-| | TimestampVector | datetime64[ns] |
-|---------------|---------------------------------|---------------------------------|
-| Minimum value | `1707.09.22D00:12:43.145224194` | `1677-09-21T00:12:43.145224194` |
-| Maximum value | `2292.04.10D23:47:16.854775806` | `2262-04-11T23:47:16.854775807` |
-
-As such the range of times which can be directly converted should be considered:
-
-* Minimum value: `1707-09-22T00:12:43.145224194`
-* Maximum value: `2262-04-11T23:47:16.854775807`
-
-As mentioned [above](#nulls-and-infinites) most q data types have null, negative infinity, and infinity values.
-
-| | q representation | datetime64[ns] |
-|-------------------|------------------|---------------------------------|
-| Null | `0Np` | `NaT` |
-| Negative Infinity | `-0Wp` | `1707-09-22T00:12:43.145224193` |
-| Infinity | `0Wp` | Overflow cannot be represented |
-
-Converting from q to NumPy using `.np()`, `0Np` and `-0Wp` convert to meaningful values but `0Wp` overflows:
-
-```q
->>> kx.q('0N -0W 0Wp').np()
-array(['NaT', '1707-09-22T00:12:43.145224193', '1707-09-22T00:12:43.145224191'], dtype='datetime64[ns]')
-```
-
-Converting to q using `toq` by default only the NumPy maximum values converts to a meaningful value:
-
-```q
->>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]')
->>> kx.toq(arr)
-pykx.TimestampVector(pykx.q('2262.04.11D23:47:16.854775808 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807'))
-```
-
-To additionally handle `NaT` being converted the `handle_nulls` keyword can be used:
-
-```q
->>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]', handle_nulls=True)
->>> kx.toq(arr)
-pykx.TimestampVector(pykx.q('0N 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807'))
-```
-
-Using `raw=True` we can request that the epoch offset is not applied. This allows for the underlying numeric values to be accessed directly:
-
-```python
->>> kx.q('0N -0W 0Wp').np(raw=True)
-array([-9223372036854775808, -9223372036854775807, 9223372036854775807])
-```
-
-Passing back to q with `toq` these are then presented as the long null, negative infinity, and infinity:
-
-```python
->>> kx.toq(kx.q('0N -0W 0Wp').np(raw=True))
-pykx.LongVector(pykx.q('0N -0W 0W'))
-```
-
-`ktype` can be passed during `toq` to specify desired types:
-
-```python
->>> kx.toq(pd.DataFrame(data= {'d':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}), ktype={'d':kx.DateVector})
-pykx.Table(pykx.q('
-d
-----------
-2020.09.08
-'))
-```
-
-Note that:
-
-* Dictionary based conversion is only supported when operating in [licensed mode](../../user-guide/advanced/modes.md).
-* Data is first converted to the default type and then cast to the desired type.
-
-Other items of note:
-
-* In NumPy further data types exist `datetime64[us]`, `datetime64[ms]`, `datetime64[s]` which due to their lower precision have a wider range of dates they can represent. When converted using to q using `toq` these all present as q `Timestamp` type and as such only dates within the range this data type can represent should be converted.
-* Pandas 2.* changes behavior and conversions should be reviewed as part of an upgrade of this package. [PyKX to Pythonic data type mapping](../../api/pykx-q-data/type_conversions.md) includes examples showing differences seen when calling `.pd()`.
-
-### Duration types
-
-Duration types do not have the issue of epoch offsets but some range limitations exist when converting between Python and PyKX.
-
-`kx.SecondVector` and `kx.MinuteVector` convert to `timedelta64[s]`:
-
-| | q representation | timedelta64[s] |
-|-------------------------------------|------------------|---------------------------|
-| `kx.SecondVector` Null | `0Nv` | `NaT` |
-| `kx.SecondVector` Negative Infinity | `-0Wv` | `-24856 days +20:45:53` |
-| `kx.SecondVector` Infinity | `0Wv` | `24855 days 03:14:07` |
-| `kx.MinuteVector` Null | `0Nu` | `NaT` |
-| `kx.MinuteVector` Negative Infinity | `-0Wu` | `-1491309 days +21:53:00` |
-| `kx.MinuteVector` Infinity | `0Wu` | `1491308 days 02:07:00` |
-
-When converting Python to q using `toq` care must be taken as `timedelta64[s]` is 64 bit and converts to `kx.SecondVector` which is 32 bit:
-
-| | SecondVector | timedelta64[s] |
-|---------------|--------------|-----------------------------------|
-| Minimum value | `**:14:06` | `106751991167300 days 15:30:07` |
-| Maximum value | `-**:14:06` | `-106751991167301 days +08:29:53` |
-
-As such the range of times which can be directly converted should be considered:
-
-* Minimum value: `-24856 days +20:45:54`
-* Maximum value: `24855 days 03:14:06`
-
-q does not display values of second type over `99:59:59`, beyond this `**` is displayed in the hour field.
-The data is still stored correctly and will display when converted:
+## Temporal data types
-```python
->>> kx.q('99:59:59 +1')
-pykx.SecondAtom(pykx.q('**:00:00'))
->>> kx.q('99:59:59 +1').pd()
-Timedelta('4 days 04:00:00')
-```
+Converting [temporal data types](./temporal.md) in PyKX involves handling [timestamp/datetime](./temporal.md#timestampdatetime-types) types and [duration](./temporal.md#duration-types) types, each with specific considerations due to differences in how Python and q (the language used by kdb+) represent these data types.
\ No newline at end of file
diff --git a/docs/user-guide/fundamentals/creating.md b/docs/user-guide/fundamentals/creating.md
index 8b6357c..bd16a94 100644
--- a/docs/user-guide/fundamentals/creating.md
+++ b/docs/user-guide/fundamentals/creating.md
@@ -1,25 +1,39 @@
-# Interacting with PyKX objects
+---
+title: Create and convert PyKX objects
+description: How to generate PyKX objects
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, PyKX objects,
+---
-In order to use the power of q and the functionality provided by PyKX a user must at some point interact with a PyKX object. At it's most basic level these items are allocated C representations of q/kdb+ objects within a memory space managed by q. Keeping the data in this format allows it to be used directly for query/analytic execution in q without any translation overhead.
+# Create and convert PyKX objects
-There are a number of ways to generate PyKX objects:
+_This page provides details on how to generate and convert PyKX objects._
-1. Explicitly converting from a Python object to a PyKX object
-2. By evaluating q code using `kx.q`
-3. By retrieving a named entity from q's memory
-4. Through query of an external q session
+!!! tip "Tip: For the best experience, we recommend reading [PyKX objects and attributes](../../learn/objects.md) first."
-Getting the data to a PyKX format provides you with the ability to easily interact with these objects using q or the analytic functionality provided by PyKX, however, having data in this format is not suitable for all use-cases. For example, should a function require a Pandas DataFrame as input then a PyKX object must be converted to a Pandas DataFrame. This is supported using methods provided for the majority of PyKX objects, these are covered below.
+To use the power of q and the functionality provided by PyKX, at some point you must interact with PyKX objects. At their most basic level, objects are allocated C representations of q/kdb+ objects within a memory space managed by q. Keeping the data in this format allows it to be used directly for query/analytic execution in q without any translation overhead.
-## Generating PyKX objects
+## 1. Create PyKX objects
-### Explicitly converting from Pythonic objects to PyKX objects
+There are five ways to create PyKX objects:
-The most simplistic method of creating a PyKX object is to convert an analogous Pythonic type to a PyKX object. This is facilitated through the use of the functions `pykx.toq` which allows conversions from Python, Numpy, Pandas and PyArrow types to PyKX objects, open the tabs which are of interest to you to see some examples of these conversions
+- a. [Convert Python objects to PyKX objects](#1a-convert-python-objects-to-pykx-objects)
+- b. [Generate data using PyKX inbuilt functions](#1b-generate-data-using-pykx-inbuilt-functions)
+- c. [Evaluate q code using `#!python kx.q`](#1c-evaluate-q-code-using-python-kxq)
+- d. [Retrieve a named entity from q's memory](#1d-retrieve-a-named-entity-from-qs-memory)
+- e. [Query an external q session](#1e-query-an-external-q-session)
-??? Note "Specifying target types"
+### 1.a Convert Python objects to PyKX objects
- When converting Pythonic objects to PyKX types users can make use of the `ktype` named argument. Users converting lists/atomic elements should use [PyKX types](../../api/pykx-q-data/type_conversions.md), if converting Pandas DataFrames or PyArrow Tables users can make use of the `ktype` argument with a dictionary input mapping the column name to the [PyKX type](../../api/pykx-q-data/type_conversions.md).
+The simplest way to create a PyKX object is by converting a similar Python type into a PyKX object. You can do this with the `#!python pykx.toq` function, which supports conversions from Python, NumPy, pandas, and PyArrow types to PyKX objects. Open the tabs that interest you to see conversion examples:
+
+??? Note "Specify target types"
+
+ When converting Pythonic objects to PyKX types, you can use the `ktype` named argument:
+
+ - To convert lists/atomic elements, use [PyKX types](../../api/pykx-q-data/type_conversions.md);
+ - To convert Pandas DataFrames or PyArrow Tables, use the `#!python ktype` argument with a dictionary input mapping the column name to the [PyKX type](../../api/pykx-q-data/type_conversions.md).
=== "Python"
@@ -156,18 +170,31 @@ The most simplistic method of creating a PyKX object is to convert an analogous
'))
```
-### Generating data using PyKX inbuilt functions
+By default, when you convert Python strings to PyKX, they are returned as `#!python pykx.SymbolAtom` objects. This ensures a clear distinction between `#!python str` (string) and `#!python bytes` objects. However, you might prefer Python strings to be returned as `#!python pykx.CharVector` objects, to achieve memory efficiency or greater flexibility in analytic development. To do this, use the keyword argument `#!python strings_as_char`, which ensures that all `#!python str` objects are converted to `#!python pykx.CharVector` objects.
+
+```python
+>>> import pykx as kx
+>>> kx.toq('str', strings_as_char=True)
+pykx.CharVector(pykx.q('"str"'))
+>>> kx.toq({'a': {'b': 'test'}, 'b': 'test1'}, strings_as_char=True)
+pykx.Dictionary(pykx.q('
+a| (,`b)!,"test"
+b| "test1"
+'))
+```
+
+### 1.b Generate data using PyKX inbuilt functions
-For users who wish to generate objects directly but who are not familiar with q and want to quickly prototype functionality a number of helper functions can be used.
+For users who want to generate objects directly but are not familiar with q, and wish to quickly prototype this functionality, several helper functions are available.
-Create a vector of random floating point precision values
+Create a vector of random floating point precision values:
```python
>>> kx.random.random(3, 10.0)
pykx.FloatVector(pykx.q('9.030751 7.750292 3.869818'))
```
-Additionally, users when generating random data can use PyKX null/infinite data to create data across larger data ranges as follows
+Additionally, when generating random data, you can use PyKX null/infinite data to create data across larger data ranges as follows:
```python
>>> kx.random.random(2, kx.GUIDAtom.null)
@@ -176,7 +203,7 @@ pykx.GUIDVector(pykx.q('8c6b8b64-6815-6084-0a3e-178401251b68 5ae7962d-49f2-404d-
pykx.IntVector(pykx.q('986388794 824432196 2022020141i'))
```
-Create a two-dimensional list of random symbol values
+Create a two-dimensional list of random symbol values:
```python
>>> kx.random.random([2, 3], ['a', 'b', 'c'])
@@ -186,7 +213,7 @@ b a b
'))
```
-Create a table of tabular data generated using random data
+Create a table of tabular data generated using random data:
```python
>>> N = 100000
@@ -206,7 +233,7 @@ AAPL 68.98055 94
'))
```
-Additionally for retrieval of current temporal information users can make calls to the `date`, `time` and `timestamp` type objects respectively as follows
+For retrieval of current temporal information, call the `#!python date`, `#!python time`, and `#!python timestamp` type objects as follows:
```python
>>> kx.DateAtom('today')
@@ -217,20 +244,20 @@ pykx.TimeAtom(pykx.q('16:22:12.178'))
pykx.TimestampAtom(pykx.q('2024.01.05T16:22:21.012631000'))
```
-### Evaluating q code using `kx.q`
+### 1.c Evaluate q code using `#!python kx.q`
-For users more familiar with q it is possible to evaluate q code to generate PyKX objects, this can be done as follows
+If you're more familiar with q, generate PyKX objects by evaluating q code:
```python
>>> kx.q('til 10')
pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9'))
```
-More information on the usage of `kx.q` can be found by following the documentation guide [here](evaluating.md)
+For more information, see the documentation guide on [how to use `kx.q`](evaluating.md).
-### By retrieving a named entity from q's memory
+### 1.d Retrieve a named entity from q's memory
-As noted at the start of this guide PyKX objects exist in a memory space accessed and controlled by interactions with q, as such items which are created in q may not be immediately available as Python objects. For example if a named variable in q has been created as a side effect of a function call or explicitly created by a user it can be retrieved based on this name as follows.
+As PyKX objects exist in a memory space accessed and controlled by interactions with q, the items created in q may not be immediately available as Python objects. For example, if you created a named variable in q as a side effect of a function call or just explicitly created it, you can retrieve it by its name:
```python
>>> kx.q('t:([]5?1f;5?1f)') # Generate a named variable in a single object
@@ -251,15 +278,16 @@ x x1
pykx.FloatVector(pykx.q('0.3927524 0.5170911 0.5159796 0.4066642 0.1780839'))
```
-### Through query of an external q session
+### 1.e Query an external q session
-PyKX provides an IPC interface allowing users to query and retrieve data from a q server. Assuming that a user has a q server with no username/password exposed on port 5000 it is possible to run synchronous and asynchronous events against this server as follows:
+PyKX provides an IPC interface allowing users to query and retrieve data from a q server. If you have a q server with no username/password exposed on `#!python port 5000`, it's possible to run synchronous and asynchronous events against this server:
```python
>>> conn = kx.QConnection('localhost', 5000) # Open a connection to the q server
->>> conn('til 10') # Execute a command server side
+>>> conn('til 10') # Execute a command server side
pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9'))
->>> conn.qsql.select('tab', where = 'x=`a') # Query using qsql statement
+>>> conn['tab'] = kx.q('([]100?`a`b;100?1f;100?1f)') # Generate a table on the server
+>>> conn.qsql.select('tab', where = 'x=`a') # Query using qsql statement
pykx.Table(pykx.q('
x x1 x2
-----------------------
@@ -287,78 +315,80 @@ a 0.02810674 0.481821
'))
```
-## Converting PyKX objects to Pythonic Types
+## 2. Convert PyKX objects to Pythonic types
-As mentioned above PyKX objects can be created and interacted with using q functionality, once the data is in a position to be used by Python it may be more appropriate to convert it to a Python, Numpy, Pandas or PyArrow representation. This is facilitated through the use of the following methods:
+Converting data to a PyKX format allows for easy interaction with these objects using q or the analytic functionality provided by PyKX. However, this format may not be suitable for all use cases. For instance, if a function requires a Pandas DataFrame as input, a PyKX object must be converted to a Pandas DataFrame.
-| Method | Description |
+Once the data is ready for use in Python, it may be more appropriate to convert it into a representation using Python, NumPy, Pandas, or PyArrow by using the following methods:
+
+| **Method** | **Description** |
|----------|----------------------------------|
| `*.py()` | Convert a PyKX object to Python |
| `*.np()` | Convert a PyKX object to Numpy |
| `*.pd()` | Convert a PyKX object to Pandas |
| `*.pa()` | Convert a PyKX object to PyArrow |
+
+??? example "Example"
-The following provides some examples of this functionality in use:
-
-```python
-import pykx as kx
-qarr = kx.q('til 5')
->>> qarr.py()
-[0, 1, 2, 3, 4]
->>> qarr.np()
-array([0, 1, 2, 3, 4])
->>> qarr.pd()
-0 0
-1 1
-2 2
-3 3
-4 4
-dtype: int64
->>> qarr.pa()
-
-[
- 0,
- 1,
- 2,
- 3,
- 4
-]
->>>
->>> qtab = kx.Table(data={
-... 'x': kx.random.random(5, 1.0),
-... 'x1': kx.random.random(5, 1.0),
-... })
->>> qtab
-pykx.Table(pykx.q('
-x x1
--------------------
-0.439081 0.4707883
-0.5759051 0.6346716
-0.5919004 0.9672398
-0.8481567 0.2306385
-0.389056 0.949975
-'))
->>> qtab.np()
-rec.array([(0.43908099, 0.47078825), (0.57590514, 0.63467162),
- (0.59190043, 0.96723983), (0.84815665, 0.23063848),
- (0.38905602, 0.94997503)],
- dtype=[('x', '<f8'), ('x1', '<f8')])
->>> qtab.pd()
- x x1
-0 0.439081 0.470788
-1 0.575905 0.634672
-2 0.591900 0.967240
-3 0.848157 0.230638
-4 0.389056 0.949975
->>> qtab.pa()
-pyarrow.Table
-x: double
-x1: double
-```
+ ```python
+ import pykx as kx
+ qarr = kx.q('til 5')
+ >>> qarr.py()
+ [0, 1, 2, 3, 4]
+ >>> qarr.np()
+ array([0, 1, 2, 3, 4])
+ >>> qarr.pd()
+ 0 0
+ 1 1
+ 2 2
+ 3 3
+ 4 4
+ dtype: int64
+ >>> qarr.pa()
+
+ [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ]
+ >>>
+ >>> qtab = kx.Table(data={
+ ... 'x': kx.random.random(5, 1.0),
+ ... 'x1': kx.random.random(5, 1.0),
+ ... })
+ >>> qtab
+ pykx.Table(pykx.q('
+ x x1
+ -------------------
+ 0.439081 0.4707883
+ 0.5759051 0.6346716
+ 0.5919004 0.9672398
+ 0.8481567 0.2306385
+ 0.389056 0.949975
+ '))
+ >>> qtab.np()
+ rec.array([(0.43908099, 0.47078825), (0.57590514, 0.63467162),
+ (0.59190043, 0.96723983), (0.84815665, 0.23063848),
+ (0.38905602, 0.94997503)],
+ dtype=[('x', '<f8'), ('x1', '<f8')])
+ >>> qtab.pd()
+ x x1
+ 0 0.439081 0.470788
+ 1 0.575905 0.634672
+ 2 0.591900 0.967240
+ 3 0.848157 0.230638
+ 4 0.389056 0.949975
+ >>> qtab.pa()
+ pyarrow.Table
+ x: double
+ x1: double
+ ```
-!!! warning "Precision Loss Considerations"
+!!! warning "Precision loss considerations"
- Care should be taken in particular when converting q temporal data to Python native data types. As Python temporal data types only support microsecond precision roundtrip conversions will reduce temporal granularity for q data.
+ Special care is needed when converting q temporal data to Python native data types. Since Python temporal data types only support microsecond precision, roundtrip conversions reduce the temporal granularity of q data.
```python
>>> import pykx as kx
@@ -369,4 +399,4 @@ x1: double
pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627000'))
```
- See [here](../fundamentals/conversion_considerations.md#temporal-types) for further details.
+ See our [Conversion considerations for temporal types](../fundamentals/conversion_considerations.md#temporal-types) section for further details.
diff --git a/docs/user-guide/fundamentals/evaluating.md b/docs/user-guide/fundamentals/evaluating.md
index ce31349..7d43d02 100644
--- a/docs/user-guide/fundamentals/evaluating.md
+++ b/docs/user-guide/fundamentals/evaluating.md
@@ -1,36 +1,40 @@
-# Evaluating q code with PyKX
+---
+title: Use PyKX objects
+description: How to use PyKX objects and evaluate q code with PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, PyKX objects,
+---
-There are a number of ways to manipulate PyKX objects and evaluate q code in PyKX, for example:
+# Use PyKX objects and evaluate q code with PyKX
-1. By calling `pykx.q` directly, e.g. `pykx.q('10 {x,sum -2#x}/ 0 1')`
-2. By dropping into the [interactive console][pykx.QConsole]
-3. By making use of keyword functions provided by `q`, e.g. `pykx.q.til(10)`
-4. Over [IPC][pykx.QConnection]
+_This page provides details on how to use PyKX objects and how to evaluate q code with PyKX._
-The first three methods evaluate the code locally within the Python process, and are not available without a q license. The final method evaluates the code in a separate q process, and can be used with or without a q license provided the server to which your PyKX instance is connected is appropriately licensed.
+!!! tip "Tip: For the best experience, we recommend reading [PyKX objects and attributes](../../learn/objects.md) and [Create and convert PyKX objects](creating.md) first."
-!!! Warning
-
- Functions pulled in over IPC are executed locally in PyKX, see the [IPC documentation](../../api/ipc.md)
- for more information on how to ensure `q` code is executed on the server and not locally.
+There are four ways to manipulate PyKX objects and evaluate q code in PyKX:
-## PyKX Objects
+- a. By calling `#!python pykx.q` directly, for example, `#!python pykx.q('10 {x,sum -2#x}/ 0 1')`
+- b. By dropping into the [interactive console][pykx.QConsole]
+- c. By using `#!python q` keyword functions, for example, `#!python pykx.q.til(10)`
+- d. Over [IPC][pykx.QConnection]
-Calling a q instance or a connection to a q instance will return what is commonly referred to as a *PyKX object*. A PyKX object is an instance of the [`pykx.K`][pykx.K] class, or one of its subclasses. These classes are documented on the [PyKX wrappers API doc](../../api/pykx-q-data/wrappers.md) page.
+The first three methods evaluate the code locally within the Python process and require a q license. The final method evaluates the code in a separate q process and can be used with or without a q license, provided the server your PyKX instance is connected to is appropriately licensed.
-PyKX objects are wrappers around objects in q's memory space within the Python process that PyKX (and your program that uses PyKX) runs in. These wrappers are cheap to make as they do not require copying any data out of q's memory space.
+!!! Warning
-These PyKX objects support a variety of Python features (e.g. iteration, slicing, calling, etc.), and so oftentimes converting them to other types (e.g. a [`pykx.Vector`][pykx.Vector] to a `numpy.ndarray`) is unnecessary.
+ Functions pulled in over IPC are executed locally in PyKX. Go to the [IPC documentation](../../api/ipc.md)
+ for more information on how to ensure the `q` code is executed on the server and not locally.
-## Calling q using `pykx.q`
+## a. Call q using `#!python pykx.q`
-For users familiar with writing kdb+/q code use of the method `pykx.q` (or more commonly in this documentation `kx.q`) allows the evaluation of q code to take place providing the return of the function as a `PyKX` object. This method is variadic in nature and its usage comes in two forms:
+For users familiar with kdb+/q code, the `#!python pykx.q` (or `#!python kx.q`) method evaluates q code and returns the result as a `#!python PyKX` object. This method is variadic, meaning it can accept a variable number of arguments. You can use it in two different ways:
1. Direct evaluation of single lines of code
-2. The application of functions taking multiple arguments
+2. Application of functions that take multiple arguments
-### Direct evaluation of single lines of code
+### a.1 Direct evaluation of single lines of code
```python
>>> import pykx as kx
@@ -42,9 +46,9 @@ pykx.Identity(pykx.q('::'))
pykx.FloatVector(pykx.q('0.06165008 0.285799 0.6684724 0.9133033 0.1485357'))
```
-### Application of functions taking multiple arguments
+### a.2 Application of functions taking multiple arguments
-As noted above the `pykx.q` functionality is variadic in nature, in the case that the first argument is a function the N following arguments will be treated as arguments to that function. Of particular note is that these arguments can be Python or PyKX objects, all objects passed to a q function will be converted to a PyKX object using the method `pykx.toq` for example:
+If the first argument of `#!python pykx.q` is a function, the `#!python N` following arguments are treated as arguments to that function. Arguments can be Python or PyKX objects. All objects passed to a q function are converted to a PyKX object using the method `#!python pykx.toq`. For example:
```python
>>> import pykx as kx
@@ -70,7 +74,7 @@ x x1
The application of arguments to functions within PyKX is limited to a maximum of 8 arguments. This limitation is imposed by the evaluation of q code.
-Users wishing to debug failed evaluation of q code can do so either through usage of a `debug` keyword or by globally setting the environment variable `PYKX_QDEBUG`.
+Users wishing to debug failed evaluation of q code can do so, either by globally setting the environment variable `#!python PYKX_QDEBUG` or through a `#!python debug` keyword:
=== "Global Setting"
@@ -118,9 +122,9 @@ Users wishing to debug failed evaluation of q code can do so either through usag
pykx.exceptions.QError: type
```
-## Using the q console within PyKX
+## b. Use the q console within PyKX
-For users more comfortable prototyping q code within a q terminal it is possible within a Python terminal to run an emulation of a q session directly in Python through use of the `kx.q.console` method.
+For users more comfortable prototyping q code within a q terminal, it's possible within a Python terminal to run an emulation of a q session directly in Python through the `#!python kx.q.console` method:
```python
>>> import pykx as kx
@@ -133,9 +137,9 @@ q)\\
!!! Note
- This is not a fully featured q terminal, it has the same core [limitations](../advanced/limitations.md) that PyKX has when it comes to the running of timers and subscriptions.
+ This is not a fully-featured q terminal. It shares the same core [limitations](../../help/issues.md) as PyKX, particularly regarding the running of timers and subscriptions.
-## Using q keywords
+## c. Use q keywords
Consider the following q function that checks if a given number is prime:
@@ -143,7 +147,7 @@ Consider the following q function that checks if a given number is prime:
{$[x in 2 3;1;x<2;0;{min x mod 2_til 1+floor sqrt x}x]}
```
-We can evaluate it through `q` to obtain a [`pykx.Lambda`](../../api/pykx-q-data/wrappers.md) object. This object can then be called as a Python function:
+You can evaluate it through `#!python q` to obtain a [`#!python pykx.Lambda`](../../api/pykx-q-data/wrappers.md) object. You can then call this object as a Python function:
```python
import pykx as kx
@@ -154,16 +158,16 @@ assert is_prime(127)
assert not is_prime(128)
```
-Arguments to the function are converted to [`pykx.K`][pykx.K] objects via the [`pykx.toq`][pykx.toq] module, and so the arguments can be anything supported by that module, i.e. any Python type `X` for which a `pykx.toq.from_X` function exists (barring some caveats - see the [`pykx.toq`][pykx.toq] documentation).
+Arguments to the function are converted to [`#!python pykx.K`][pykx.K] objects via the [`#!python pykx.toq`][pykx.toq] module, so the arguments can be anything supported by that module, that is, any Python type `#!python X` for which a `#!python pykx.toq.from_X` function exists (barring some caveats mentioned in the [`#!python pykx.toq`][pykx.toq] documentation).
-For instance, we can apply the `each` adverb to `is_prime` and then provide it a range of numbers to check like so:
+For instance, you can apply the `#!python each` adverb to `#!python is_prime` and then provide it a range of numbers to check:
```python
>>> is_prime.each(range(10))
pykx.LongVector(q('0 0 1 1 0 1 0 1 0 0'))
```
-Then we could pass that into [`pykx.q.where`](../../api/pykx-execution/q.md#where)
+Then you could pass that into [`pykx.q.where`](../../api/pykx-execution/q.md#where):
```python
>>> kx.q.where(is_prime.each(range(10)))
diff --git a/docs/user-guide/fundamentals/indexing.md b/docs/user-guide/fundamentals/indexing.md
index e157b00..f569c5b 100644
--- a/docs/user-guide/fundamentals/indexing.md
+++ b/docs/user-guide/fundamentals/indexing.md
@@ -1,25 +1,41 @@
-# Indexing PyKX Objects
+---
+title: Index PyKX objects
+description: How to index PyKX objects
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, PyKX objects, index
+---
-## An introduction to indexing within PyKX
+# Index PyKX Objects
-Indexing in q works differently than you may be used to, and that behavior largely carries over into PyKX for indexing K objects. For more information about how indexing in q works (and by extension, how indexing K objects in PyKX work), refer to the following sections of the q tutorial book [Q For Mortals](https://code.kx.com/q4m3/):
+_This page provides details on how indexing works within PyKX._
-- [Indexing](https://code.kx.com/q4m3/3_Lists/#34-indexing)
-- [Iterated Indexing and Indexing at Depth](https://code.kx.com/q4m3/3_Lists/#38-iterated-indexing-and-indexing-at-depth)
-- [Indexing with Lists](https://code.kx.com/q4m3/3_Lists/#39-indexing-with-lists)
-- [Elided Indices](https://code.kx.com/q4m3/3_Lists/#310-elided-indices)
+Indexing in q works differently than you may be used to, and that behavior largely carries over into PyKX for indexing K objects.
-Indexes used on K objects in PyKX are converted to equivalent K objects in q using the [toq module](../../api/pykx-q-data/toq.md), just like any other Python to q conversion. To guarantee that the index used against a K object is what you intend it to be, you may perform the conversion of the index yourself before applying it. When K objects are used as the index for another K object, the index object is applied to the [`pykx.Collection`][pykx.Collection] object as they would be in q; i.e. as described in Q For Mortals.
+!!! info "Resources"
+
+ For more information about how indexing in q works (and by extension, how indexing K objects in PyKX works), refer to the following sections of the q tutorial book [Q For Mortals](https://code.kx.com/q4m3/):
-The following provides some examples of applying indexing to various q objects:
+ - [Indexing](https://code.kx.com/q4m3/3_Lists/#34-indexing)
+ - [Iterated indexing and indexing at depth](https://code.kx.com/q4m3/3_Lists/#38-iterated-indexing-and-indexing-at-depth)
+ - [Indexing with lists](https://code.kx.com/q4m3/3_Lists/#39-indexing-with-lists)
+ - [Elided indices](https://code.kx.com/q4m3/3_Lists/#310-elided-indices)
-## Basic Vectors Indexing
+Indexes used on K objects in PyKX are converted to equivalent K objects in q using the [toq module](../../api/pykx-q-data/toq.md), just like any other Python to q conversion. To guarantee that the index used against a K object is what you intend it to be, you may perform the conversion of the index yourself before applying it. When K objects are used as the index for another K object, the index object is applied to the [`#!python pykx.Collection`][pykx.Collection] object as it would be in q, that is, as described in Q For Mortals.
-Indexing in PyKX spans elements `0` to element `N-1` where `N` is the length of the object being indexed.
+Examples of applying indexing to various q objects include:
-### Single element indexing
+- a. Basic vectors indexing: Single element indexing and Slicing
+- b. Assigning and adding values to vectors/lists
+- c. Indexing non-vector objects
-Single element indexing works similarly to any other standard Python sequence. Similar to Numpy PyKX supports negative indices to allow retrieval of indexes at the end of an array. For example:
+## a. Basic Vectors Indexing
+
+Indexing in PyKX spans elements `#!python 0` to element `#!python N-1` where `#!python N` is the length of the object being indexed.
+
+### a.1 Single element indexing
+
+Single element indexing works like any other standard Python sequence. Similar to Numpy, PyKX supports negative indices to allow retrieval of indexes at the end of an array. For example:
```python
>>> x = kx.q.til(10)
@@ -34,7 +50,7 @@ pykx.CharAtom(pykx.q('"a"'))
pykx.CharAtom(pykx.q('"f"'))
```
-Similar to Numpy indexing an array out of bounds will result in an `IndexError` being raised.
+Similar to Numpy, indexing an array out of bounds results in an `#!python IndexError` being raised.
```python
>>> x = kx.q.til(5)
@@ -48,7 +64,7 @@ Traceback (most recent call last):
IndexError: index out of range
```
-N Dimensional list vectors can also be manipulated using single element indexing as follows
+N-dimensional list vectors can also be manipulated using single element indexing as follows:
```python
>>> x = kx.random.random([4, 4], 1.0)
@@ -63,9 +79,9 @@ pykx.List(pykx.q('
pykx.FloatAtom(pykx.q('0.6919531'))
```
-### Slicing
+### a.2 Slicing
-Slicing vectors in PyKX is more simplistic than the functionality provided by Numpy. Vectors of N dimensions are indexed using `obj[start:stop:step]` semantics. This slice syntax operates where `start` is the starting index, `stop` is the stopping index and `step` is the number of steps between the elements where `step` is non zero
+Slicing vectors in PyKX is simpler than the functionality provided by Numpy. You can index vectors of N dimensions by using `#!python obj[start:stop:step]` semantics. This slice syntax operates where `#!python start` is the starting index, `#!python stop` is the stopping index and `#!python step` is the number of steps between the elements where `#!python step` is non zero:
```python
>>> x = kx.q.til(10)
@@ -92,7 +108,7 @@ pykx.CharVector(pykx.q('"defg"'))
pykx.CharVector(pykx.q('"ace"'))
```
-Negative slicing works in a similar way and can be used for `list`, `vector` and `table` objects too.
+Negative slicing works in a similar way. You can use it for `#!python list`, `#!python vector` and `#!python table` objects, too.
```python
>>> list = kx.q('("a"; 2; 3.3; `four)')
@@ -121,7 +137,7 @@ a b c
'))
```
-## Assigning and Adding Values to Vectors/Lists
+## b. Assigning and adding values to vectors/lists
Vector assignment in PyKX operates similarly to that provided by Numpy and operations supported on basic Python lists. As with the previous sections this functionality supports both individual element assignment and slice assignment as follows:
@@ -161,7 +177,7 @@ pykx.LongVector(pykx.q('0 0 0 0 0 5 6 7 8 10'))
'))
```
-In addition to positional assignment users can make use of the `append` and `extend` methods for `pykx.*Vector` and `pykx.List` objects. When appending objects to a list this can be achieved for single item assignments, while extend will look to add multiple elements to a Vector or List object. The following tabbed section shows the use of append and extend operations including failing cases.
+In addition to positional assignment, you can use the `#!python append` and `#!python extend` methods for `#!python pykx.*Vector` and `#!python pykx.List` objects. When adding objects to a list, use `#!python append` for single item assignments. In contrast, use `#!python extend` to add multiple elements to a `#!python list` or `#!python vector` object. The following tabbed section demonstrates the use of `#!python append` and `#!python extend` operations, including examples of failing cases.
=== "pykx.*Vector"
@@ -222,9 +238,9 @@ In addition to positional assignment users can make use of the `append` and `ext
'))
```
-## Indexing Non Vector Objects
+## c. Indexing non-vector objects
-In addition to being able to index and slice PyKX vector and list objects it is also possible to apply index and slicing semantics on PyKX Table objects. Application of slice/index semantics on tabular objects will return table like objects
+In addition to being able to index and slice PyKX `#!python vector` and `#!python list` objects, it's also possible to apply index and slicing semantics on PyKX Table objects. Application of slice/index semantics on tabular objects returns table-like objects:
```python
>>> import pandas as pd
diff --git a/docs/user-guide/fundamentals/nulls_and_infinities.md b/docs/user-guide/fundamentals/nulls_and_infinities.md
index eb5a988..750a7c6 100644
--- a/docs/user-guide/fundamentals/nulls_and_infinities.md
+++ b/docs/user-guide/fundamentals/nulls_and_infinities.md
@@ -1,22 +1,35 @@
-# Handling nulls and infinities
+---
+title: Convert nulls and infinities
+description: How to handle nulls and infinities in PyKX
+date: July 2024
+author: KX Systems, Inc.
+tags: PyKX, q, nulls, infinity
+---
-PyKX and its management of nulls and infinities inherited from q operate differently in subtle ways from familiar libraries such as Numpy.
+# Convert nulls and infinities
-PyKX provides typed null and infinity values for most types. [As shown in the q docs](https://code.kx.com/q/ref/#datatypes), nulls can be expressed as `0N` followed by a type character (or no type character for long integer null), while infinities can be expressed as `0W` followed by a type character (or no type character for a long integer infinity).
+_This page explains how to handle nulls and infinities in PyKX._
-Datatypes in q designate a particular value in their numeric range as null, and another two as positive and negative infinity. Most other languages, such as Python, have no way to represent infinity for anything other than IEEE floating point numbers, and where typed nulls exist, they will not also be a value in the range of the datatype (save for floats, which can be `NaN`).
+PyKX handles nulls and infinities in ways that are subtly different from familiar libraries like NumPy, due to its q legacy.
-For example, the q null short integer `0Nh` is stored as the value `-32768` (i.e. the smallest possible signed 16 bit integer), and the q infinite short integer is stored as the value `32767` (i.e the largest possible signed 16 bit integer).
+PyKX provides typed null and infinity values for most types. [As shown in the q docs](https://code.kx.com/q/ref/#datatypes):
+
+- nulls can be expressed as `#!python 0N` followed by a type character (or no type character for long integer null).
+- infinities can be expressed as `#!python 0W` followed by a type character (or no type character for a long integer infinity).
+
+Datatypes in q designate a particular value in their numeric range as null, and another two as positive and negative infinity. Most other languages, such as Python, have no way to represent infinity for anything other than IEEE floating point numbers, and where typed nulls exist, they will not also be a value in the range of the datatype (save for floats, which can be `#!python NaN`).
+
+!!! example "For example, the q null short integer `#!python 0Nh` is stored as the value `#!python -32768` (i.e. the smallest possible signed 16-bit integer), and the q infinite short integer is stored as the value `#!python 32767` (i.e. the largest possible signed 16-bit integer)."
Due to the design of nulls and infinites in q, there are some technical considerations - detailed on this page - regarding converting nulls and infinities between Python and q in either direction.
## Generation of null and infinite values
-To facilitate the generation of null and infinite values there are a number of properties for `pykx.Atom` objects which allow this to be completed Pythonically. In all cases this requires access to [licensed mode](../advanced/modes.md). The following examples show the generation of various null and infinite values.
+Here are some examples demonstrating how to create various null and infinite values.
### Null generation
-Where possible null values can be returned to you as follows:
+Where possible, null values are returned as follows:
```python
>>> import pykx as kx
@@ -30,7 +43,7 @@ pykx.GUIDAtom(pykx.q('00000000-0000-0000-0000-000000000000'))
pykx.SymbolAtom(pykx.q('`'))
```
-Unsupported values will return a `NotImplemetedError` as follows:
+Unsupported values return a `#!python NotImplementedError` as below:
```python
>>> import pykx as kx
@@ -44,21 +57,21 @@ NotImplementedError: Retrieval of null values not supported for this type
### Infinite generation
-Where possible positive and negative infinite values can be returned to you as follows:
+Where possible, positive and negative infinite values are returned as follows:
```python
>>> import pykx as kx
>>> kx.TimeAtom.inf
pykx.TimeAtom(pykx.q('0Wt'))
->>> -kx.TimeAtom.inf
+>>> kx.TimeAtom.inf_neg
pykx.TimeAtom(pykx.q('-0Wt'))
>>> kx.IntAtom.inf
pykx.IntAtom(pykx.q('0Wi'))
->>> -kx.IntAtom.inf
+>>> kx.IntAtom.inf_neg
pykx.IntAtom(pykx.q('-0Wi'))
```
-Unsupported values will return a `NotImplementedError` as follows:
+Unsupported values return a `#!python NotImplementedError`:
```python
>>> kx.SymbolAtom.inf
@@ -71,7 +84,7 @@ NotImplementedError: Retrieval of infinite values not supported for this type
## Checking for nulls and infinities
-[The q function named null](https://code.kx.com/q/ref/null/) can be applied to most PyKX objects, and will return if the object is null by returning `1b`, or if it contains nulls by returning a collection of booleans whose shape matches the object. Like with any function from the `.q` namespace, it can be accessed via the [context interface](../../api/pykx-execution/ctx.md): [`q.null`](../../api/pykx-execution/q.md#null)).
+If you apply [the q function named null](https://code.kx.com/q/ref/null/) to most PyKX objects, it returns `#!python 1b` if the object is null. If the object contains nulls, it returns a collection of booleans whose shape matches the object. Like with any function from the `#!python .q` namespace, you can access it via the [context interface](../../api/pykx-execution/ctx.md): [`#!python q.null`](../../api/pykx-execution/q.md#null).
```python
>>> import pykx as kx
@@ -81,24 +94,33 @@ pykx.BooleanAtom(pykx.q('1b'))
pykx.BooleanVector(pykx.q('0010b'))
```
-[`pykx.Atom`][pykx.Atom] objects provide the properties `is_null` and `is_inf`. These are `True` if the atom is a null value for its type, or an infinite value (positive or negative) for its type, respectively, and `False` otherwise. `is_inf` is always `False` for types which do not have an infinite value in q, such as symbols.
+[`#!python pykx.Atom`][pykx.Atom] objects provide the properties:
+
+- `#!python is_null`: `#!python True` if the atom is a null value for its type else `#!python False`.
+- `#!python is_inf`: `#!python True` if the atom is an infinite value (positive or negative) for its type else `#!python False`.
+- `#!python is_pos_inf`: `#!python True` if the atom is a positive infinite value for its type else `#!python False`.
+- `#!python is_neg_inf`: `#!python True` if the atom is a negative infinite value for its type else `#!python False`.
+
+`#!python is_inf`/`#!python is_pos_inf`/`#!python is_neg_inf` are always `#!python False` for types which do not have an infinite value in q, such as [`#!python pykx.SymbolAtom`][pykx.SymbolAtom].
```python
>>> kx.q('0w').is_inf
True
->>> kx.q('0W').is_inf
-True
>>> kx.q('1f').is_inf
False
->>> kx.q('0n').is_null
+>>> kx.q('-0Wf').is_inf
True
->>> kx.q('0N').is_null
+>>> kx.q('-0Wf').is_pos_inf
+False
+>>> kx.q('-0Wf').is_neg_inf
True
>>> kx.q('1f').is_null
False
+>>> kx.q('0n').is_null
+True
```
-Likewise, all [`pykx.Collection`][pykx.Collection] objects provide the properties `has_nulls` and `has_infs`. They are `True` if the collection has any nulls/infinities in it.
+Likewise, all [`#!python pykx.Collection`][pykx.Collection] objects provide the properties `#!python has_nulls` and `#!python has_infs`. They are `#!python True` if the collection has any nulls/infinities in it.
```python
>>> kx.q('0w,9?1f').has_infs
@@ -107,24 +129,124 @@ True
False
```
-Some null values are unintuitive. For instance, the null value for a character in q is the space `" "`, the null value for a symbol is the empty symbol, and the null value for a GUID is `00000000-0000-0000-0000-000000000000`. A char vector (i.e. q string) that has any spaces in it will have `has_nulls` set to `True`.
+Some null values are unintuitive. For instance, the null value for a character in q is the space `#!python " "`, the null value for a symbol is the empty symbol, and the null value for a GUID is `#!python 00000000-0000-0000-0000-000000000000`. A char vector (i.e. q string) that has any spaces has `#!python has_nulls` set to `#!python True`.
+See also the page with specifics on [temporal](./temporal.md) conversions.
## q to Python
-Vectors with the q types `short`, `int`, and `long` can be converted to Python in the following ways:
-
-- `.py` provides a list with the null values left as `pykx.K` objects - thin wrappers around the objects in q's memory.
-- `.np` provides a [masked array](https://numpy.org/doc/stable/reference/maskedarray.html) with the null values masked out, and the fill value set to the underlying value of the q null.
-- `.pd` provides a [Series](https://pandas.pydata.org/docs/reference/api/pandas.Series.html) as per usual, but backed by an [IntegerArray](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.arrays.IntegerArray.html) instead of a regular `np.ndarray`.
-- `.pa` provides a [PyArrow Array](https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array) as per usual, which natively supports nullable integral vector data, so it simply has the indexes of the nulls stored in the array metadata.
-
-Real vectors use the standard `NaN` and `inf` values, and so are handled by q, Python, Numpy, Pandas, and PyArrow in the same way with no special handling.
-
-Temporal vectors use `NaT` to represent null values in Numpy and Pandas, left as `pykx.K` objects in pure Python, and PyArrow represents null temporal values like it does for any other data type: by masking it out using the array metadata.
-
-When converting a table from q to Python with one of the methods above, each column will be transformed as an independent vector as described above.
-
-The following provides an example of the masked array behavior outlined in the `.np` method described above which is additionally exhibited by the `.pd` method.
+### Null conversions
+
+!!! note "Note"
+
+ PyKX null conversion behaviour changed in version 3.0.0. The below table outlines the before and after conversions.
+
+ === ".py()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|---------|---------------------|-----------------------|---------------------|-----------------------|
+ | guid | `0Ng` | `UUID(int=0)` | `UUID(int=0)` | | |
+ | short | `0Nh` | `q('0Nh')` | `q('0Nh')` | `pd.NA` | `pd.NA` |
+ | int | `0Ni` | `q('0Ni')` | `q('0Ni')` | `pd.NA` | `pd.NA` |
+ | long | `0Nj` | `q('0N')` | `q('0N')` | `pd.NA` | `pd.NA` |
+ | real | `0Ne` | `float('nan')` | `float('nan')` | | |
+ | float | `0n` | `float('nan')` | `float('nan')` | | |
+ | character | `" "` | `b' '` | `b' '` | | |
+ | symbol | `` ` `` | `''` | `''` | | |
+ | timestamp | `0Np` | `None` | `q('0Np')` | `pd.NaT` | `pd.NaT` |
+ | month | `0Nm` | `None` | `q('0Nm')` | `pd.NaT` | `pd.NaT` |
+ | date | `0Nd` | `None` | `q('0Nd')` | `pd.NaT` | `pd.NaT` |
+ | timespan | `0Nn` | `pd.NaT` | `q('0Nn')` | | `pd.NaT` |
+ | minute | `0Nu` | `pd.NaT` | `q('0Nu')` | | `pd.NaT` |
+ | second | `0Nv` | `pd.NaT` | `q('0Nv')` | | `pd.NaT` |
+ | time | `0Nt` | `pd.NaT` | `q('0Nt')` | | `pd.NaT` |
+
+ === ".np()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|---------|-------------------------|----------------------------------|----------------------------------|---------------------|
+ | guid | `0Ng` | `UUID(int=0)` | `UUID(int=0)` | | |
+ | short | `0Nh` | **1 | `np.int16(-32768)` | `np.int16(-32768)` | |
+ | int | `0Ni` | **1 | `np.int32(-2147483648)` | `np.int32(-2147483648)` | |
+ | long | `0Nj` | **1 | `np.int64(-9223372036854775808)` | `np.int64(-9223372036854775808)` | |
+ | real | `0Ne` | `np.float32('nan')` | `np.float32('nan')` | | |
+ | float | `0n` | `np.float64('nan')` | `np.float64('nan')` | | |
+ | character | `" "` | `b' '` | `np.bytes_(' ')` | | |
+ | symbol | `` ` `` | `''` | `''` | | |
+ | timestamp | `0Np` | `np.datetime64('NaT')` | `np.datetime64('NaT')` | | |
+ | month | `0Nm` | `np.datetime64('NaT')` | `np.datetime64('NaT')` | | |
+ | date | `0Nd` | `np.datetime64('NaT')` | `np.datetime64('NaT')` | | |
+ | timespan | `0Nn` | `np.timedelta64('NaT')` | `np.timedelta64('NaT')` | | |
+ | minute | `0Nu` | `np.timedelta64('NaT')` | `np.timedelta64('NaT')` | | |
+ | second | `0Nv` | `np.timedelta64('NaT')` | `np.timedelta64('NaT')` | | |
+ | time | `0Nt` | `np.timedelta64('NaT')` | `np.timedelta64('NaT')` | | |
+
+ - **1 Errors: `NumPy does not support null atomic integral values for short int long`
+
+ === ".pd()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|---------|---------------------|-----------------------|---------------------|-----------------------|
+ | guid | `0Ng` | `UUID(int=0)` | `UUID(int=0)` | | |
+ | short | `0Nh` | **1 | `pd.NA` | `pd.NA` | |
+ | int | `0Ni` | **1 | `pd.NA` | `pd.NA` | |
+ | long | `0Nj` | **1 | `pd.NA` | `pd.NA` | |
+ | real | `0Ne` | `np.float32('nan')` | `np.float32('nan')` | | |
+ | float | `0n` | `np.float64('nan')` | `np.float64('nan')` | | |
+ | character | `" "` | `b' '` | `np.bytes_(' ')` | | |
+ | symbol | `` ` `` | `''` | `''` | | |
+ | timestamp | `0Np` | `pd.NaT` | `pd.NaT` | | |
+ | month | `0Nm` | `pd.NaT` | `pd.NaT` | | |
+ | date | `0Nd` | `pd.NaT` | `pd.NaT` | | |
+ | timespan | `0Nn` | `pd.NaT` | `pd.NaT` | | |
+ | minute | `0Nu` | `pd.NaT` | `pd.NaT` | | |
+ | second | `0Nv` | `pd.NaT` | `pd.NaT` | | |
+ | time | `0Nt` | `pd.NaT` | `pd.NaT` | | |
+
+ - **1 Errors: `NumPy does not support null atomic integral values for short int long`
+
+ === ".pa()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|---------|---------------------|-------------------------------------------------------|---------------------|-----------------------|
+ | guid | `0Ng` | `UUID(int=0)` | **1 | | |
+ | short | `0Nh` | **2 | **2 | `pd.NA` | |
+ | int | `0Ni` | **2 | **2 | `pd.NA` | |
+ | long | `0Nj` | **2 | **2 | `pd.NA` | |
+ | real | `0Ne` | `np.float32('nan')` | `pa.array([np.float32('nan')], type=pa.float32())[0]` | | |
+ | float | `0n` | `np.float64('nan')` | `pa.array([np.float64('nan')], type=pa.float64())[0]` | | |
+ | character | `" "` | `b' '` | `pa.array([b' '], pa.binary())[0]` | | |
+ | symbol | `` ` `` | `''` | `pa.array([''], pa.string())[0]` | | |
+ | timestamp | `0Np` | `pd.NaT` | **3 | | |
+ | month | `0Nm` | `pd.NaT` | **3 | | |
+ | date | `0Nd` | `pd.NaT` | **3 | | |
+ | timespan | `0Nn` | `pd.NaT` | **4 | | |
+ | minute | `0Nu` | `pd.NaT` | **4 | | |
+ | second | `0Nv` | `pd.NaT` | **4 | | |
+ | time | `0Nt` | `pd.NaT` | **4 | | |
+
+ - **1 Errors: `Could not convert UUID('00000000-0000-0000-0000-000000000000') with type UUID: did not recognize Python value type when inferring an Arrow data type`
+ - **2 Errors: `NumPy does not support null atomic integral values for short int long`
+ - **3 Errors: `pyarrow.lib.ArrowNotImplementedError: Unbound or generic datetime64 time unit`
+ - **4 Errors: `pyarrow.lib.ArrowNotImplementedError: Unbound or generic timedelta64 time unit`
+
+To convert vectors with the q types `#!python short`, `#!python int`, and `#!python long` to Python, you can use the following methods:
+
+|**Method**|**Description**|
+|----------|---------------|
+|`.py` | Provides a list with the null values converted to the closest possible Python representation |
+|`.np`| Provides a [masked array](https://numpy.org/doc/stable/reference/maskedarray.html) with the null values masked out, and the fill value set to the underlying value of the q null.|
+|`.pd`| Provides a [Series](https://pandas.pydata.org/docs/reference/api/pandas.Series.html) as per usual, but backed by an [IntegerArray](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.arrays.IntegerArray.html) instead of a regular `np.ndarray`.|
+|`.pa`| Provides a [PyArrow Array](https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array) as per usual, which natively supports nullable integral vector data, so it simply has the indexes of the nulls stored in the array metadata.|
+
+!!! note "Note"
+
+ - **Real vectors** use the standard `#!python NaN` and `#!python inf` values, and so are handled by q, Python, NumPy, Pandas, and PyArrow in the same way with no special handling.
+
+ - **Temporal vectors** use `#!python NaT` to represent null values in Python, NumPy, and Pandas, while PyArrow represents null temporal values like it does for any other data type: by masking them out using the array metadata.
+
+When converting a table from q to Python with one of the methods above, each column is transformed as an independent vector as described above.
+
+The following provides an example of the masked array behavior outlined in the `#!python .np` method, which is additionally exhibited by the `#!python .pd` method.
```python
>>> import pykx as kx
@@ -158,9 +280,9 @@ x x1
'))
```
-An important example which represents some of the limitations of Pandas DataFrames when displaying masked arrays in index columns can be seen as follows.
+An important example highlighting the limitations of Pandas DataFrames in displaying masked arrays within index columns is shown below.
-In the example below we are converting a keyed table containing one key column containing nulls to Pandas, as expected when converted the null mask is applied as appropriate
+In this example, we convert a keyed table with one key column containing null values to a pandas DataFrame. As expected, the null mask is appropriately applied during the conversion.
```python
>>> keytab = kx.q.xkey('x',
@@ -185,7 +307,7 @@ x
-- 2 3
```
-However, when displaying with multi-index columns the mask behaviour is not adhered to, this can be seen as follows
+However, when displaying with multi-index columns, the mask behaviour is not adhered to:
```python
>>> keytab = kx.q.xkey(['x', 'x1'],
@@ -210,7 +332,7 @@ x x1
-9223372036854775808 2 3
```
-To illustrate this as a limitation of Pandas rather than PyKX consider the following
+To illustrate this as a limitation of Pandas rather than PyKX, consider the following:
```python
>>> tab = kx.Table(data = {
@@ -245,7 +367,7 @@ x x1
-9223372036854775808 2 3
```
-Additional to the above inconsistency with Pandas you may also run into issues with the visual representations of masked arrays when displayed in Pandas DataFrames containing large numbers of rows, for example consider the following case.
+In addition to the above inconsistency with Pandas, you may also run into issues with the visual representations of masked arrays when displayed in Pandas DataFrames containing large numbers of rows. For example, consider the following case:
```python
>>> t = kx.q('([] time:.z.p;a:til 1000;b:9,999#0N)')
@@ -266,7 +388,7 @@ Additional to the above inconsistency with Pandas you may also run into issues w
[1000 rows x 3 columns]
```
-While `-9223372036854778080` does represent an underlying PyKX Null value for display purposes visually it is distracting. To display the DataFrame with the masked values you must set it's `display.max_rows` to be longer than the length of the specified table, the effect of this can be seen as follows.
+While `#!python -9223372036854775808` represents an underlying PyKX Null value, for display purposes it's visually distracting. To display the DataFrame with the masked values, set its `#!python display.max_rows` to be longer than the length of the specified table. Notice the result below:
```python
>>> import pandas as pd
@@ -283,28 +405,198 @@ While `-9223372036854778080` does represent an underlying PyKX Null value for di
..
```
-For more information on masked Numpy arrays and interactions with null representation data in Pandas see the following links
+!!! info "For more information on masked NumPy arrays and interactions with null representation data in Pandas, check out the following links:"
+
+ - [NumPy masked arrays](https://numpy.org/doc/stable/reference/maskedarray.generic.html#filling-in-the-missing-data)
+ - [Pandas working with missing data](https://pandas.pydata.org/docs/user_guide/missing_data.html)
+ - [Pandas nullable integer data types](https://pandas.pydata.org/docs/user_guide/integer_na.html#integer-na)
+
+### Infinite Conversions
+
+For further explanation of some of the difficulties around converting infinities, see also the page with specifics on [temporal](./temporal.md) conversions.
+
+!!! note "Note"
+
+ PyKX infinite conversion behaviour changed in version 3.0.0. The below tables outline the before and after conversions.
+
+ #### Positive Infinity conversions
+
+ === ".py()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|---------|------------------------------------------------------|-----------------------------------------------------|------------------------------------------------------|------------------------------------------------------|
+ | short | `0Wh` | `q('0Wh')` | `q('0Wh')` | `float('inf')` | `float('inf')` |
+ | int | `0Wi` | `q('0Wi')` | `q('0Wi')` | `float('inf')` | `float('inf')` |
+ | long | `0Wj` | `q('0W')` | `q('0W')` | `float('inf')` | `float('inf')` |
+ | real | `0We` | `float('inf')` | `float('inf')` | | |
+ | float | `0w` | `float('inf')` | `float('inf')` | | |
+ | timestamp | `0Wp` | `datetime.datetime(2262, 4, 11, 23, 47, 16, 854775)` | `datetime.datetime(1707, 9, 22, 0, 12, 43, 145224)` | | |
+ | month | `0Wm` | `2147484007` | `2147484007` | | |
+ | date | `0Wd` | `2147494604` | `2147494604` | | |
+ | timespan | `0Wn` | `datetime.timedelta(106751, 16, 854775, 0, 47, 23)` | `datetime.timedelta(106751, 16, 854775, 0, 47, 23)` | | |
+ | minute | `0Wu` | `datetime.timedelta(-3220, 4, 33138, 0, 5, 5)` | `datetime.timedelta(-3220, 4, 33138, 0, 5, 5)` | | |
+ | second | `0Wv` | `datetime.timedelta(24855, 7, 0, 0, 14, 3)` | `datetime.timedelta(24855, 7, 0, 0, 14, 3)` | | |
+ | time | `0Wt` | `datetime.timedelta(24, 23, 647000, 0, 31, 20)` | `datetime.timedelta(24, 23, 647000, 0, 31, 20)` | | |
+
+ === ".np()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|-------|--------------------------------------------------|--------------------------------------------------|---------------------------------|--------------------------------------------------|
+ | short | `0Wh` | **1 | `np.int16(32767)` | `np.int16(32767)` | |
+ | int | `0Wi` | **1 | `np.int32(2147483647)` | `np.int32(2147483647)` | |
+ | long | `0Wj` | **1 | `np.int64(9223372036854775807)` | `np.int64(9223372036854775807)` | |
+ | real | `0We` | `np.float32('inf')` | `np.float32('inf')` | | |
+ | float | `0w` | `np.float64('inf')` | `np.float64('inf')` | | |
+ | timestamp | `0Wp` | `np.datetime64('2262-04-11T23:47:16.854775807')` | `np.datetime64('1707-09-22T00:12:43.145224191')` | | |
+ | month | `0Wm` | `np.datetime64('178958970-08')` | `np.datetime64('-178954971-04')` | | |
+ | date | `0Wd` | `np.datetime64('5881610-07-11')` | `np.datetime64('-5877611-06-21')` | | |
+ | timespan | `0Wn` | `np.timedelta64(9223372036854775807, 'ns')` | `np.timedelta64(9223372036854775807, 'ns')` | | |
+ | minute | `0Wu` | `np.timedelta64(2147483647, 'm')` | `np.timedelta64(2147483647, 'm')` | | |
+ | second | `0Wv` | `np.timedelta64(2147483647, 's')` | `np.timedelta64(2147483647, 's')` | | |
+ | time | `0Wt` | `np.timedelta64(2147483647, 'ms')` | `np.timedelta64(2147483647, 'ms')` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+
+ === ".pd()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|-------|-------------------------------------------------|-------------------------------------------------|---------------------------------|-------------------------------------------------|
+ | short | `0Wh` | **1 | `np.int16(32767)` | `np.int16(32767)` | |
+ | int | `0Wi` | **1 | `np.int32(2147483647)` | `np.int32(2147483647)` | |
+ | long | `0Wj` | **1 | `np.int64(9223372036854775807)` | `np.int64(9223372036854775807)` | |
+ | real | `0We` | `np.float32('inf')` | `np.float32('inf')` | | |
+ | float | `0w` | `np.float64('inf')` | `np.float64('inf')` | | |
+ | timestamp | `0Wp` | `pd.Timestamp('2262-04-11T23:47:16.854775807')` | `pd.Timestamp('1707-09-22T00:12:43.145224191')` | | |
+ | month | `0Wm` | **2 `Timestamp('178958970-08-01 00:00:00')` | **2 `Timestamp('178958970-08-01 00:00:00')` | | |
+ | date | `0Wd` | **2 `Timestamp('5881610-07-11 00:00:00')` | **2 `Timestamp('5881610-07-11 00:00:00')` | | |
+ | timespan | `0Wn` | `pd.Timedelta(9223372036854775807, 'ns')` | `pd.Timedelta(9223372036854775807, 'ns')` | | |
+ | minute | `0Wu` | **2 `Timedelta('1491308 days 02:07:00')` | **2 `Timedelta('1491308 days 02:07:00')` | | |
+ | second | `0Wv` | `pd.Timedelta(2147483647, 's')` | `pd.Timedelta(2147483647, 's')` | | |
+ | time | `0Wt` | `pd.Timedelta(2147483647, 'ms')` | `pd.Timedelta(2147483647, 'ms')` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+ - **2 Errors: `Values out of range` Pandas constructors block creation of these values
+
+ === ".pa()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|-------|-------------------------------------------------|---------------------------------------------------------------------------------------------|---------------------------------|--------------------------------------------------------------|
+ | short | `0Wh` | **1 | `` | `np.int16(32767)` | |
+ | int | `0Wi` | **1 | `` | `np.int32(2147483647)` | |
+ | long | `0Wj` | **1 | `` | `np.int64(9223372036854775807)` | |
+ | real | `0We` | `np.float32('inf')` | `` | | |
+ | float | `0w` | `np.float64('inf')` | `` | | |
+ | timestamp | `0Wp` | `pd.Timestamp('2262-04-11T23:47:16.854775807')` | `` | | |
+ | month | `0Wm` | **2 `Timestamp('178958970-08-01 00:00:00')` | **3 | | |
+ | date | `0Wd` | **2 `Timestamp('5881610-07-11 00:00:00')` | **4 | | |
+ | timespan | `0Wn` | `pd.Timedelta(9223372036854775807, 'ns')` | `` | | |
+ | minute | `0Wu` | **2 `Timedelta('1491308 days 02:07:00')` | **5 | | |
+ | second | `0Wv` | `pd.Timedelta(2147483647, 's')` | `` | | |
+ | time | `0Wt` | `pd.Timedelta(2147483647, 'ms')` | `` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+ - **2 Errors: `Values out of range - Pandas constructors block them`
+ - **3 Errors: `pyarrow.lib.ArrowNotImplementedError: Unsupported datetime64 time unit`
+ - **4 Errors: `OverflowError: days=-2147472692; must have magnitude <= 999999999`
+ - **5 Errors: `pyarrow.lib.ArrowNotImplementedError: Unsupported timedelta64 time unit`
+
+ #### Negative Infinity conversions
+
+ === ".py()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|--------|------------------------------------------------------|-----------------------------------------------------|-----------------------------------------------------|-----------------------|
+ | short | `-0Wh` | `q('-0Wh')` | `q('-0Wh')` | `float('-inf')` | `float('-inf')` |
+ | int | `-0Wi` | `q('-0Wi')` | `q('-0Wi')` | `float('-inf')` | `float('-inf')` |
+ | long | `-0Wj` | `q('-0W')` | `q('-0W')` | `float('-inf')` | `float('-inf')` |
+ | real | `-0We` | `float('-inf')` | `float('-inf')` | | |
+ | float | `-0w` | `float('-inf')` | `float('-inf')` | | |
+ | timestamp | `-0Wp` | `datetime.datetime(2262, 4, 11, 23, 47, 16, 854774)` | `datetime.datetime(1707, 9, 22, 0, 12, 43, 145224)` | `datetime.datetime(1707, 9, 22, 0, 12, 43, 145224)` | |
+ | month | `-0Wm` | `-2147483287` | `-2147483287` | | |
+ | date | `-0Wd` | `-2147472690` | `-2147472690` | | |
+ | timespan | `-0Wn` | `datetime.timedelta(-106752, 43, 145224, 0, 12)` | `datetime.timedelta(-106752, 43, 145224, 0, 12)` | | |
+ | minute | `-0Wu` | `datetime.timedelta(3219, 55, 966861, 0, 54, 18)` | `datetime.timedelta(3219, 55, 966861, 0, 54, 18)` | | |
+ | second | `-0Wv` | `datetime.timedelta(-24856, 53, 0, 0, 45, 20)` | `datetime.timedelta(-24856, 53, 0, 0, 45, 20)` | | |
+ | time | `-0Wt` | `datetime.timedelta(-25, 36, 353000, 0, 28, 3)` | `datetime.timedelta(-25, 36, 353000, 0, 28, 3)` | | |
+
+ === ".np()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|--------|--------------------------------------------------|--------------------------------------------------|--------------------------------------------------|-----------------------|
+ | short | `-0Wh` | **1 | `np.int16(-32767)` | `np.int16(-32767)` | |
+ | int | `-0Wi` | **1 | `np.int32(-2147483647)` | `np.int32(-2147483647)` | |
+ | long | `-0Wj` | **1 | `np.int64(-9223372036854775807)` | `np.int64(-9223372036854775807)` | |
+ | real | `-0We` | `np.float32('-inf')` | `np.float32('-inf')` | | |
+ | float | `-0w` | `np.float64('-inf')` | `np.float64('-inf')` | | |
+ | timestamp | `-0Wp` | `np.datetime64('1677-09-21T00:12:43.145224193')` | `np.datetime64('1707-09-22T00:12:43.145224193')` | `np.datetime64('1707-09-22T00:12:43.145224193')` | |
+ | month | `-0Wm` | `np.datetime64('-178954971-06')` | `np.datetime64('-178954971-06')` | | |
+ | date | `-0Wd` | `np.datetime64('-5877611-06-23')` | `np.datetime64('-5877611-06-23')` | | |
+ | timespan | `-0Wn` | `np.timedelta64(-9223372036854775807, 'ns')` | `np.timedelta64(-9223372036854775807, 'ns')` | | |
+ | minute | `-0Wu` | `np.timedelta64(-2147483647, 'm')` | `np.timedelta64(-2147483647, 'm')` | | |
+ | second | `-0Wv` | `np.timedelta64(-2147483647, 's')` | `np.timedelta64(-2147483647, 's')` | | |
+ | time | `-0Wt` | `np.timedelta64(-2147483647, 'ms')` | `np.timedelta64(-2147483647, 'ms')` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+
+ === ".pd()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|--------|-------------------------------------------------|-------------------------------------------------|-------------------------------------------------|-----------------------|
+ | short | `-0Wh` | **1 | `np.int16(-32767)` | `np.int16(-32767)` | |
+ | int | `-0Wi` | **1 | `np.int32(-2147483647)` | `np.int32(-2147483647)` | |
+ | long | `-0Wj` | **1 | `np.int64(-9223372036854775807)` | `np.int64(-9223372036854775807)` | |
+ | real | `-0We` | `np.float32('-inf')` | `np.float32('-inf')` | | |
+ | float | `-0w` | `np.float64('-inf')` | `np.float64('-inf')` | | |
+ | timestamp | `-0Wp` | `pd.Timestamp('1677-09-21T00:12:43.145224193')` | `pd.Timestamp('1707-09-22 00:12:43.145224193')` | `pd.Timestamp('1707-09-22 00:12:43.145224193')` | |
+ | month | `-0Wm` | **2 `Timestamp('-178954971-06-01 00:00:00')` | **2 `Timestamp('-178954971-06-01 00:00:00')` | | |
+ | date | `-0Wd` | **2 `Timestamp('-5877611-06-23 00:00:00')` | **2 `Timestamp('-5877611-06-23 00:00:00')` | | |
+ | timespan | `-0Wn` | `pd.Timedelta(-9223372036854775807, 'ns')` | `pd.Timedelta(-9223372036854775807, 'ns')` | | |
+ | minute | `-0Wu` | **2 `Timedelta('-1491309 days +21:53:00')` | **2 `Timedelta('-1491309 days +21:53:00')` | | |
+ | second | `-0Wv` | `pd.Timedelta(-2147483647, 's')` | `pd.Timedelta(-2147483647, 's')` | | |
+ | time | `-0Wt` | `pd.Timedelta(-2147483647, 'ms')` | `pd.Timedelta(-2147483647, 'ms')` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+ - **2 Errors: `Values out of range` Pandas constructors block creation of these values
+
+ === ".pa()"
+
+ | datatype | q value | 2.* atom conversion | 2.* vector conversion | 3.* atom conversion | 3.* vector conversion |
+ |-----------|--------|-------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------------------------------------|-----------------------|
+ | short | `-0Wh` | **1 | `` | `np.int16(-32767)` | |
+ | int | `-0Wi` | **1 | `` | `np.int32(-2147483647)` | |
+ | long | `-0Wj` | **1 | `` | `np.int64(-9223372036854775807)` | |
+ | real | `-0We` | `np.float32('-inf')` | `` | | |
+ | float | `-0w` | `np.float64('-inf')` | `` | | |
+ | timestamp | `-0Wp` | `pd.Timestamp('1677-09-21T00:12:43.145224193')` | `` | `pd.Timestamp('1707-09-22 00:12:43.145224193')` | |
+ | month | `-0Wm` | **2 `Timestamp('-178954971-06-01 00:00:00')` | **3 | | |
+ | date | `-0Wd` | **2 `Timestamp('-5877611-06-23 00:00:00')` | **4 | | |
+ | timespan | `-0Wn` | `pd.Timedelta(-9223372036854775807, 'ns')` | `` | | |
+ | minute | `-0Wu` | **2 `Timedelta('-1491309 days +21:53:00')` | **5 | | |
+ | second | `-0Wv` | `pd.Timedelta(-2147483647, 's')` | `` | | |
+ | time | `-0Wt` | `pd.Timedelta(-2147483647, 'ms')` | `` | | |
+
+ - **1 Errors: `NumPy does not support infinite atomic integral values`
+ - **2 Errors: `Values out of range - Pandas constructors block them`
+ - **3 Errors: `pyarrow.lib.ArrowNotImplementedError: Unsupported datetime64 time unit`
+ - **4 Errors: `OverflowError: days=-2147472690; must have magnitude <= 999999999`
+ - **5 Errors: `pyarrow.lib.ArrowNotImplementedError: Unsupported timedelta64 time unit`
+
+#### Infinite weirdness
-- [Numpy masked arrays](https://numpy.org/doc/stable/reference/maskedarray.generic.html#filling-in-the-missing-data)
-- [Pandas working with missing data](https://pandas.pydata.org/docs/user_guide/missing_data.html)
-- [Pandas nullable integer data types](https://pandas.pydata.org/docs/user_guide/integer_na.html#integer-na)
+Other than real/float infinities, which follow the IEEE standard for infinities and so are ignored in this section, infinite values in kdb+ do not behave how you would expect them to. PyKX opts to expose their behavior as-is, since the alternatives (error for infinities, or always expose them as their underlying values) are undesirable. For this reason you should take care when using them.
+Arithmetic operations on infinities are applied directly to the underlying values. As such, adding 1 to many positive infinities in q will result in the null for that type, as the value overflows and becomes the smallest value in that type's range. Subtracting 1 from positive infinities merely yields the second largest number for that type. For instance, `#!python 2147483646 == q('0Wi') - 1`.
## Python to q
-Wherever practical the conversions from q to Python are symmetric, so most of the conversions detailed in the section above work in reverse too. For instance, if you convert a Numpy masked array with dtype `np.int32` to q, the masked values will be represented by int null (`0Ni`) in q.
-
+Wherever possible, the conversions from q to Python are symmetric. Therefore, you can apply most of the conversions described in the previous section in reverse. For instance, if you convert a NumPy masked array with dtype `#!python np.int32` to q, the masked values will be represented by int null (`0Ni`) in q.
## Performance
-By default, whenever PyKX converts a q vector to some Python representation (e.g. a Numpy array) it checks where the nulls (if any) are located. This requires operating on every element of the array, which can be rather expensive. If you know ahead of time that your q vector/table has no nulls in it, you can provide the keyword argument `has_nulls=False` to `.py`/`.np`/`.pd`/`.pa`. This will skip the null-check. If you set this keyword argument to false, but there are still nulls in the data, they will come through as the underlying values from q, e.g. `-32768` for a short integer.
-
-By default `has_nulls` is `None`. It can be set to `True` to always handle the data as if it contains nulls, regardless of whether it actually does. This can improve consistency in some cases, for instance by having all int vectors be converted to Numpy masked arrays instead of normal Numpy arrays when there are no nulls, and masked arrays when there are nulls.
+By default, whenever PyKX converts a q vector to a Python representation (e.g. a NumPy array) it checks where the nulls (if any) are located. This requires operating on every element of the array, which can be rather expensive.
-You can also use the keyword argument `raw=True` for the `py`/`np`/`pd`/`pa` methods for improved performance - albeit this affects more than just how nulls are handled. See [the performance doc page](../advanced/performance.md) for more details about raw conversions.
+If you know ahead of time that your q vector/table has no nulls in it, you can provide the keyword argument `#!python has_nulls=False` to `#!python .py`/`#!python .np`/`#!python .pd`/`#!python .pa`. This will skip the null-check. If you set this keyword argument to false, but there are still nulls in the data, they will come through as the underlying values from q, for example, `#!python -32768` for a short integer.
-## Infinite Weirdness
-
-Other than real/float infinities, which follow the IEEE standard for infinities and so are ignored in this section, infinite values in kdb+ do not behave how you would expect them to. PyKX opts to expose their behavior as-is, since the alternatives (error for infinities, or always expose them as their underlying values) are undesirable. For this reason you should take care when using them.
+By default `#!python has_nulls` is `#!python None`. You can set it to `#!python True` to always handle the data as if it contains nulls, regardless of whether it actually does. This can improve consistency in some cases, for instance by having all int vectors be converted to NumPy masked arrays instead of normal NumPy arrays when there are no nulls, and masked arrays when there are nulls.
-Arithmetic operations on infinities are applied directly to the underlying values. As such, adding 1 to many positive infinities in q will result in the null for that type, as the value overflows and becomes the smallest value in that type's range. Subtracting 1 from positive infinities merely yields the second largest number for that type. For instance, `2147483646 == q('0Wi') - 1`.
+!!! tip "Tip: you can also use the keyword argument `#!python raw=True` for the `#!python py`/`#!python np`/`#!python pd`/`#!python pa` methods for improved performance - albeit this affects more than just how nulls are handled. See [the performance doc page](../advanced/performance.md) for more details about raw conversions."
diff --git a/docs/user-guide/fundamentals/query/index.md b/docs/user-guide/fundamentals/query/index.md
new file mode 100644
index 0000000..5f50c99
--- /dev/null
+++ b/docs/user-guide/fundamentals/query/index.md
@@ -0,0 +1,23 @@
+---
+title: Querying data using PyKX
+description: Introduction to the concept of querying PyKX databases and tables
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, historical, SQL, qSQL
+---
+
+# Querying data using PyKX
+
+_This page provides an introduction to querying your data using PyKX._
+
+| Title | Description |
+|:--------------------------------------|:------------------------------------------------------------------------------------|
+| [Pythonic Querying](pyquery.md) | Learn how to use Pythonic Syntax to query on-disk and in-memory tables. |
+| [SQL Querying](sql.md) | Learn how to use SQL to query on-disk and in-memory tables. |
+| [q Querying](qquery.md) | Learn how to use the q programming language to query on-disk and in-memory tables. |
+| [Performance Considerations](perf.md) | Learn how to update your queries to make them more memory efficient and performant. |
+
+## Next Steps
+
+- If you don't have a historical database available see [here](../../advanced/database/index.md).
+- To learn about creating PyKX Table objects see [here](../../../examples/interface-overview.ipynb).
diff --git a/docs/user-guide/fundamentals/query/perf.md b/docs/user-guide/fundamentals/query/perf.md
new file mode 100644
index 0000000..688a0e6
--- /dev/null
+++ b/docs/user-guide/fundamentals/query/perf.md
@@ -0,0 +1,90 @@
+---
+title: Querying data using PyKX
+description: Introduction to the concept of querying PyKX databases and tables
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, historical, SQL, qSQL
+---
+
+# Query performance considerations using PyKX
+
+_This page explains how to efficiently query your data using PyKX._
+
+## qSQL
+
+The whitepapers detailed below outline optimizations which can be useful for qSQL queries. However, the core lessons/concepts which apply in the [q query](./qquery.md) case also apply to using the [Pythonic query API](./pyquery.md) and [SQL](./sql.md) modes:
+
+- [Columnar database and query optimization](https://code.kx.com/q/wp/columnar-database/)
+- [kdb+ query scaling](https://code.kx.com/q/wp/query-scaling/)
+
+The following sections provide tangible examples of two impactful optimizations.
+
+### Parameter ordering
+
+Assume we have a historical database, partitioned on date, generated using the functionality [here](../../advanced/database/index.md). The query being performed filters the data in the database by date and symbol. The following queries align with those completed [here](https://code.kx.com/q/wp/columnar-database/#query-structure-example).
+
+=== "Optimal query"
+
+ ```python
+ trade.select(where = (kx.Column('date') == kx.DateAtom(2020, 1, 1)) &
+ (kx.Column('sym') == 'IBM'))
+ ```
+
+=== "Non-Optimal query"
+
+ ```python
+ trade.select(where = (kx.Column('sym') == 'IBM') &
+ (kx.Column('date') == kx.DateAtom(2020, 1, 1)))
+ ```
+
+The following shows the scaling of queries based on the number of dates within the database
+
+```q
+ | sym before date | date before sym
+dates in | time size | time size
+database | (ms) (b) | (ms) (b)
+---------|--------------------|----------------------
+ 1 | 470 75,499,920 | 78 75,499,984
+ 5 | 487 75,878,400 | 78 75,499,984
+ 10 | 931 75,880,624 | 78 75,499,984
+ 15 | 1,209 75,882,912 | 78 75,499,984
+ 20 | 1,438 75,885,072 | 78 75,499,984
+```
+
+### Applying Attributes
+
+The following shows the performance difference between the application of a grouped-attribute on the `sym` column of an in-memory table.
+
+```python
+rtquote = quote.select(where = kx.Column('date').isin([kx.DateAtom(2020, 1, 1)])).grouped('sym')
+rtquote.select(where = kx.Column('sym') == 'IBM')
+```
+
+The following shows the scaling of queries based on the number of rows on an in-memory table using only the `sym` column.
+
+```q
+ | no attribute | grouped attribute
+ rows in | time size | time size
+ table | (ms) (b) | (ms) (b)
+-------------------------------------------------------
+ 25,000,000 | 119 301,990,304 | 8 2,228,848
+ 50,000,000 | 243 603,980,192 | 10 4,457,072
+ 75,000,000 | 326 1,207,959,968 | 14 8,913,520
+100,000,000 | 472 1,207,959,968 | 20 8,913,520
+125,000,000 | 582 1,207,959,968 | 26 8,913,520
+150,000,000 | 711 2,415,919,520 | 30 17,826,416
+175,000,000 | 834 2,415,919,520 | 36 17,826,416
+200,000,000 | 931 2,415,919,520 | 40 17,826,416
+225,000,000 | 1,049 2,415,919,520 | 46 17,826,416
+250,000,000 | 1,167 2,415,919,520 | 50 17,826,416
+```
+
+## SQL
+
+To optimize frequently called SQL queries you can make use of the [prepare](../../../api/query.md#pykx.query.SQL.prepare) and [execute](../../../api/query.md#pykx.query.SQL.execute) functionality to separate SQL parsing from query execution as detailed [here](https://code.kx.com/insights/1.10/core/sql.html#prepare-and-execute).
+
+## Next Steps
+
+- Learn how to query your data using the PyKX Pythonic Query API [here](pyquery.md).
+- If you don't have a historical database available see [here](../../advanced/database/index.md).
+- To learn about creating PyKX Table objects see [here](../../../examples/interface-overview.ipynb).
diff --git a/docs/user-guide/fundamentals/query/pyquery.md b/docs/user-guide/fundamentals/query/pyquery.md
new file mode 100644
index 0000000..0400d2c
--- /dev/null
+++ b/docs/user-guide/fundamentals/query/pyquery.md
@@ -0,0 +1,1022 @@
+---
+title: Querying data using PyKX
+description: Introduction to the concept of querying PyKX databases and tables
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, historical, SQL, qSQL
+---
+
+# Querying data using the query API with PyKX
+
+_This page explains how to query your data with PyKX using the query API._
+
+Before we get started the following dataset will be used throughout the remainder of this page.
+
+Creating a sample table:
+
+```python
+>>> import pykx as kx
+>>> kx.random.seed(42)
+>>> trades = kx.Table(data={
+ 'sym': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ 'date': kx.random.random(100, kx.q('2022.01.01') + [0,1,2]),
+ 'price': kx.random.random(100, 1000.0),
+ 'size': kx.random.random(100, 100)
+ })
+
+# Store the same table in q memory space to be able to demo queries on q variables
+>>> kx.q['trades'] = trades
+```
+
+## Query basics
+
+The PyKX [query API](../../../api/query.md) provides a Pythonic way to query kdb+ tables. This API builds [qSQL](https://code.kx.com/q/basics/qsql/) queries in their [functional](https://code.kx.com/q/basics/funsql/) form, allowing you to query in-memory and on-disk data.
+
+In the following sections we will introduce the functions, their arguments and how they can be used to perform queries of increasing complexity.
+
+### Query Functions
+
+Query functions describe the operations against in-memory and on-disk data which allow users to retrieve, update or delete data from these tables. Not all operations are supported against all table types, as such the following table provides a guide:
+
+| Function | pykx.Table | pykx.KeyedTable | pykx.SplayedTable | pykx.PartitionedTable |
+| :------- | :--------------- | :--------------- | :---------------- | :-------------------- |
+| select | :material-check: | :material-check: | :material-check: | :material-check: |
+| exec | :material-check: | :material-check: | :material-check: | :material-close: |
+| update | :material-check: | :material-check: | :material-minus: | :material-close: |
+| delete | :material-check: | :material-check: | :material-minus: | :material-close: |
+
+For `pykx.SplayedTable` objects the denoted :material-minus: operations indicate that while applied queries will return a `pykx.Table` object the on-disk data will not be modified directly.
+
+#### select()
+
+[select()](../../../api/query.md#pykx.query.QSQL.select) builds on qSQL [select](https://code.kx.com/q/ref/select/).
+
+Select should be used to query/filter data returning a [pykx.Table](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Table) or [pykx.KeyedTable](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.KeyedTable).
+
+```python
+table.select(columns=None, where=None, by=None, inplace=False)
+```
+
+#### exec()
+
+[exec()](../../../api/query.md#pykx.query.QSQL.exec) builds on qSQL [exec](https://code.kx.com/q/ref/exec/).
+
+Exec is used to query tables but, unlike Select, it does not return tables. Instead, this query type returns a [pykx.Vector](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Vector), [pykx.Atom](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Atom), or [pykx.Dictionary](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Dictionary) depending on the query parameters.
+
+For example, querying a single column returns a vector, querying multiple columns returns a dictionary mapping column names to values, and performing an aggregation on a single column may return an atom.
+
+```python
+table.exec(columns=None, where=None, by=None, inplace=False)
+```
+
+#### update()
+
+[update()](../../../api/query.md#pykx.query.QSQL.update) builds on qSQL [update](https://code.kx.com/q/ref/update/).
+
+Update returns the modified [pykx.Table](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Table) or [pykx.KeyedTable](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.KeyedTable).
+
+```python
+table.update(columns=None, where=None, by=None, inplace=False)
+```
+
+#### delete()
+
+[delete()](../../../api/query.md#pykx.query.QSQL.delete) builds on qSQL [delete](https://code.kx.com/q/ref/delete/).
+
+Delete returns the modified [pykx.Table](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Table) or [pykx.KeyedTable](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.KeyedTable).
+
+```python
+table.delete(columns=None, where=None, by=None, inplace=False)
+```
+
+!!! Note
+
+ The following sections make use of `kx.Column` objects which are only enabled in PyKX licensed mode. For unlicensed query examples using `str` objects see the [query API](../../../api/query.md) page.
+
+### Query arguments
+
+Querying data using this API refers to the four functions outlined above, each of which can take the following keyword parameters as arguments:
+
+- `columns`
+- `where`
+- `by`*
+- `inplace`
+
+Outlined below, these arguments allow you to manipulate your data to filter for/update specific columns/rows in the case of a `where` clause, apply some analytics in the case of a `columns` clause or group data based on supplied conditions when discussing a `by` clause.
+
+??? Note "by clause restrictions"
+
+ The `by` clause is not supported when used with the `delete` query type
+
+#### columns
+
+The `columns` keyword provides the ability to access columnar data by name or apply analytics to the content of columns. In the following examples we will use various combinations of the `columns` keyword with `select`, `exec`, `update` and `delete` operations.
+
+- `columns` can be passed a single column name without where conditions to retrieve or modify the content of that column:
+
+=== "select"
+
+ ```python
+ >>> trades.select(columns=kx.Column('sym'))
+ pykx.Table(pykx.q('
+ sym
+ ----
+ AAPL
+ MSFT
+ MSFT
+ GOOG
+ AAPL
+ ..
+ '))
+ >>> trades.select(columns=2 * kx.Column('price'))
+ pykx.Table(pykx.q('
+ price
+ --------
+ 291.2518
+ 1067.837
+ 34.35393
+ 1832.257
+ 280.0766
+ ..
+ '))
+ >>> trades.select(columns=kx.Column('price').max())
+ pykx.Table(pykx.q('
+ price
+ --------
+ 989.3873
+ '))
+ ```
+
+=== "delete"
+
+ ```python
+ >>> trades.delete(columns=kx.Column('date'))
+ pykx.Table(pykx.q('
+ sym price
+ -------------
+ AAPL 145.6259
+ MSFT 533.9187
+ MSFT 17.17696
+ GOOG 916.1286
+ AAPL 140.0383
+ ..
+ '))
+ ```
+
+=== "exec"
+
+ ```python
+ >>> trades.exec(columns=kx.Column('price'))
+ pykx.FloatVector(pykx.q('145.6259 533.91..'))
+ >>> trades.exec(columns=kx.Column('price').max())
+ pykx.FloatAtom(pykx.q('989.3873'))
+ >>> trades.exec(columns=2 * kx.Column('price'))
+ pykx.FloatVector(pykx.q('291.2518 1067.83..'))
+ ```
+
+=== "update"
+
+ ```python
+ >>> trades.update(columns=(kx.Column('price') * 2).name('dpx'))
+ pykx.Table(pykx.q('
+ sym date price size dpx
+ --------------------------------------
+ AAPL 2022.01.01 145.6259 19 291.2518
+ MSFT 2022.01.02 533.9187 92 1067.837
+ MSFT 2022.01.02 17.17696 7 34.35393
+ GOOG 2022.01.03 916.1286 60 1832.257
+ AAPL 2022.01.02 140.0383 54 280.0766
+ ..
+ '))
+ >>> trades.update(columns=kx.Column('dpx', value=kx.Column('price') * 2))
+ pykx.Table(pykx.q('
+ sym date price size dpx
+ --------------------------------------
+ AAPL 2022.01.01 145.6259 19 291.2518
+ MSFT 2022.01.02 533.9187 92 1067.837
+ MSFT 2022.01.02 17.17696 7 34.35393
+ GOOG 2022.01.03 916.1286 60 1832.257
+ AAPL 2022.01.02 140.0383 54 280.0766
+ ..
+ '))
+ ```
+
+- Multiple columns can be retrieved, modified or have aggregations/operations performed on them by combining `kx.Column` objects with `&`.
+
+=== "select"
+
+ ```python
+ >>> trades.select(columns=kx.Column('date') & kx.Column('sym'))
+ pykx.Table(pykx.q('
+ date sym
+ ---------------
+ 2022.01.01 AAPL
+ 2022.01.02 MSFT
+ 2022.01.02 MSFT
+ 2022.01.03 GOOG
+ 2022.01.02 AAPL
+ ..
+ '))
+ >>> trades.select(columns=kx.Column('price').neg() & kx.Column('date') + 1)
+ pykx.Table(pykx.q('
+ price date
+ --------------------
+ -145.6259 2022.01.02
+ -533.9187 2022.01.03
+ -17.17696 2022.01.03
+ -916.1286 2022.01.04
+ -140.0383 2022.01.03
+ ..
+ '))
+ >>> trades.select(columns=kx.Column('price').last() & kx.Column('date').last())
+ pykx.Table(pykx.q('
+ price date
+ -------------------
+ 975.5566 2022.01.01
+ '))
+ ```
+
+=== "delete"
+
+ ```python
+ >>> trades.delete(columns=kx.Column('date') & kx.Column('sym'))
+ pykx.Table(pykx.q('
+ price size
+ -------------
+ 145.6259 19
+ 533.9187 92
+ 17.17696 7
+ 916.1286 60
+ 140.0383 54
+ ..
+ '))
+ '))
+ ```
+
+=== "exec"
+
+ ```python
+ >>> trades.exec(columns=kx.Column('date') & kx.Column('price'))
+ pykx.Dictionary(pykx.q('
+ date | 2022.01.01 2022.01.02 2022.0..
+ price| 145.6259 533.9187 17.176..
+ '))
+ ```
+
+- Columns can be named by using the `name` method on your column objects
+
+=== "select"
+
+ ```python
+ >>> trades.select(columns=kx.Column('price').max().name('maxPrice'))
+ pykx.Table(pykx.q('
+ maxPrice
+ --------
+ 989.3873
+ '))
+ ```
+
+=== "exec"
+
+ ```python
+ >>> trades.exec(columns=(2 * kx.Column('price')).name('multiPrice') &
+ ... kx.Column('sym').name('symName'))
+ pykx.Dictionary(pykx.q('
+ multiPrice| 291.2518 1067.837 34.35..
+ symName | AAPL MSFT MSFT ..
+ '))
+ ```
+
+=== "update"
+
+ In the case of update renaming a column will add a new column with the associated name
+
+ ```python
+ >>> trades.update(columns=kx.Column('price').name('priceCol'))
+ pykx.Table(pykx.q('
+ sym date price size priceCol
+ --------------------------------------
+ AAPL 2022.01.01 145.6259 19 145.6259
+ MSFT 2022.01.02 533.9187 92 533.9187
+ MSFT 2022.01.02 17.17696 7 17.17696
+ GOOG 2022.01.03 916.1286 60 916.1286
+ AAPL 2022.01.02 140.0383 54 140.0383
+ ..
+ '))
+ ```
+
+Finally, as an alternative approach for renaming, a dictionary can be used to control the names of returned columns.
+
+```python
+>>> trades.select(columns={'maxPrice':kx.Column('price').max()})
+pykx.Table(pykx.q('
+maxPrice
+--------
+993.6284
+'))
+```
+
+#### where
+
+The [where phrase](https://code.kx.com/q/basics/qsql/#where-phrase) allows you to filter data to retrieve, update, delete or apply functions on rows of a table which meet the specified conditions.
+
+By default this parameter has a value `None` which is equivalent to not filtering the data. This parameter is supported for all query types.
+
+- Filter data meeting a specified criteria on one column
+
+=== "select"
+
+ ```python
+ >>> trades.select(where=kx.Column('price') > 500)
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ MSFT 2022.01.02 533.9187 92
+ GOOG 2022.01.03 916.1286 60
+ AAPL 2022.01.02 876.0921 37
+ AAPL 2022.01.03 952.2597 53
+ MSFT 2022.01.02 603.3717 6
+ ..
+ '))
+ >>> trades.select(where=kx.Column('price') < kx.Column('size'))
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ MSFT 2022.01.03 46.11964 93
+ GOOG 2022.01.02 16.11913 81
+ AAPL 2022.01.03 28.98133 97
+ AAPL 2022.01.02 44.09906 91
+ GOOG 2022.01.01 12.58364 33
+ '))
+ >>> trades.select(where=kx.Column('price') == kx.Column('price').max())
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ MSFT 2022.01.01 989.3873 42
+ '))
+ ```
+
+=== "delete"
+
+ ```python
+ >>> trades.delete(where=kx.Column('price') > 500)
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ AAPL 2022.01.01 145.6259 19
+ MSFT 2022.01.02 17.17696 7
+ AAPL 2022.01.02 140.0383 54
+ MSFT 2022.01.03 282.4291 98
+ MSFT 2022.01.03 46.11964 93
+ ..
+ '))
+ >>> trades.delete(where=kx.Column('price') > kx.Column('size'))
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ MSFT 2022.01.03 46.11964 93
+ GOOG 2022.01.02 16.11913 81
+ AAPL 2022.01.03 28.98133 97
+ AAPL 2022.01.02 44.09906 91
+ GOOG 2022.01.01 12.58364 33
+ '))
+ ```
+
+=== "update"
+
+ ```python
+ >>> trades.update(columns = 2 * kx.Column('price'),
+ ... where=kx.Column('price') > 500)
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ AAPL 2022.01.01 145.6259 19
+ MSFT 2022.01.02 1067.837 92
+ MSFT 2022.01.02 17.17696 7
+ GOOG 2022.01.03 1832.257 60
+ AAPL 2022.01.02 140.0383 54
+ ..
+ '))
+ ```
+
+=== "exec"
+
+ ```python
+ >>> trades.exec(columns = kx.Column('size'), where = kx.Column('price') > 900)
+ pykx.LongVector(pykx.q('60 53 61 41 98 12 41 12 23 42 18 76 73 55'))
+ ```
+
+- Using `&` or passing a list of `pykx.Column` objects will allow multiple filters to be passed
+
+=== "select"
+
+ ```python
+ >>> trades.select(where=(kx.Column('sym') == 'GOOG') & (kx.Column('date') == datetime.date(2022, 1, 1)))
+ pykx.Table(pykx.q('
+ sym date price
+ ------------------------
+ GOOG 2022.01.01 480.9078
+ GOOG 2022.01.01 454.5668
+ GOOG 2022.01.01 790.2208
+ GOOG 2022.01.01 296.6022
+ GOOG 2022.01.01 727.6113
+ ..
+ '))
+ >>> trades.select(where=[
+ ... kx.Column('sym') == 'GOOG',
+ ... kx.Column('date') == datetime.date(2022, 1, 1)
+ ... ])
+ >>> from datetime import date
+ >>> trades.select(columns=kx.Column('price').wavg(kx.Column('size')),
+ ... where=(kx.Column('sym') == 'GOOG') & (kx.Column('date') == date(2022, 1, 1)))
+ pykx.Table(pykx.q('
+ price
+ -------
+ 44.7002
+ '))
+ ```
+
+=== "delete"
+
+ ```python
+ >>> from datetime import date
+ >>> trades.delete(where=(kx.Column('sym') == 'AAPL') & (kx.Column('date') == date(2022, 1, 1)))
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ MSFT 2022.01.02 533.9187 92
+ MSFT 2022.01.02 17.17696 7
+ GOOG 2022.01.03 916.1286 60
+ AAPL 2022.01.02 140.0383 54
+ MSFT 2022.01.03 282.4291 98
+ ..
+ '))
+ ```
+
+=== "update"
+
+ ```python
+ >>> from datetime import date
+ >>> trades.update(
+ ... columns=2*kx.Column('price'),
+ ... where=(kx.Column('sym') == 'AAPL') & (kx.Column('date') == date(2022, 1, 1)))
+ pykx.Table(pykx.q('
+ sym date price size
+ -----------------------------
+ AAPL 2022.01.01 291.2518 19
+ MSFT 2022.01.02 533.9187 92
+ MSFT 2022.01.02 17.17696 7
+ GOOG 2022.01.03 916.1286 60
+ AAPL 2022.01.02 140.0383 54
+ ..
+ '))
+ ```
+
+=== "exec"
+
+ ```python
+ >>> from datetime import date
+ >>> trades.exec(
+ ... columns=kx.Column('price') & kx.Column('date'),
+ ... where=(kx.Column('sym') == 'AAPL') & (kx.Column('date') == date(2022, 1, 1)))
+ pykx.Dictionary(pykx.q('
+ price| 145.6259 636.4009 8..
+ date | 2022.01.01 2022.01.01 2..
+ '))
+ ```
+
+#### by
+
+The [by phrase](https://code.kx.com/q/basics/qsql/#aggregates) allows you to apply aggregations or manipulate data grouping the data `by` specific conditions.
+
+By default this parameter has a value `None` which is equivalent to not grouping your data. This parameter is supported for `select`, `exec` and `update` type queries.
+
+When both a `columns` and `by` clause are passed to a select query without use of an aggregation function then each row contains vectors of data related to the `by` columns.
+
+```python
+>>> trades.select(columns=kx.Column('price'), by=kx.Column('date') & kx.Column('sym'))
+pykx.KeyedTable(pykx.q('
+date sym | price ..
+---------------| ------------------------------------------------------------..
+2022.01.01 AAPL| 131.6095 236.3145 140.4332 839.3869 843.3531 641.2171 104.81..
+2022.01.01 GOOG| 480.9078 454.5668 790.2208 296.6022 727.6113 341.9665 609.77..
+2022.01.01 MSFT| 556.9152 755.6175 865.9657 714.9804 179.5444 149.734 67.0821..
+2022.01.02 AAPL| 441.8975 379.1373 659.8286 531.1731 975.3188 613.6512 603.99..
+2022.01.02 GOOG| 446.898 664.8273 648.3929 240.1062 119.6 774.3718 449.4149 8..
+2022.01.02 MSFT| 699.0336 387.7172 588.2985 725.8795 842.5805 646.37 593.7708..
+2022.01.03 AAPL| 793.2503 621.7243 570.4403 626.2866 263.992 153.475 123.7397..
+2022.01.03 GOOG| 586.263 777.3633 834.1404 906.9809 617.6205 179.6328 100.041..
+2022.01.03 MSFT| 633.3324 39.47309 682.9453 867.1843 483.0873 851.2139 318.93..
+'))
+```
+
+Adding an aggregation function allows this aggregation to be run on a column within the `by` phrase
+
+```python
+>>> trades.select(columns=kx.Column('price').max(), by=kx.Column('date') & kx.Column('sym'))
+pykx.KeyedTable(pykx.q('
+date sym | price
+---------------| --------
+2022.01.01 AAPL| 843.3531
+2022.01.01 GOOG| 790.2208
+2022.01.01 MSFT| 865.9657
+2022.01.02 AAPL| 975.3188
+2022.01.02 GOOG| 886.0093
+2022.01.02 MSFT| 993.6284
+2022.01.03 AAPL| 843.9354
+2022.01.03 GOOG| 914.6929
+2022.01.03 MSFT| 867.1843
+'))
+```
+
+Using a `by` clause within an update allows you to modify the values of the table conditionally based on your grouped criteria, for example:
+
+```python
+>>> trades.update(columns=kx.Column('price').wavg(kx.Column('size')).name('vwap'),
+... by=kx.Column('sym'))
+pykx.Table(pykx.q('
+sym date price size vwap
+--------------------------------------
+AAPL 2022.01.01 145.6259 19 56.09317
+MSFT 2022.01.02 533.9187 92 40.46716
+MSFT 2022.01.02 17.17696 7 40.46716
+GOOG 2022.01.03 916.1286 60 52.721
+AAPL 2022.01.02 140.0383 54 56.09317
+..
+'))
+```
+
+
+??? Note "What happens without a columns clause"
+
+ Using `by` without an associated `columns` clause will return the last row in the table for each distinct group defined by the column(s) in the `by` phrase.
+
+ ```python
+ >>> trades.select(by=kx.Column('sym'))
+ pykx.KeyedTable(pykx.q('
+ sym | date price
+ ----| -------------------
+ AAPL| 2022.01.02 955.4843
+ GOOG| 2022.01.02 886.0093
+ MSFT| 2022.01.01 719.9879
+ '))
+ ```
+
+#### inplace
+
+The `inplace` keyword provides the ability for a user to overwrite the representation of the object which they are querying.
+This functionality is set to `False` by default but will operate effectively on in-memory table objects for the `select`, `update` and `delete` query types.
+
+If set to `True` the input table can be overwritten as follows
+
+```python
+>>> trades.delete(where=kx.Column('sym').isin(['AAPL']), inplace=True)
+pykx.Table(pykx.q('
+sym date price size
+-----------------------------
+MSFT 2022.01.02 533.9187 92
+MSFT 2022.01.02 17.17696 7
+GOOG 2022.01.03 916.1286 60
+MSFT 2022.01.03 282.4291 98
+MSFT 2022.01.03 46.11964 93
+..
+'))
+>>> trades
+pykx.Table(pykx.q('
+sym date price size
+-----------------------------
+MSFT 2022.01.02 533.9187 92
+MSFT 2022.01.02 17.17696 7
+GOOG 2022.01.03 916.1286 60
+MSFT 2022.01.03 282.4291 98
+MSFT 2022.01.03 46.11964 93
+..
+'))
+```
+
+### Query Types
+
+While this page primarily discusses the Pythonic API for querying kdb+ tables locally, the following describes some of the other ways that queries can be completed.
+
+#### Local Queries
+
+qSQL equivalent query for comparison:
+
+```python
+>>> kx.q('select from trades where price=max price')
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 983.0794
+'))
+```
+
+Access query API off the table object:
+
+```python
+>>> trades.select(where=kx.Column('price') == kx.Column('price').max())
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 983.0794
+'))
+```
+
+Direct use of the `kx.q.qsql` query APIs taking the table as a parameter:
+
+```python
+>>> kx.q.qsql.select(trades, where=kx.Column('price') == kx.Column('price').max())
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 983.0794
+'))
+```
+
+Passing a string will query the table of that name in q memory:
+
+```python
+>>> kx.q.qsql.select('trades', where=kx.Column('price') == kx.Column('price').max())
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 983.0794
+'))
+```
+
+#### Remote Queries
+
+Queries can also be performed over [IPC](../../advanced/ipc.md) to remote servers.
+
+```python
+>>> conn = kx.SyncQConnection(port = 5000)
+>>> conn.qsql.select('trades', where=kx.Column('price') == kx.Column('price').max())
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 983.0794
+'))
+```
+
+## Query Classes
+
+### Column
+
+See [pykx.Column](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Column) for full documentation on this class.
+
+#### And operator `&`
+
+Using `&` on two `Column` objects will return a `QueryPhrase` which describes the underlying construct which is used to query your table.
+
+```python
+>>> qp =(kx.Column('sym') == 'GOOG') & (kx.Column('price') > 500)
+>>> type(qp)
+<class 'pykx.wrappers.QueryPhrase'>
+>>> qp.phrase
+[[pykx.Operator(pykx.q('=')), 'sym', [pykx.SymbolAtom(pykx.q('`GOOG'))]], [pykx.Operator(pykx.q('>')), 'price', pykx.LongAtom(pykx.q('500'))]]
+>>> trades.select(where=qp)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.03 976.1246
+GOOG 2022.01.02 716.2858
+GOOG 2022.01.03 872.5027
+GOOG 2022.01.02 962.5156
+GOOG 2022.01.01 589.7202
+..
+'))
+```
+
+Additional `Column` objects can `&` off a `QueryPhrase` to further build up more complex queries.
+
+#### Or operator `|`
+
+Using `|` on two `Column` objects will return a `Column` object.
+
+```python
+>>> c =(kx.Column('price') < 100) | (kx.Column('price') > 500)
+>>> type(c)
+<class 'pykx.wrappers.Column'>
+>>> c.phrase
+[pykx.Operator(pykx.q('|')), [pykx.Operator(pykx.q('<')), 'price', pykx.LongAtom(pykx.q('100'))], [pykx.Operator(pykx.q('>')), 'price', pykx.LongAtom(pykx.q('500'))]]
+>>> trades.select(where=c)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+AAPL 2022.01.01 542.6371
+AAPL 2022.01.01 77.57332
+MSFT 2022.01.01 637.4637
+GOOG 2022.01.03 976.1246
+MSFT 2022.01.03 539.6816
+..
+'))
+```
+
+!!! Note "`or` / `|` operator restriction"
+
+ `Column` objects can not apply `or` or `|` off a `QueryPhrase`. Presently these are restricted only to operations on two `kx.Column` phrases.
+
+#### Python operators
+
+The following Python operators can be used with the `Column` class to perform analysis on your data
+
+| Python operator | q operation | Magic method |
+| --------------- | ------------ | --------------- |
+| `+` | `+` | `__add__` |
+| `-` | `-` | `__sub__` |
+| `-` | `-` | `__rsub__` |
+| `*` | `*` | `__mul__` |
+| `/` | `%` | `__truediv__` |
+| `/` | `%` | `__rtruediv__` |
+| `//` | `div` | `__floordiv__` |
+| `//` | `div` | `__rfloordiv__` |
+| `%` | `mod` | `__mod__` |
+| `**` | `xexp` | `__pow__` |
+| `==` | `=` | `__eq__` |
+| `!=` | `<>` | `__ne__` |
+| `>` | `>` | `__gt__` |
+| `>=` | `>=` | `__ge__` |
+| `<` | `<` | `__lt__` |
+| `<=` | `<=` | `__le__` |
+| `pos` | `abs` | `__pos__` |
+| `neg` | `neg` | `__neg__` |
+| `floor` | `floor` | `__floor__` |
+| `ceil` | `ceiling` | `__ceil__` |
+| `abs` | `abs` | `__abs__` |
+
+The following are a few examples of these various operations in use:
+
+1. Finding rows where `price` is greater than or equal to half the average price:
+
+ ```python
+ >>> trades.select(where=kx.Column('price') >= kx.Column('price').avg() / 2)
+ pykx.Table(pykx.q('
+ sym date price
+ ------------------------
+ AAPL 2022.01.01 542.6371
+ MSFT 2022.01.01 637.4637
+ GOOG 2022.01.03 976.1246
+ MSFT 2022.01.03 539.6816
+ GOOG 2022.01.02 716.2858
+ ..
+ '))
+ ```
+
+2. Apply the `math` library's `floor` operation on the column `price`, updating its value:
+
+ ```python
+ >>> from math import floor
+ >>> trades.update(floor(kx.Column('price')))
+ pykx.Table(pykx.q('
+ sym date price size
+ --------------------------
+ AAPL 2022.01.01 145 19
+ MSFT 2022.01.02 533 92
+ MSFT 2022.01.02 17 7
+ GOOG 2022.01.03 916 60
+ AAPL 2022.01.02 140 54
+ ..
+ '))
+ ```
+
+#### PyKX methods
+
+In addition to support for the Python operators outlined above PyKX provides a number of analytic methods and properties for the `kx.Column` objects. In total there are more than 100 analytic methods supported ranging from a basic method to retrieve the maximum value of a column, to more complex analytics for the calculation of the weighted average between two vectors.
+
+The following drop-down provides a list of the supported methods, with full details on the API page [here](../../../api/columns.md).
+
+??? Note "Supported methods"
+
+ [`abs`](../../../api/columns.md#pykx.wrappers.Column.abs), [`acos`](../../../api/columns.md#pykx.wrappers.Column.acos), [`asc`](../../../api/columns.md#pykx.wrappers.Column.asc), [`asin`](../../../api/columns.md#pykx.wrappers.Column.asin), [`atan`](../../../api/columns.md#pykx.wrappers.Column.atan), [`avg`](../../../api/columns.md#pykx.wrappers.Column.avg), [`avgs`](../../../api/columns.md#pykx.wrappers.Column.avgs), [`ceiling`](../../../api/columns.md#pykx.wrappers.Column.ceiling), [`cor`](../../../api/columns.md#pykx.wrappers.Column.cor), [`cos`](../../../api/columns.md#pykx.wrappers.Column.cos), [`count`](../../../api/columns.md#pykx.wrappers.Column.count), [`cov`](../../../api/columns.md#pykx.wrappers.Column.cov), [`cross`](../../../api/columns.md#pykx.wrappers.Column.cross), [`deltas`](../../../api/columns.md#pykx.wrappers.Column.deltas), [`desc`](../../../api/columns.md#pykx.wrappers.Column.desc), [`dev`](../../../api/columns.md#pykx.wrappers.Column.dev), [`differ`](../../../api/columns.md#pykx.wrappers.Column.differ), [`distinct`](../../../api/columns.md#pykx.wrappers.Column.distinct), [`div`](../../../api/columns.md#pykx.wrappers.Column.div), [`exp`](../../../api/columns.md#pykx.wrappers.Column.exp), [`fills`](../../../api/columns.md#pykx.wrappers.Column.fills), [`first`](../../../api/columns.md#pykx.wrappers.Column.first), [`floor`](../../../api/columns.md#pykx.wrappers.Column.floor), [`null`](../../../api/columns.md#pykx.wrappers.Column.null), [`iasc`](../../../api/columns.md#pykx.wrappers.Column.iasc), [`idesc`](../../../api/columns.md#pykx.wrappers.Column.idesc), [`inter`](../../../api/columns.md#pykx.wrappers.Column.inter), [`isin`](../../../api/columns.md#pykx.wrappers.Column.isin), [`last`](../../../api/columns.md#pykx.wrappers.Column.last), [`like`](../../../api/columns.md#pykx.wrappers.Column.like), [`log`](../../../api/columns.md#pykx.wrappers.Column.log), [`lower`](../../../api/columns.md#pykx.wrappers.Column.lower), 
[`ltrim`](../../../api/columns.md#pykx.wrappers.Column.ltrim), [`mavg`](../../../api/columns.md#pykx.wrappers.Column.mavg), [`max`](../../../api/columns.md#pykx.wrappers.Column.max), [`maxs`](../../../api/columns.md#pykx.wrappers.Column.maxs), [`mcount`](../../../api/columns.md#pykx.wrappers.Column.mcount), [`md5`](../../../api/columns.md#pykx.wrappers.Column.md5), [`mdev`](../../../api/columns.md#pykx.wrappers.Column.mdev), [`med`](../../../api/columns.md#pykx.wrappers.Column.med), [`min`](../../../api/columns.md#pykx.wrappers.Column.min), [`mins`](../../../api/columns.md#pykx.wrappers.Column.mins), [`mmax`](../../../api/columns.md#pykx.wrappers.Column.mmax), [`mmin`](../../../api/columns.md#pykx.wrappers.Column.mmin), [`mod`](../../../api/columns.md#pykx.wrappers.Column.mod), [`msum`](../../../api/columns.md#pykx.wrappers.Column.msum), [`neg`](../../../api/columns.md#pykx.wrappers.Column.neg), [`prd`](../../../api/columns.md#pykx.wrappers.Column.prd), [`prds`](../../../api/columns.md#pykx.wrappers.Column.prds), [`prev`](../../../api/columns.md#pykx.wrappers.Column.prev), [`rank`](../../../api/columns.md#pykx.wrappers.Column.rank), [`ratios`](../../../api/columns.md#pykx.wrappers.Column.ratios), [`reciprocal`](../../../api/columns.md#pykx.wrappers.Column.reciprocal), [`reverse`](../../../api/columns.md#pykx.wrappers.Column.reverse), [`rotate`](../../../api/columns.md#pykx.wrappers.Column.rotate), [`rtrim`](../../../api/columns.md#pykx.wrappers.Column.rtrim), [`scov`](../../../api/columns.md#pykx.wrappers.Column.scov), [`sdev`](../../../api/columns.md#pykx.wrappers.Column.sdev), [`signum`](../../../api/columns.md#pykx.wrappers.Column.signum), [`sin`](../../../api/columns.md#pykx.wrappers.Column.sin), [`sqrt`](../../../api/columns.md#pykx.wrappers.Column.sqrt), [`string`](../../../api/columns.md#pykx.wrappers.Column.string), [`sum`](../../../api/columns.md#pykx.wrappers.Column.sum), [`sums`](../../../api/columns.md#pykx.wrappers.Column.sums), 
[`svar`](../../../api/columns.md#pykx.wrappers.Column.svar), [`tan`](../../../api/columns.md#pykx.wrappers.Column.tan), [`trim`](../../../api/columns.md#pykx.wrappers.Column.trim), [`union`](../../../api/columns.md#pykx.wrappers.Column.union), [`upper`](../../../api/columns.md#pykx.wrappers.Column.upper), [`var`](../../../api/columns.md#pykx.wrappers.Column.var), [`wavg`](../../../api/columns.md#pykx.wrappers.Column.wavg), [`within`](../../../api/columns.md#pykx.wrappers.Column.within), [`wsum`](../../../api/columns.md#pykx.wrappers.Column.wsum), [`xbar`](../../../api/columns.md#pykx.wrappers.Column.xbar), [`xexp`](../../../api/columns.md#pykx.wrappers.Column.xexp), [`xlog`](../../../api/columns.md#pykx.wrappers.Column.xlog), [`xprev`](../../../api/columns.md#pykx.wrappers.Column.xprev), [`hour`](../../../api/columns.md#pykx.wrappers.Column.hour), [`minute`](../../../api/columns.md#pykx.wrappers.Column.minute), [`date`](../../../api/columns.md#pykx.wrappers.Column.date), [`year`](../../../api/columns.md#pykx.wrappers.Column.year), [`month`](../../../api/columns.md#pykx.wrappers.Column.month), [`second`](../../../api/columns.md#pykx.wrappers.Column.second), [`add`](../../../api/columns.md#pykx.wrappers.Column.add), [`name`](../../../api/columns.md#pykx.wrappers.Column.name), [`average`](../../../api/columns.md#pykx.wrappers.Column.average), [`cast`](../../../api/columns.md#pykx.wrappers.Column.cast), [`correlation`](../../../api/columns.md#pykx.wrappers.Column.correlation), [`covariance`](../../../api/columns.md#pykx.wrappers.Column.covariance), [`divide`](../../../api/columns.md#pykx.wrappers.Column.divide), [`drop`](../../../api/columns.md#pykx.wrappers.Column.drop), [`fill`](../../../api/columns.md#pykx.wrappers.Column.fill), [`index_sort`](../../../api/columns.md#pykx.wrappers.Column.index_sort), [`join`](../../../api/columns.md#pykx.wrappers.Column.join), [`len`](../../../api/columns.md#pykx.wrappers.Column.len), 
[`modulus`](../../../api/columns.md#pykx.wrappers.Column.modulus), [`multiply`](../../../api/columns.md#pykx.wrappers.Column.multiply), [`next_item`](../../../api/columns.md#pykx.wrappers.Column.next_item), [`previous_item`](../../../api/columns.md#pykx.wrappers.Column.previous_item), [`product`](../../../api/columns.md#pykx.wrappers.Column.product), [`products`](../../../api/columns.md#pykx.wrappers.Column.products), [`sort`](../../../api/columns.md#pykx.wrappers.Column.sort), [`subtract`](../../../api/columns.md#pykx.wrappers.Column.subtract), [`take`](../../../api/columns.md#pykx.wrappers.Column.take), [`value`](../../../api/columns.md#pykx.wrappers.Column.value) and [`variance`](../../../api/columns.md#pykx.wrappers.Column.variance).
+
+The following provides a complex example of a user-generated query to calculate trade statistics and time-weighted average spread information associated with Trade and Quote tables, making use of the following methods.
+
+- [`distinct`](../../../api/columns.md#pykx.wrappers.Column.distinct)
+- [`next_item`](../../../api/columns.md#pykx.wrappers.Column.next_item)
+- [`wavg`](../../../api/columns.md#pykx.wrappers.Column.wavg)
+- [`max`](../../../api/columns.md#pykx.wrappers.Column.max)
+- [`min`](../../../api/columns.md#pykx.wrappers.Column.min)
+- [`isin`](../../../api/columns.md#pykx.wrappers.Column.isin)
+- [`within`](../../../api/columns.md#pykx.wrappers.Column.within)
+- [`avg`](../../../api/columns.md#pykx.wrappers.Column.avg)
+- [`name`](../../../api/columns.md#pykx.wrappers.Column.name)
+
+```python
+def generate_twap(trade, quote, start_time, end_time, syms = None):
+ if syms is None:
+ syms = trade.exec(kx.Column('sym').distinct())
+
+ quote_metrics = quote.select(
+ columns = (kx.Column('ask') - kx.Column('bid')).avg().name('avg_spread') &
+ (kx.Column('time').next_item() - kx.Column('time')).wavg(kx.Column('ask') - kx.Column('bid')).name('twa_spread') &
+ ((kx.Column('asize') + kx.Column('bsize')).avg().name('avg_size') * 0.5) &
+ (kx.Column('time').next_item() - kx.Column('time')).avg().name('avg_duration'),
+ by = kx.Column('sym'),
+ where = kx.Column('sym').isin(syms) & kx.Column('time').within(start_time, end_time)
+ )
+
+ trade_metrics = trade.select(
+ columns = (2 * kx.Column('price').dev()).name('std_dev') &
+ (kx.Column('time').next_item() - kx.Column('time')).wavg(kx.Column('price')).name('std_dev') &
+ kx.Column('price').max().name('max_price') &
+ kx.Column('price').min().name('min_price') &
+ kx.Column('size').wavg(kx.Column('price')).name('vwap'),
+ by = kx.Column('sym'),
+ where = kx.Column('sym').isin(syms) & kx.Column('time').within(start_time, end_time)
+ )
+
+ return kx.q.uj(quote_metrics, trade_metrics)
+```
+
+### Variable
+
+See [pykx.Variable](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Variable) for full documentation on this class.
+
+In some cases when operating at the interface of q and Python analytics you may wish to perform a comparison or analytic which makes use of a named variable from `q`.
+
+The following example shows this in action
+
+```python
+>>> kx.q['filter']='GOOG'
+>>> trades.select(where=kx.Column('sym') == kx.Variable('filter'))
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.03 976.1246
+GOOG 2022.01.02 716.2858
+GOOG 2022.01.03 872.5027
+GOOG 2022.01.02 962.5156
+GOOG 2022.01.01 589.7202
+..
+'))
+```
+
+## Advanced features
+
+### Custom Functions
+
+While there is an extensive list of functions/analytics that are supported by the API it does not cover all analytics that you, or users of an extension you are writing may need.
+
+To facilitate this you have access to [pykx.register.column_function](../../../api/pykx-q-data/register.md#pykx.register.column_function). This function provides the ability to define methods off your defined `pykx.Column` objects. The function you register should take the column on which the function is being performed as its first argument and the `call` method should be used to apply your analytic.
+
+The `call` method takes as its first argument the function you wish to apply and can take multiple positional arguments.
+
+For example take the following cases:
+
+- Define a function applying a min-max scaling against the price column of a table
+
+ ```python
+ >>> def min_max_scaler(column):
+ ... return column.call('{(x-minData)%max[x]-minData:min x}')
+ >>> kx.register.column_function('minmax', min_max_scaler)
+ >>> trades.update(kx.Column('price').minmax().name('scaled_price'))
+ pykx.Table(pykx.q('
+ sym date price size scaled_price
+ ------------------------------------------
+ MSFT 2022.01.02 533.9187 92 0.5337153
+ MSFT 2022.01.02 17.17696 7 0.004702399
+ GOOG 2022.01.03 916.1286 60 0.9250016
+ MSFT 2022.01.03 282.4291 98 0.2762535
+ MSFT 2022.01.03 46.11964 93 0.03433238
+ ..
+ '))
+ ```
+
+- Define a function which multiplies two columns together and calculates the log returning the result
+
+ ```python
+ >>> def log_multiply(column1, column2):
+ ... return column1.call('{log x*y}', column2)
+ >>> kx.register.column_function('log_multiply', log_multiply)
+ >>> trades.select(kx.Column('price').log_multiply(kx.Column('size')))
+ pykx.Table(pykx.q('
+ price
+ --------
+ 10.80203
+ 4.789479
+ 10.9145
+ 10.22839
+ 8.363838
+ ..
+ '))
+ ```
+
+### fby queries
+
+Complex queries often require the application of a function on data grouped by some condition, in many cases the application of a `by` clause will be sufficient to get the information you need, however you will run into cases where you need to filter-by a certain condition.
+
+Take for example the case where you want to find the stock information by symbol where the price is the maximum price
+
+```python
+>>> trades.select(where=kx.Column('price') == kx.Column.fby(kx.Column('sym'), kx.q.max, kx.Column('price')))
+pykx.Table(pykx.q('
+sym date price size
+-----------------------------
+MSFT 2022.01.03 977.1655 92
+AAPL 2022.01.02 996.8898 20
+GOOG 2022.01.03 971.9498 47
+'))
+```
+
+### Using iterators
+
+Not all analytics that you may wish to run on your table will expect to take the full content of a column(s) as input, for example in some cases you may wish to apply an analytic on each row of a column. While operations which rely on iterators may be slower than purely vectorised operations they may be necessary.
+
+PyKX supports the following iterators, a number of examples are provided below
+
+| Iterator | Type | Link |
+| :------- | :---------- | :------------------------------------------------------------------------------- |
+| `each` | map | [Each](https://code.kx.com/q/ref/maps/#each) |
+| `peach` | map | [Peach](https://code.kx.com/q/ref/maps/#peach-keyword) |
+| `\:` | map | [Each Left](https://code.kx.com/q/ref/maps/#each-left-and-each-right) |
+| `/:` | map | [Each Right](https://code.kx.com/q/ref/maps/#each-left-and-each-right) |
+| `\:/:` | map | [Each Left-Each Right](https://code.kx.com/q/ref/maps/#each-left-and-each-right) |
+| `/:\:` | map | [Each Right-Each Left](https://code.kx.com/q/ref/maps/#each-left-and-each-right) |
+| `'` | map | [Case](https://code.kx.com/q/ref/maps/#case) |
+| `':` | map | [Each Prior](https://code.kx.com/q/ref/maps/#each-prior) |
+| `/` | accumulator | [Over](https://code.kx.com/q/ref/over/) |
+| `\` | accumulator | [Scan](https://code.kx.com/q/ref/scan/) |
+
+#### Example 1
+
+Calculate the maximum value of each row of a column `x`
+
+```python
+>>> table = kx.Table(data={'x': [[10, 5, 4], [20, 30, 50], [1, 2, 3]]})
+pykx.Table(pykx.q('
+x
+--------
+10 5 4
+20 30 50
+1 2 3
+'))
+>>> table.select(kx.Column('x').max(iterator='each'))
+pykx.Table(pykx.q('
+x
+--
+10
+50
+3
+'))
+```
+
+#### Example 2
+
+Join the characters associated from two columns row wise using the `'` iterator
+
+```python
+>>> table = kx.Table(data={'x': b'abc', 'y': b'def'})
+pykx.Table(pykx.q('
+x y
+---
+a d
+b e
+c f
+'))
+>>> table.select(kx.Column('x').join(kx.Column('y'), iterator="'"))
+pykx.Table(pykx.q('
+x
+----
+"ad"
+"be"
+"cf"
+'))
+```
+
+#### Example 3
+
+Join the characters `"_xy"` to all rows in a column `x`
+
+```python
+>>> table = kx.Table(data={'x': b'abc', 'y': b'def'})
+pykx.Table(pykx.q('
+x y
+---
+a d
+b e
+c f
+'))
+>>> table.select(kx.Column('x').join(b"_xy", iterator='\\:'))
+pykx.Table(pykx.q('
+x
+------
+"a_xy"
+"b_xy"
+"c_xy"
+'))
+```
+
+## Next Steps
+
+Now that you have learnt how to query your data using the Pythonic API you may be interested in other methods for querying your data:
+
+- If you wish to query your data using SQL, you can follow the introduction to this functionality [here](./sql.md).
+- If you want to upskill and learn how to query directly using q you can follow [this page](./qquery.md).
+- To learn how to make your queries more performant follow the tips and tricks [here](./perf.md).
+
+For some further reading, here are some related topics:
+
+- If you don't have a historical database available see [here](../../advanced/database/index.md).
+- To learn about creating PyKX Table objects see [here](../../../examples/interface-overview.ipynb).
diff --git a/docs/user-guide/fundamentals/query/qquery.md b/docs/user-guide/fundamentals/query/qquery.md
new file mode 100644
index 0000000..1860405
--- /dev/null
+++ b/docs/user-guide/fundamentals/query/qquery.md
@@ -0,0 +1,89 @@
+---
+title: Querying data using PyKX
+description: Introduction to the concept of querying PyKX databases and tables
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, historical, SQL, qSQL
+---
+
+# Querying data using qSQL with PyKX
+
+_This page explains how to query your data with PyKX using qSQL._
+
+## Querying tables using qSQL
+
+Creating a sample table:
+
+```python
+>>> import pykx as kx
+>>> trades = kx.Table(data={
+ 'sym': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ 'date': kx.random.random(100, kx.q('2022.01.01') + [0,1,2]),
+ 'price': kx.random.random(100, 1000.0)
+ })
+
+>>> # Assign the table to a named object in q memory to allow name based query later
+>>> kx.q['trades'] = trades
+```
+
+Query a table, by name, using [qSQL](https://code.kx.com/q/basics/qsql/):
+
+```python
+>>> kx.q('select from trades')
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 805.0147
+AAPL 2022.01.03 847.6275
+AAPL 2022.01.03 329.8159
+GOOG 2022.01.02 982.5155
+MSFT 2022.01.02 724.9456
+..
+'))
+```
+
+Query a [pykx.Table](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Table) [passing it as an argument](../../../user-guide/fundamentals/evaluating.md#application-of-functions-taking-multiple-arguments):
+
+```python
+>>> kx.q('{select from x}', trades)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 805.0147
+AAPL 2022.01.03 847.6275
+AAPL 2022.01.03 329.8159
+GOOG 2022.01.02 982.5155
+MSFT 2022.01.02 724.9456
+..
+'))
+```
+
+Passing multiple arguments:
+
+```python
+>>> from datetime import date
+>>> kx.q('{[x;y] select from trades where date = x, price < y}', date(2022, 1, 2), 500.0)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 214.9847
+AAPL 2022.01.02 126.2957
+AAPL 2022.01.02 184.4151
+AAPL 2022.01.02 217.0378
+GOOG 2022.01.02 423.6121
+..
+'))
+```
+
+## Next Steps
+
+Now that you have learnt how to query your data using qSQL you may be interested in other methods for querying your data:
+
+- If you want to query your data in a more Python-first way follow the guide [here](./pyquery.md).
+- If you wish to query your data using SQL, you can follow the introduction to this functionality [here](./sql.md).
+- To learn how to make your queries more performant follow the tips and tricks [here](./perf.md).
+
+For some further reading, here are some related topics:
+
+- If you don't have a historical database available see [here](../../advanced/database/index.md).
+- To learn about creating PyKX Table objects see [here](../../../examples/interface-overview.ipynb).
diff --git a/docs/user-guide/fundamentals/query/sql.md b/docs/user-guide/fundamentals/query/sql.md
new file mode 100644
index 0000000..9557816
--- /dev/null
+++ b/docs/user-guide/fundamentals/query/sql.md
@@ -0,0 +1,131 @@
+---
+title: Querying data using SQL with PyKX
+description: Introduction to querying data using SQL with PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, q, query, historical, SQL, qSQL
+---
+
+# Querying data using SQL with PyKX
+
+_This page explains how to query your data with PyKX using SQL._
+
+PyKX exposes a wrapper around the [KX Insights Core ANSI SQL interface](https://code.kx.com/insights/core/sql.html).
+This allows SQL to be used to query in-memory and on-disk data.
+
+The interface is accessed through the `kx.q.sql` class or via the `sql` method on table type objects. Full documentation of the class is included [here](../../../api/query.md#pykx.query.SQL).
+
+## Loading the SQL interface
+
+When you `import pykx as kx` an attempt will be made to load the SQL interface. If this fails you will see:
+
+```python
+WARN: Failed to load KX Insights Core library 's.k'.
+```
+
+To debug this you can set the [configuration option](../../configuration.md) `PYKX_DEBUG_INSIGHTS_LIBRARIES` before importing PyKX:
+
+```python
+import os
+os.environ['PYKX_DEBUG_INSIGHTS_LIBRARIES'] = 'true'
+import pykx as kx
+```
+
+This will print a more detailed error message, for example:
+
+```python
+PyKXWarning: Failed to load KX Insights Core library 's.k': s.k_. OS reports: No such file or directory
+```
+
+## Querying tables using SQL
+
+Creating a sample table:
+
+```python
+>>> import pykx as kx
+>>> trades = kx.Table(data={
+ 'sym': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ 'date': kx.random.random(100, kx.q('2022.01.01') + [0,1,2]),
+ 'price': kx.random.random(100, 1000.0)
+ })
+
+>>> kx.q['trades'] = trades
+```
+
+Query a table by name:
+
+```python
+>>> kx.q.sql('select * from trades')
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 805.0147
+AAPL 2022.01.03 847.6275
+AAPL 2022.01.03 329.8159
+GOOG 2022.01.02 982.5155
+MSFT 2022.01.02 724.9456
+..
+'))
+```
+
+Query a [pykx.Table](../../../api/pykx-q-data/wrappers.md#pykx.wrappers.Table) instance by injecting it as the first argument using `$n` syntax:
+
+```python
+>>> kx.q.sql('select * from $1', trades)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 805.0147
+AAPL 2022.01.03 847.6275
+AAPL 2022.01.03 329.8159
+GOOG 2022.01.02 982.5155
+MSFT 2022.01.02 724.9456
+..
+'))
+```
+
+Similarly you can use argument injection when using the `sql` method on your `trades` table object as follows:
+
+```python
+>>> trades.sql('select * from $1')
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 805.0147
+AAPL 2022.01.03 847.6275
+AAPL 2022.01.03 329.8159
+GOOG 2022.01.02 982.5155
+MSFT 2022.01.02 724.9456
+..
+'))
+```
+
+Passing multiple arguments using `$n` syntax:
+
+```python
+>>> from datetime import date
+>>> kx.q.sql('select * from trades where date = $1 and price < $2', date(2022, 1, 2), 500.0)
+pykx.Table(pykx.q('
+sym date price
+------------------------
+GOOG 2022.01.02 214.9847
+AAPL 2022.01.02 126.2957
+AAPL 2022.01.02 184.4151
+AAPL 2022.01.02 217.0378
+GOOG 2022.01.02 423.6121
+..
+'))
+```
+
+## Next Steps
+
+Now that you have learnt how to query your data using SQL you may be interested in other methods for querying your data:
+
+- To optimize frequently called SQL queries the [prepare](../../../api/query.md#pykx.query.SQL.prepare) and [execute](../../../api/query.md#pykx.query.SQL.execute) methods can be used to separate SQL parsing from query execution as detailed [here](https://code.kx.com/insights/1.10/core/sql.html#prepare-and-execute).
+- If you want to query your data in a more Python-first way follow the guide [here](./pyquery.md).
+- To learn how to make your queries more performant follow the tips and tricks [here](./perf.md).
+
+For some further reading, here are some related topics:
+
+- If you don't have a historical database available see [here](../../advanced/database/index.md).
+- To learn about creating PyKX Table objects see [here](../../../examples/interface-overview.ipynb).
diff --git a/docs/user-guide/fundamentals/temporal.md b/docs/user-guide/fundamentals/temporal.md
new file mode 100644
index 0000000..bc91155
--- /dev/null
+++ b/docs/user-guide/fundamentals/temporal.md
@@ -0,0 +1,140 @@
+---
+title: Convert temporal data types in PyKX
+description: How to convert temporal data types in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, data, convert
+---
+
+# Convert temporal data types in PyKX
+
+_This page provides details on how to convert temporal data types in PyKX._
+
+Converting temporal data types in PyKX involves handling timestamp/datetime types and duration types.
+
+### Timestamp/Datetime types
+
+When converting temporal types, note that Python and q use different [epoch](https://en.wikipedia.org/wiki/Epoch_(computing)) values:
+
+* q: Epoch starts at 2000.
+* Python: Epoch starts at 1970.
+
+!!! Note
+
+ The following details focus on `#!python NumPy` but similar considerations should be taken into account when converting Python, Pandas, and PyArrow objects.
+ The [Nulls and Infinities](./nulls_and_infinities.md) page should also be consulted for information.
+
+The 30-year epoch offset means there are times which are unreachable in one or the other language:
+
+| | **TimestampVector** | **datetime64[ns]** |
+|---------------|---------------------------------|---------------------------------|
+| Minimum value | `1707.09.22D00:12:43.145224194` | `1677-09-21T00:12:43.145224194` |
+| Maximum value | `2292.04.10D23:47:16.854775806` | `2262-04-11T23:47:16.854775807` |
+
+As such the range of times which can be directly converted should be considered:
+
+* Minimum value: `#!python 1707-09-22T00:12:43.145224194`
+* Maximum value: `#!python 2262-04-11T23:47:16.854775807`
+
+As mentioned on the [Nulls and Infinities](nulls_and_infinities.md) page, most q data types have null, negative infinity, and infinity values.
+
+| | **q representation** | **datetime64[ns]** |
+|-------------------|------------------|---------------------------------|
+| Null | `0Np` | `NaT` |
+| Negative Infinity | `-0Wp` | `1707-09-22T00:12:43.145224193` |
+| Infinity | `0Wp` | Overflow cannot be represented |
+
+When converting from q to NumPy using `#!python .np()`, `#!python 0Np` and `#!python -0Wp` result in meaningful values. However, using `#!python 0Wp` causes an overflow:
+
+```python
+>>> kx.q('0N -0W 0Wp').np()
+array(['NaT', '1707-09-22T00:12:43.145224193', '1707-09-22T00:12:43.145224191'], dtype='datetime64[ns]')
+```
+
+By default, converting to q using `#!python toq` results in meaningful values only for the NumPy maximum values:
+
+```python
+>>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]')
+>>> kx.toq(arr)
+pykx.TimestampVector(pykx.q('2262.04.11D23:47:16.854775808 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807'))
+```
+
+To additionally convert `#!python NaT`, use the `#!python handle_nulls` keyword:
+
+```python
+>>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]')
+>>> kx.toq(arr, handle_nulls=True)
+pykx.TimestampVector(pykx.q('0N 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807'))
+```
+
+Use `#!python raw=True` to request that the epoch offset is not applied. This allows for the underlying numeric values to be accessed directly:
+
+```python
+>>> kx.q('0N -0W 0Wp').np(raw=True)
+array([-9223372036854775808, -9223372036854775807, 9223372036854775807])
+```
+
+Passing back to q with `#!python toq` these are presented as the long null, negative infinity, and infinity:
+
+```python
+>>> kx.toq(kx.q('0N -0W 0Wp').np(raw=True))
+pykx.LongVector(pykx.q('0N -0W 0W'))
+```
+
+Pass `#!python ktype` during `#!python toq` to specify desired types:
+
+```python
+>>> kx.toq(pd.DataFrame(data= {'d':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}), ktype={'d':kx.DateVector})
+pykx.Table(pykx.q('
+d
+----------
+2020.09.08
+'))
+```
+!!! note "Note"
+
+ * Dictionary based conversion is only supported when operating in [licensed mode](../../user-guide/advanced/modes.md).
+ * Data is first converted to the default type and then cast to the desired type.
+
+!!! info
+
+ * In NumPy further data types exist `datetime64[us]`, `datetime64[ms]`, `datetime64[s]` which due to their lower precision have a wider range of dates they can represent. When converted to q using `toq` these all present as q `Timestamp` type and as such only dates within the range this data type can represent should be converted.
+
+ * Pandas 2.* changes behavior and conversions should be reviewed as part of an upgrade of this package. [PyKX to Pythonic data type mapping](../../api/pykx-q-data/type_conversions.md) includes examples showing differences seen when calling `.pd()`.
+
+### Duration types
+
+Duration types do not have the issue of epoch offsets, but some range limitations exist when converting between Python and PyKX.
+
+`#!python kx.SecondVector` and `#!python kx.MinuteVector` convert to `#!python timedelta64[s]`:
+
+| | q representation | timedelta64[s] |
+|-------------------------------------|------------------|---------------------------|
+| `kx.SecondVector` Null | `0Nv` | `NaT` |
+| `kx.SecondVector` Negative Infinity | `-0Wv` | `-24856 days +20:45:53` |
+| `kx.SecondVector` Infinity | `0Wv` | `24855 days 03:14:07` |
+| `kx.MinuteVector` Null | `0Nu` | `NaT` |
+| `kx.MinuteVector` Negative Infinity | `-0Wu` | `-1491309 days +21:53:00` |
+| `kx.MinuteVector` Infinity | `0Wu` | `1491308 days 02:07:00` |
+
+!!! warning "When converting Python to q using `#!python toq`, `#!python timedelta64[s]` is 64 bit and converts to `#!python kx.SecondVector` which is 32 bit:"
+
+| | SecondVector | timedelta64[s] |
+|---------------|--------------|-----------------------------------|
+| Maximum value | `**:14:06` | `106751991167300 days 15:30:07` |
+| Minimum value | `-**:14:06` | `-106751991167301 days +08:29:53` |
+
+As such, you should consider the range of times which can be directly converted:
+
+* Minimum value: `-24856 days +20:45:54`
+* Maximum value: `24855 days 03:14:06`
+
+q does not display values of second type over `#!python 99:59:59`, beyond this `#!python **` is displayed in the hour field.
+The data is still stored correctly and displays when converted:
+
+```python
+>>> kx.q('99:59:59 +1')
+pykx.SecondAtom(pykx.q('**:00:00'))
+>>> kx.q('99:59:59 +1').pd()
+Timedelta('4 days 04:00:00')
+```
diff --git a/docs/user-guide/fundamentals/text.md b/docs/user-guide/fundamentals/text.md
index 070e716..1fcd316 100644
--- a/docs/user-guide/fundamentals/text.md
+++ b/docs/user-guide/fundamentals/text.md
@@ -1,17 +1,29 @@
-# Text representation in PyKX
+---
+title: Convert text in PyKX
+description: How to convert text in PyKX
+date: July 2024
+author: KX Systems, Inc.,
+tags: PyKX, text,
+---
-Within PyKX text can be represented in a number of ways that you will encounter when using the library. The following are the basic building blocks for text within PyKX, a deeper dive into the underlying text representation can be found [here](https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/#24-text-data):
+# Convert text in PyKX
-| Type | Description | Example Generation |
+_This page provides details on how to represent, handle, and convert text in PyKX._
+
+In PyKX, text can be represented in various ways. Here are the basic building blocks for handling text within the library:
+
+| **Type** | **Description** | **Example Generation** |
|---------------------|--------------------------------------------------------------------------------------------------|------------------------------|
| `pykx.SymbolAtom` | A symbol atom in PyKX is an irreducible atomic entity storing an arbitrary number of characters. | ```pykx.q('`test')``` |
| `pykx.SymbolVector` | A symbol vector is a collected list of symbol atoms. | ```pykx.q('`test`vector')``` |
| `pykx.CharAtom` | A char atom holds a single ASCII or 8-but unicode character stored as 1 byte. | `pykx.q('"a"')` |
-| `pykx.CharVector` | A char vector is a collected list of char vectors | `pykx.q('"test"')` |
+| `pykx.CharVector` | A char vector is a collected list of char atoms. | `pykx.q('"test"')` |
+
+!!! info "Head to our [Text data](https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/#24-text-data) section for a deeper dive into the underlying text representation."
-## Converting text to/from PyKX
+## Convert text to/from PyKX
-Pythonic text data can be converted to PyKX objects directly through use of the `pykx.SymbolAtom` and `pykx.CharVector` functions as shown below
+To convert Pythonic text data to PyKX objects, use the `#!python pykx.SymbolAtom` and `#!python pykx.CharVector` functions as shown below:
```python
>>> import pykx as kx
@@ -22,9 +34,9 @@ pykx.SymbolAtom(pykx.q('`test string'))
pykx.CharVector(pykx.q('"test string"'))
```
-Alternatively you can make use of the automatic conversion function `pykx.toq` which will take an incoming Python type and convert it to its analagous PyKX type. The following table shows the mapping which is used
+Alternatively, you can use the automatic conversion function `#!python pykx.toq`, which takes an incoming Python type and converts it to its analogous PyKX type. The following table shows the mapping between the two types:
-| Python Type | PyKX Type |
+| **Python Type**| **PyKX Type** |
|-------------|-----------------------------------|
| `str` | `pykx.SymbolAtom` |
| `byte` | `pykx.CharAtom`/`pykx.CharVector` |
@@ -39,7 +51,7 @@ pykx.CharVector(pykx.q('"bytes"'))
pykx.CharAtom(pykx.q('"a"'))
```
-When using the `pykx.toq` function it is possible to specify the target type for your data as shown below, this can be useful when selectively converting data
+When using the `#!python pykx.toq` function, you can specify the target type for your data as shown below. This can be useful when selectively converting data:
```python
>>> import pykx as kx
@@ -49,7 +61,7 @@ pykx.CharVector(pykx.q('"string"'))
pykx.SymbolAtom(pykx.q('`bytes'))
```
-An important note on the above when using PyKX functions is that the `pykx.toq` conversion will be used by default when passing Python data to these functions, for example:
+The `#!python pykx.toq` conversion is used by default when passing Python data to PyKX functions, for example:
```python
>>> import pykx as kx
@@ -62,11 +74,14 @@ pykx.List(pykx.q('
## Differences between `Symbol` and `Char` data objects
-While there may appear to be limited differences between `Symbol` and `Char` representations of objects, the choice of underlying representation can have an impact on the performance and memory profile of many applications of PyKX. This section will describe a number of these differences and their impact in various scenarios.
+While there may appear to be limited differences between `#!python Symbol` and `#!python Char` representations of objects, the choice of underlying representation can have an impact on the performance and memory profile of many applications of PyKX. This section will describe a number of these differences and their impact in various scenarios.
+
+Although `#!python Symbol` and `#!python Char` representations of objects might seem similar, the choice between them can significantly affect the performance and memory usage of many PyKX applications. This section explores the impact of these differences in various scenarios.
+
### Text access and mutability
-The individual characters which comprise a `pykx.SymbolAtom` object are not directly accessible by a user, this limitation does not exist for `pykx.CharVector` objects. For example it is possible to retrieve slices of a `pykx.CharVector`
+The individual characters which comprise a `#!python pykx.SymbolAtom` object are not directly accessible by a user; this limitation does not exist for `#!python pykx.CharVector` objects. For example, it's possible to retrieve slices of a `#!python pykx.CharVector`:
```python
>>> import pykx as kx
@@ -84,7 +99,7 @@ Traceback (most recent call last):
TypeError: 'SymbolAtom' object is not subscriptable
```
-Similarly `pykx.CharVector` type objects are mutable while `pykx.SymbolAtom` type objects are not
+Similarly `#!python pykx.CharVector` type objects are mutable while `#!python pykx.SymbolAtom` type objects are not:
```python
>>> import pykx as kx
@@ -95,7 +110,7 @@ pykx.CharVector(pykx.q('"rest"'))
### Memory considerations
-An important point of note when dealing with Symbol type objects is that these are never deallocated once generated, this can be seen through growth of the `syms` key of `kx.q.Q.w` as follows
+When dealing with Symbol type objects, note that they are never deallocated once generated. You can notice this through growth of the `#!python syms` key of `#!python kx.q.Q.w` as follows:
```python
>>> kx.q.Q.w()['syms']
diff --git a/examples/subscriber/readme.md b/examples/subscriber/readme.md
index 71876fc..292ed81 100644
--- a/examples/subscriber/readme.md
+++ b/examples/subscriber/readme.md
@@ -1,86 +1,201 @@
-# PyKX Subscribing to a `q` Process
-
-The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` to subscribe to a running q process.
-
-## Quickstart
-
-This example creates a python subscriber to a q process, that appends data received to the end of a table.
-
-Here we have:
-2. A q process running on port 5001
-3. A Python process subscribing to the q process
-
-### Start the required q processes
-
-```q
-// run q
-$ q -p 5001
-q)
-```
-
-### Start the pykx subscriber
-
-```bash
-// run the subscriber which will automatically connect
-$ python subscriber.py
-```
-
-### Outcome
-
-What should be observed on invocation of the above is that the q process should have the variable `py_server` set to the handle of the python process once the python process connects. Once this variable is set you can send rows of the table to the python process and they will be appended as they are recieved.
-
-```q
-// run q
-$ q -p 5001
-q)
-```
-
-q process is started.
-
-```bash
-// run the subscriber which will automatically connect
-$ python subscriber.py
-===== Initial Table =====
-a b
---
-4 8
-9 1
-2 9
-7 5
-0 4
-1 6
-9 6
-2 1
-1 8
-8 5
-===== Initial Table =====
-
-```
-
-Python process is started with a table, and it connects to the q server and sets the `py_server` variable.
-
-```q
-q)py_server[1 2]
-
-```
+title: PyKX subscriber example
+description: Subscriber Examples
+date: October 2024
+author: KX Systems, Inc.,
+tags: subscriber, synchronous, asynchronous, PyKX
+---
+# PyKX Subscribing to a `q` Process
-Send a new table row (1, 2) to the python process from q.
+_This example demonstrates using `PyKX` to set up a Python process as a subscriber to data messages published from a q process._
+
+## Pre-requisites
+
+A kdb+ license is required to complete this example. [Sign-up for a license](https://code.kx.com/q/learn/licensing/).
-```python
-Recieved new table row from q: 1 2
-a b
----
-4 8
-9 1
-2 9
-7 5
-0 4
-1 6
-9 6
-2 1
-1 8
-8 5
-1 2
-```
-
-The new row has been appended to the table.
+The following python libraries are required to run this example:
+
+1. pykx
+1. asyncio
+
+The source code for this example is available in the examples directory here:
+
+1. [Synchronous subscriber](https://github.com/KxSystems/pykx/blob/main/examples/subscriber/subscriber.py)
+1. [Asynchronous subscriber](https://github.com/KxSystems/pykx/blob/main/examples/subscriber/subscriber_async.py)
+
+## Summary of steps
+
+Both example scripts for setting up a subscriber follow the same steps:
+
+1. Start a q process running with some open port (5001 is used for the example, but you may choose any open port).
+1. Run the python subscriber by executing the script from the github repository.
+
+### Run the subscriber example
+
+1. Begin by running a q process with an open port:
+
+ ```q
+ // run q
+ $ q -p 5001
+ q)
+ ```
+1. In a separate terminal start a python process running the subscriber script:
+
+ ```bash
+ // run the subscriber, which connects automatically
+ $ python subscriber.py
+ ```
+ The python process opens an IPC connection to the q process and sets a new global variable on the q process as part of the main function:
+
+ ```python
+ async def main():
+ global table
+ async with kx.RawQConnection(port=5001) as q:
+ print('===== Initial Table =====')
+ print(table)
+ print('===== Initial Table =====')
+ await q('py_server:neg .z.w')
+ await main_loop(q)
+ ```
+ The q process now has the variable `py_server` set to the handle of the python process once the python process connects.
+
+1. Once this variable is set, you can send rows of the table to the python process and they are appended as they are received.
+
+ ```bash
+ // run the subscriber, which automatically connects
+ $ python subscriber.py
+ ===== Initial Table =====
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ ===== Initial Table =====
+
+ ```
+
+1. As the Python process is initiated, it connects to the q server and sets the `py_server` variable and creates the initial table.
+
+ ```q
+ q)py_server[1 2]
+
+ ```
+
+1. Send a new table row (1, 2) to the python process from q.
+
+ ```python
+ Received new table row from q: 1 2
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ 1 2
+ ```
+
+ The new row has been appended to the table.
+
+### Run the asynchronous subscriber example
+
+1. Begin by running a q process with an open port:
+
+ ```q
+ // run q
+ $ q -p 5001
+ q)
+ ```
+1. In a separate terminal start a python process running the asynchronous subscriber script:
+
+ ```bash
+ // run the asynchronous subscriber which automatically connects
+ $ python subscriber_async.py
+ ```
+ The python process opens an IPC connection to the q process and sets a new global variable on the q process as part of the main function:
+
+ ```python
+ async def main():
+ global table
+ async with kx.RawQConnection(port=5001) as q:
+ print('===== Initial Table =====')
+ print(table)
+ print('===== Initial Table =====')
+ await q('py_server:neg .z.w')
+ await main_loop(q)
+ ```
+ The q process now has the variable `py_server` set to the handle of the python process once the python process connects.
+
+1. Once this variable is set, you can send rows of the table to the python process and they are appended as they are received.
+
+ ```bash
+ // run the subscriber, which automatically connects
+ $ python subscriber_async.py
+ ===== Initial Table =====
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ ===== Initial Table =====
+
+ ```
+
+1. As the Python process is initiated, it connects to the q server and sets the `py_server` variable and creates the initial table.
+
+ ```q
+ q)py_server[1 2]
+
+ ```
+
+1. Send a new table row (1, 2) to the python process from q.
+
+ ```python
+ Received new table row from q: 1 2
+ a b
+ ---
+ 4 8
+ 9 1
+ 2 9
+ 7 5
+ 0 4
+ 1 6
+ 9 6
+ 2 1
+ 1 8
+ 8 5
+ 1 2
+ ```
+
+ The new row has been appended to the table.
+
+
+## Summary
+
+This example has demonstrated how to initiate a q process, subscribe to an existing table, and append rows to it either synchronously or asynchronously.
+
+## Next steps
+
+Check out more examples such as:
+
+- [Real-Time Streaming](../streaming/index.md)
+- [Compression and Encryption](../compress_and_encrypt/readme.md)
diff --git a/examples/subscriber/subscriber_async.py b/examples/subscriber/subscriber_async.py
new file mode 100644
index 0000000..6e2849c
--- /dev/null
+++ b/examples/subscriber/subscriber_async.py
@@ -0,0 +1,39 @@
+import pykx as kx
+
+import asyncio
+
+
+table = kx.q('([] a: 10?10; b: 10?10)')
+
+
+def assert_result(res):
+    long_vector = type(res) is kx.LongVector  # the message must be exactly a vector of longs
+    return long_vector and len(res) == 2  # two values, matching the two columns (a, b) of `table`
+
+
+async def main_loop(q):
+    global table  # rebound below with the result of every upsert
+    while True:
+        result = await q.poll_recv_async()  # next message received over the connection `q`
+        if assert_result(result):  # messages that are not a 2-element LongVector are ignored
+            print(f'Received new table row from q: {result}')
+            table = kx.q.upsert(table, result)
+            print(table)
+        result = None  # drop the reference to the handled message before polling again
+
+
+async def main():
+    """Connect to the q process on port 5001, register this client there, then poll for rows."""
+    async with kx.RawQConnection(port=5001, event_loop=asyncio.get_running_loop()) as q:
+        print('===== Initial Table =====')
+        print(table)
+        print('===== Initial Table =====')
+        # Set the variable py_server on the q process pointing towards this process's IPC connection
+        # We use neg to ensure the messages are sent async so no reply is expected from this process
+        await q('py_server: neg .z.w')
+
+        await main_loop(q)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/mkdocs.yml b/mkdocs.yml
index cd5906c..27633bd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -15,14 +15,16 @@ edit_uri: 'edit/main/docs/'
extra_css:
- https://code.kx.com/assets/stylesheets/main.b941530a.min.css
- - https://code.kx.com/stylesheets/2021.css
- - https://code.kx.com/stylesheets/prism.css
+ # - https://code.kx.com/stylesheets/2021.css
+ # - https://code.kx.com/stylesheets/prism.css
- https://code.kx.com/stylesheets/extra.css
- - https://code.kx.com/stylesheets/hide-footer-nav.css
- stylesheets/mkdocstrings.css
- https://code.kx.com/stylesheets/dashboards.css
- https://code.kx.com/dashboards/stylesheets/dashboards.css
- stylesheets/swagger.css
+ # More accessible colors in the edited versions
+ - stylesheets/2021-edited.css
+ - stylesheets/prism-edited.css
extra_javascript:
- https://code.kx.com/scripts/prism.js
@@ -83,6 +85,7 @@ plugins:
execute: True
include: ["*.ipynb"]
include_source: True # Let users download the Jupyter notebook to use interactively
+ ignore: ["examples/streaming/*.ipynb", "examples/streaming/*.py"]
allow_errors: False
remove_tag_config:
remove_input_tags:
@@ -118,10 +121,11 @@ plugins:
- search
- exclude-search:
exclude:
- - getting-started/q_magic_command.ipynb
- user-guide/advanced/Pandas_API.ipynb
- - getting-started/PyKX Introduction Notebook.ipynb
+ - examples/interface-overview.ipynb
+ - examples/jupyter-integration.ipynb
- examples/db-management.ipynb
+ - examples/streaming/Evolving System.ipynb
- spellcheck:
known_words: spelling.txt
ignore_code: true # Ignore words in tags
@@ -131,11 +135,11 @@ plugins:
skip_files: # Skip files entirely
- comparisons.md # Skipped due to false positives
- performance.md # Skipped due to false positives
- - "getting-started/PyKX Introduction Notebook.ipynb"
- - getting-started/q_magic_command.ipynb
- user-guide/advanced/Pandas_API.ipynb
- - getting-started/PyKX Introduction Notebook.ipynb
+ - examples/interface-overview.ipynb
+ - examples/jupyter-integration.ipynb
- examples/db-management.ipynb
+ - examples/streaming/Evolving System.ipynb
- examples/charting.ipynb
@@ -174,101 +178,120 @@ docs_dir: docs
nav:
- Home: 'https://code.kx.com/insights/'
- kdb+ and q: 'https://code.kx.com/q'
- - kdb Insights: "https://code.kx.com/insights/core"
+ - kdb Insights SDK: "https://code.kx.com/insights/core"
- kdb Insights Enterprise: "https://code.kx.com/insights/platform/"
- KDB.AI: "https://code.kx.com/kdbai/"
- PyKX:
- index.md
- - Getting Started:
- - What is PyKX?: getting-started/what_is_pykx.md
- - Installation: getting-started/installing.md
+ - Get Started:
+ - Install: getting-started/installing.md
- Quickstart guide: getting-started/quickstart.md
- - PyKX Introduction Notebook: "getting-started/PyKX Introduction Notebook.ipynb"
- - Jupyter q Magic Command: getting-started/q_magic_command.ipynb
- - User Guide:
- - Introduction: user-guide/index.md
- - Configuration: user-guide/configuration.md
- - Fundamentals:
- - Generating PyKX objects: user-guide/fundamentals/creating.md
- - Interacting with PyKX objects: user-guide/fundamentals/evaluating.md
- - Querying data: user-guide/fundamentals/querying.md
- - Indexing PyKX objects: user-guide/fundamentals/indexing.md
- - Conversion considerations: user-guide/fundamentals/conversion_considerations.md
- - Text Representation in PyKX: user-guide/fundamentals/text.md
- - Handling nulls and infinities: user-guide/fundamentals/nulls_and_infinities.md
- - Advanced usage and performance considerations:
- - Communicating via IPC: user-guide/advanced/ipc.md
- - Database interactions: user-guide/advanced/database.md
- - Using q functions in a Pythonic way: user-guide/advanced/context_interface.md
- - Modes of operation: user-guide/advanced/modes.md
- - NumPy integration: user-guide/advanced/numpy.md
- - Serialization and de-serialization: user-guide/advanced/serialization.md
- - Performance considerations: user-guide/advanced/performance.md
- - Interface limitations: user-guide/advanced/limitations.md
- - Attributes: user-guide/advanced/attributes.md
- - Pandas Like API Coverage: user-guide/advanced/pandas_breakdown.md
- - API:
- - Code execution:
- - PyKX native functions: api/pykx-execution/q.md
- - PyKX execution classes: api/pykx-execution/embedded_q.md
- - Context interface: api/pykx-execution/ctx.md
- - PyKX console: api/pykx-execution/console.md
+ - Learn:
+ - What is PyKX?: getting-started/what_is_pykx.md
+ - Objects and attributes: learn/objects.md
+ - Databases: user-guide/advanced/database/index.md
+ - Modes of operation: user-guide/advanced/modes.md
+ - Performance tips: user-guide/advanced/performance.md
+ - Why upgrade from embedPy: pykx-under-q/upgrade.md
+ - Glossary: extras/glossary.md
+ - Blogs, articles, videos: blogs.md
+ - How to:
+ - Configure PyKX: user-guide/configuration.md
+ - Interact with data:
+ - Create objects: user-guide/fundamentals/creating.md
+ - Use objects: user-guide/fundamentals/evaluating.md
+ - Index objects: user-guide/fundamentals/indexing.md
+ - Convert data: user-guide/fundamentals/conversion_considerations.md
+ - Convert text: user-guide/fundamentals/text.md
+ - Convert nulls and infinities: user-guide/fundamentals/nulls_and_infinities.md
+ - Convert temporal data: user-guide/fundamentals/temporal.md
+ - Interact with databases:
+ - Generate databases: user-guide/advanced/database/db_gen.md
+ - Load databases: user-guide/advanced/database/db_loading.md
+ - Manage databases: user-guide/advanced/database/db_mgmt.md
+ - Query data:
+ - Introduction: user-guide/fundamentals/query/index.md
+ - Query with Python: user-guide/fundamentals/query/pyquery.md
+ - Query with SQL: user-guide/fundamentals/query/sql.md
+ - Query with q: user-guide/fundamentals/query/qquery.md
+ - Performance considerations: user-guide/fundamentals/query/perf.md
+ - Communicate via IPC: user-guide/advanced/ipc.md
+ - Capture real-time data:
+ - Introduction: user-guide/advanced/streaming/index.md
+ - Capture and Store: user-guide/advanced/streaming/basic.md
+ - Publish data: user-guide/advanced/streaming/publish.md
+ - Subscribe to data: user-guide/advanced/streaming/subscribe.md
+ - Analyze streaming data: user-guide/advanced/streaming/rta.md
+ - Build custom APIs: user-guide/advanced/streaming/custom_apis.md
+ - Manage query routing: user-guide/advanced/streaming/gateways.md
+ - Complex infrastructure: user-guide/advanced/streaming/complex.md
+ - Apply attributes: user-guide/advanced/attributes.md
+ - Compress and encrypt data: user-guide/advanced/compress-encrypt.md
+ - Import existing q functions: user-guide/advanced/context_interface.md
+ - Run remote functions: user-guide/advanced/remote-functions.md
+ - Serialize data: user-guide/advanced/serialization.md
+ - Use PyKX in subprocesses: user-guide/advanced/subprocess.md
+ - Run q code multithreaded: user-guide/advanced/threading.md
+ - Manage your license: user-guide/advanced/license.md
+ - Use Python in a q process: pykx-under-q/intro.md
+ - Reference:
+ - API references:
+ - q functions and operators: api/pykx-execution/q.md
+ - Execution classes: api/pykx-execution/embedded_q.md
+ - q contexts: api/pykx-execution/ctx.md
+ - Emulated q console: api/pykx-execution/console.md
+ - Data generation: api/random.md
+ - Schema generation: api/schema.md
+ - Query data: api/query.md
+ - Query classes: api/columns.md
+ - Registering custom operations: api/pykx-q-data/register.md
+ - Compression and encryption: api/compress.md
+ - Database management: api/db.md
+ - File I/O: api/pykx-save-load/fileio.md
+ - q IPC interface: api/ipc.md
+ - Streamlit integration: api/streamlit.md
+ - Serialize/deserialize: api/serialize.md
+ - Remote Python execution: api/remote.md
+ - Real-time capture: api/tick.md
+ - System command wrappers: api/system.md
+ - Utility functions: api/util.md
+ - Reimporter module: api/reimporting.md
+ - License management: api/license.md
+ - PyKX under q: pykx-under-q/api.md
- Data types and conversions:
- - Convert Pythonic data to PyKX: api/pykx-q-data/toq.md
- - PyKX type wrappers: api/pykx-q-data/wrappers.md
- - PyKX to Pythonic data type mapping: api/pykx-q-data/type_conversions.md
- - Registering Custom Conversions: api/pykx-q-data/register.md
- - Pandas Like API: user-guide/advanced/Pandas_API.ipynb
- - License management: api/license.md
- - Random data generation: api/random.md
- - Querying: api/query.md
- - Compression and Encryption APIs: api/compress.md
- - Database Interactions: api/db.md
- - Remote Python Execution: api/remote.md
- - IPC: api/ipc.md
- - PyKX Exceptions: api/exceptions.md
- - Schema generation: api/schema.md
- - Streamlit Integration: api/streamlit.md
- - System Command Wrappers: api/system.md
- - Utilities: api/util.md
- - File loading and saving:
- - Writing data to disk: api/pykx-save-load/write.md
- - Reading data from disk: api/pykx-save-load/read.md
- - Reimporter module: api/reimporting.md
- - Serialization: api/serialize.md
- - Beta Features:
- - Introduction: beta-features/index.md
- - Database Management: beta-features/db-management.md
- - Compression and Encryption: beta-features/compress-encypt.md
- - Remote Function Execution: beta-features/remote-functions.md
- - Multithreading: beta-features/threading.md
- - Streamlit: beta-features/streamlit.md
- - Python interfacing within q:
- - Overview: pykx-under-q/intro.md
- - API: pykx-under-q/api.md
- - Upgrading from embedPy: pykx-under-q/upgrade.md
- - Known Issues: pykx-under-q/known_issues.md
+ - Python to PyKX: api/pykx-q-data/toq.md
+ - Wrappers: api/pykx-q-data/wrappers.md
+ - PyKX to Python: api/pykx-q-data/type_conversions.md
+ - Pandas API: user-guide/advanced/Pandas_API.ipynb
+ - Exceptions: api/exceptions.md
+ - Integrations:
+ - NumPy: user-guide/advanced/numpy.md
+ - Streamlit: user-guide/advanced/streamlit.md
+ - Python charting libraries: examples/charting.ipynb
+ - Jupyter Integration: examples/jupyter-integration.ipynb
- Examples:
+ - PyKX Introduction Notebook: examples/interface-overview.ipynb
- Subscriber: examples/subscriber/readme.md
- - Compression and Encryption: examples/compress_and_encrypt/readme.md
- - Database Creation and Management: examples/db-management.ipynb
+ - Compress and encrypt: examples/compress_and_encrypt/readme.md
+ - Database creation and management: examples/db-management.ipynb
- IPC: examples/ipc/README.md
- - Charting Data with PyKX: examples/charting.ipynb
- - PyKX as a Server: examples/server/server.md
- - Multithreaded Execution: examples/threaded_execution/threading.md
- - Extras:
- - Comparisons against other Python/q interfaces: extras/comparisons.md
- - Known issues: extras/known_issues.md
- - Release notes:
- - PyKX: release-notes/changelog.md
- - PyKX under q: release-notes/underq-changelog.md
- - Roadmap: roadmap.md
- - Blogs, Articles and Videos: blogs.md
- - Troubleshooting: troubleshooting.md
- - Frequently Asked Questions (FAQ): faq.md
- - Support: support.md
- - License: license.md
- - Contributors: contributors.md
- - APIs: "https://code.kx.com/insights/api/index.html"
- - Licensing : "https://code.kx.com/insights/licensing/licensing/"
- - Help: "https://code.kx.com/home/support/"
+ - PyKX as a server: examples/server/server.md
+ - Real-time streaming: examples/streaming/index.md
+ - Multithreaded execution: examples/threaded_execution/threading.md
+ - Releases:
+ - Release notes:
+ - PyKX: release-notes/changelog.md
+ - PyKX under q: release-notes/underq-changelog.md
+ - 2.x -> 3.x Upgrade : upgrades/2030.md
+ - Roadmap: roadmap.md
+ - Beta features: beta-features/index.md
+ - Help and Support:
+ - Troubleshooting: help/troubleshooting.md
+ - FAQ: help/faq.md
+ - PyKX support: help/support.md
+ - Issues and limitations: help/issues.md
+ - PyKX license: license.md
+ - Contributors: contributors.md
+ - APIs: 'https://code.kx.com/insights/api/index.html'
+ - Licensing: 'https://code.kx.com/insights/licensing/licensing/'
+ - Help: 'https://code.kx.com/home/support.html'
diff --git a/pyproject.toml b/pyproject.toml
index daef722..fa7634e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,9 +54,12 @@ dependencies = [
"numpy~=1.22, <2.0; python_version=='3.10'",
"numpy~=1.23, <2.0; python_version=='3.11'",
"numpy~=1.26, <2.0; python_version=='3.12'",
- "pandas>=1.2, < 2.2.0",
+ "pandas>=1.2, < 2.0; python_version=='3.8'",
+ "pandas>=1.2, <= 2.2.3; python_version>'3.8'",
"pytz>=2022.1",
"toml~=0.10.2",
+ "dill>=0.2.0",
+ "requests>=2.25.0"
]
@@ -79,6 +82,9 @@ doc = [
"mkdocstrings[python]==0.18.0",
"pygments~=2.12",
"pymdown-extensions>=9.3",
+ "matplotlib",
+ "seaborn",
+ "kaleido",
]
debug = [
"find-libpython~=0.2",
@@ -95,12 +101,12 @@ lint = [
pyarrow = [
"pyarrow>=3.0.0",
]
+streaming = [
+ "psutil>=5.0.0"
+]
dashboards = [
"ast2json~=0.3",
]
-beta = [
- "dill>=0.2.0",
-]
streamlit = [
"streamlit~=1.28; python_version>'3.7'"
]
@@ -117,7 +123,12 @@ test = [
"pytest-xdist==2.5.0",
"pytest-order==1.1.0",
"psutil==5.9.5",
- "pytest-timeout>=2.0.0"
+ "pytest-timeout>=2.0.0",
+ "IPython",
+]
+help = [
+ "beautifulsoup4==4.10.0",
+ "markdown2==2.5.0",
]
@@ -216,7 +227,7 @@ ignore = [
"E999", # syntax error (Cython syntax is not valid Python syntax)
"I100", # import statements are in the wrong order
"I202", # additional newline in a group of imports (We use three 3: built-in, third-party, local)
- "W503", # depracated warning - goes against PEP8
+ "W503", # deprecated warning - goes against PEP8
"W605", # Invalid escape character in comments causing issue with q examples
]
diff --git a/setup.py b/setup.py
index 61d2393..c84e084 100755
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,7 @@
import numpy as np
from setuptools import Extension
from setuptools.command.build_ext import build_ext as default_build_ext
+from setuptools.command.install import install
from setuptools import setup
import tomli
@@ -52,6 +53,16 @@
windows_libraries = () if system != 'Windows' else ('psapi', 'q')
+class CustomInstallCommand(install):
+    """Install command that additionally copies `q.md` into the installed `pykx` package."""
+    def run(self):
+        install.run(self)  # perform the standard install step first
+        source_file = 'docs/api/pykx-execution/q.md'  # relative path, resolved against the cwd
+        docs_dir = os.path.join(self.install_lib, 'pykx', 'docs', 'api', 'pykx-execution')
+        os.makedirs(docs_dir, exist_ok=True)
+        shutil.copy(source_file, docs_dir)
+
+
def rmrf(path: str):
"""Delete the file tree with ``path`` as its root"""
if os.path.isdir(path) and not os.path.islink(path):
@@ -257,6 +268,14 @@ def ext(name: str,
for f in
[str(f) for f in os.listdir() if os.path.isfile(f) and str(f) != 'q.k']
]
+ for p in ('l64', 'l64arm', 'm64', 'm64arm', 'w64'):
+ with cd(src_dir/'lib'/p):
+ [
+ shutil.copy(f, f'../4-1-libs/{p}/{f}')
+ for f in
+ [str(f) for f in os.listdir()
+ if str(f) != 'symbols.txt' and not os.path.exists(f'../4-1-libs/{p}/{f}')]
+ ]
setup(
name=pyproject['name'],
description=pyproject['description'],
@@ -273,6 +292,7 @@ def ext(name: str,
package_dir={'pykx': str(Path('src/pykx'))},
cmdclass={
'build_ext': build_ext,
+ 'install': CustomInstallCommand,
},
include_package_data=True, # makes setuptools use MANIFEST.in
zip_safe=False, # required by Cython
diff --git a/src/pykx/__init__.py b/src/pykx/__init__.py
index e05ef27..6fbea2b 100644
--- a/src/pykx/__init__.py
+++ b/src/pykx/__init__.py
@@ -41,7 +41,7 @@
from warnings import warn
from weakref import proxy
-from .config import k_allocator, licensed, no_pykx_signal, no_sigint, pykx_platlib_dir, under_q
+from .config import k_allocator, licensed, no_pykx_signal, pykx_platlib_dir, under_q
from . import util
if platform.system() == 'Windows': # nocov
@@ -81,7 +81,7 @@ class Q(metaclass=ABCMeta):
"""
reserved_words = {
'abs', 'acos', 'aj', 'aj0', 'ajf', 'ajf0', 'all', 'and', 'any', 'asc',
- 'asin', 'asin', 'asof', 'atan', 'attr', 'avg', 'avgs', 'bin',
+ 'asin', 'asof', 'atan', 'attr', 'avg', 'avgs', 'bin',
'binr', 'ceiling', 'cols', 'cor', 'cos', 'count', 'cov', 'cross',
'csv', 'cut', 'delete', 'deltas', 'desc', 'dev', 'differ', 'distinct',
'div', 'do', 'dsave', 'each', 'ej', 'ema', 'enlist', 'eval',
@@ -98,11 +98,22 @@ class Q(metaclass=ABCMeta):
'sqrt', 'ss', 'ssr', 'string', 'sublist', 'sum', 'sums', 'sv', 'svar',
'system', 'tables', 'tan', 'til', 'trim', 'type', 'uj', 'ujf', 'ungroup',
'union', 'update', 'upper', 'value', 'var', 'view', 'views',
- 'vs', 'wavg', 'wavg', 'where', 'while', 'within', 'wj', 'wj1',
+ 'vs', 'wavg', 'where', 'while', 'within', 'wj', 'wj1',
'wsum', 'xasc', 'xbar', 'xcol', 'xcols', 'xdesc', 'xexp', 'xgroup', 'xkey', 'xlog',
'xprev', 'xrank'
}
+ operators = {
+ 'drop': '_',
+ 'coalesce': '^', 'fill': '^',
+ 'take': '#', 'set_attribute': '#',
+ 'join': ',',
+ 'find': '?', 'enum_extend': '?', 'roll': '?', 'deal': '?',
+ 'dict': '!', 'enkey': '!', 'unkey': '!', 'enumeration': '!',
+ 'enumerate': '$', 'pad': '$', 'cast': '$', 'tok': '$',
+ 'compose': '\''
+ }
+
def __init__(self):
self.paths = default_paths
# Start with an empty set to avoid errors during initialization.
@@ -137,7 +148,10 @@ def __getattr__(self, key):
if key == "__objclass__":
raise AttributeError
# Elevate the q context to the global context, as is done in q normally
- ctx = self.__getattribute__('ctx')
+ try:
+ ctx = self.__getattribute__('ctx')
+ except BaseException:
+ raise exceptions.QError('Cannot load requested context object in unlicensed mode')
if key in self.__getattribute__('_q_ctx_keys'):
# if-statement used instead of try-block for performance
return ctx.q.__getattr__(key)
@@ -198,15 +212,18 @@ def _register(self,
name: Optional[str] = None,
path: Optional[Union[Path, str]] = None,
) -> str:
- """Obtain the definitions from a q/k script.
+ """Switch to a named q context (read [Q for Mortals
+ Chapter 12](https://code.kx.com/q4m3/12_Workspace_Organization/#122-contexts))
+ and load variable definitions into that context from a q/k script. Once the script is
+ loaded this function switches back to the previous q context.
Parameters:
- name: Name of the context to be loaded. If `path` is not provided, [a file whose name
- matches will be searched for](#script-search-logic), and loaded if found. If `path`
- is provided, `name` will be used as the name of the context that the script at
- `path` is executed in.
- path: Path to the script to load. If `name` is not provided, it will default to the
- filename sans extension.
+ name: Name to assign the context being loaded. If no argument is provided the
+ assigned name of the context is set to the name of the file without filetype
+ extension.
+ path: Path to the script to load. If no argument is provided this function
+ [searches for a file matching the given name](#script-search-logic),
+ loading it if found.
Returns:
The attribute name for the newly loaded module.
@@ -228,13 +245,14 @@ def _register(self,
name = path.stem
prev_ctx = self._call('string system"d"', wait=True)
try:
- self._call(
- f'{"" if name[0] == "." else "."}{name}:(enlist`)!enlist(::);'
- f'system "d {"" if name[0] == "." else "."}{name}";'
- '$[@[{get x;1b};`.pykx.util.loadfile;{0b}];'
- f' .pykx.util.loadfile["{path.parent}";"{path.name}"];'
- f' system"l {path}"];',
- wait=True,
+            # Parenthesise the prefix so that a name already starting with "." is passed
+            # through unchanged; unparenthesised, the conditional yields "" for such names.
+            self._call('''{[name;folder;file]
+ name set (enlist`)!enlist(::);
+ system "d ",string name;
+ $[@[{get x;1b};`.pykx.util.loadfile;{0b}];
+ .pykx.util.loadfile[folder;file];
+ system"l ",$[.z.o like "w*";"\\\\";"/"] sv ((),folder;(),file)]}
+ ''', ("" if name[0] == "." else ".") + name, str(path.parent).encode(),
+                path.name.encode(), wait=True,
)
return name[1:] if name[0] == '.' else name
finally:
@@ -244,10 +262,10 @@ def _register(self,
def paths(self):
"""List of locations for the context interface to find q scripts in.
- Defaults to the current working directory and `$QHOME`.
+ Defaults to the current working directory and `#!bash $QHOME`.
- If you change directories, the current working directory stored in this list will
- automatically reflect that change.
+ If you change directories, the current working directory stored in this list
+ automatically reflects that change.
"""
return object.__getattribute__(self, '_paths')
@@ -281,6 +299,7 @@ def paths(self, paths: List[Union[str, Path]]):
from . import schema
from . import streamlit
from . import random
+from . import help
from ._wrappers import _init as _wrappers_init
_wrappers_init(wrappers)
@@ -289,11 +308,15 @@ def paths(self, paths: List[Union[str, Path]]):
from ._version import version as __version__
from .exceptions import *
+from .util import _init as _util_init
+_util_init(q)
+
from ._ipc import _init as _ipc_init
_ipc_init(q)
from .compress_encrypt import Compress, CompressionAlgorithm, Encrypt
from .db import DB
+from .tick import TICK
from .ipc import AsyncQConnection, QConnection, QFuture, RawQConnection, SecureQConnection, SyncQConnection # noqa
from .config import qargs, qhome, qlic
from .wrappers import *
@@ -316,12 +339,19 @@ def paths(self, paths: List[Union[str, Path]]):
from .db import _init as _db_init
_db_init(q)
+from .tick import _init as _tick_init
+_tick_init(q)
+
from .remote import _init as _remote_init
_remote_init(q)
from .compress_encrypt import _init as _compress_init
_compress_init(q)
+from .help import _init as _help_init
+_help_init(q)
+qhelp = help.qhelp
+
if k_allocator:
from . import _numpy as _pykx_numpy_cext
@@ -339,22 +369,32 @@ def merge_asof(left, *args, **kwargs):
)
-def install_into_QHOME(overwrite_embedpy=False, to_local_folder=False) -> None:
+def install_into_QHOME(overwrite_embedpy=False,
+ to_local_folder=False,
+ cloud_libraries=False) -> None:
"""Copies the embedded Python functionality of PyKX into `$QHOME`.
Parameters:
overwrite_embedpy: If embedPy had previously been installed replace it otherwise
- save functionality as pykx.q
- to_local_folder: Copy the files to your local folder rather than QHOME
+ save functionality as pykx.q.
+ to_local_folder: Copy the files to your local folder rather than `#!bash QHOME`.
+ cloud_libraries: Copy cloud libraries to `#!bash QHOME`.
Returns:
None
"""
dest = Path('.') if to_local_folder else qhome
p = Path(dest)/'p.k'
+ c_files = ['kurl.q_', 'kurl.sidecar.q_', 'objstor.q_', 'qlog.q_', 'rest.q_', 'bq.q_', 's.k_']
+
if not p.exists() or overwrite_embedpy:
shutil.copy(Path(__file__).parent/'p.k', p)
- shutil.copy(Path(__file__).parent/'pykx.q', dest/'p.q' if overwrite_embedpy else dest)
+    if overwrite_embedpy:
+        # Overwrite embedPy's p.q with the PyKX q interface (pykx.q), matching the
+        # behaviour of the line this patch removes; p.k is already copied above.
+        shutil.copy(Path(__file__).parent/'pykx.q', Path(dest)/'p.q')
+ if cloud_libraries:
+ for i in c_files:
+ shutil.copy(Path(__file__).parent/'lib'/i, Path(dest)/i)
+ shutil.copy(Path(__file__).parent/'pykx.q', dest)
shutil.copy(Path(__file__).parent/'pykx_init.q_', dest)
if platform.system() == 'Windows':
if dest == qhome:
@@ -428,6 +468,9 @@ def deactivate_numpy_allocator():
ipython = get_ipython() # noqa
# Load the PyKX extension for Jupyter Notebook.
ipython.extension_manager.load_extension('pykx.nbextension')
+
+ if config.jupyterq:
+ util.jupyter_qfirst_enable()
except NameError:
# Not running under IPython/Jupyter...
pass
@@ -454,6 +497,7 @@ def deactivate_numpy_allocator():
'qhome',
'QReader',
'random',
+ 'remote',
'QWriter',
'qlic',
'serialize',
@@ -473,11 +517,13 @@ def deactivate_numpy_allocator():
'q',
'shutdown_thread',
'PyKXReimport',
+ 'help',
+ 'qhelp',
*exceptions.__all__,
*wrappers.__all__,
])
-if (not no_sigint) or (not no_pykx_signal):
+if not no_pykx_signal:
for k, v in _signal_dict.items():
try:
signal.signal(eval(k), v)
diff --git a/src/pykx/_pyarrow.py b/src/pykx/_pyarrow.py
index 6ce7016..703f266 100644
--- a/src/pykx/_pyarrow.py
+++ b/src/pykx/_pyarrow.py
@@ -9,7 +9,7 @@
"""
import os
-from .config import load_pyarrow_unsafe
+from .config import load_pyarrow_unsafe, suppress_warnings
if load_pyarrow_unsafe:
import pyarrow
@@ -55,8 +55,11 @@ def pyarrow_importer(name, globals=None, locals=None, fromlist=(), level=0): # n
if p.returncode: # nocov
import_attempt_output = p.stdout if p.stdout else _msg_from_return_code(p.returncode)
# Don't print out `import_attempt_output` by default.
- warn('PyArrow failed to load - PyArrow related functionality has been disabled. Check '
- '`pykx._pyarrow.import_attempt_output` for the reason.', PyKXWarning)
+ if not suppress_warnings:
+ warn('PyArrow failed to load - PyArrow related functionality has been disabled. '
+ 'Check `pykx._pyarrow.import_attempt_output` for the reason. '
+ 'To suppress this warning please set the configuration/environment variable '
+ 'PYKX_SUPPRESS_WARNINGS=True', PyKXWarning)
# Replace the `__import__` function to prevent other from trying to import PyArrow
builtins.__import__ = pyarrow_importer
else:
diff --git a/src/pykx/_wrappers.pxd b/src/pykx/_wrappers.pxd
index 0c21dae..253ab7a 100644
--- a/src/pykx/_wrappers.pxd
+++ b/src/pykx/_wrappers.pxd
@@ -12,4 +12,4 @@ cdef extern from 'numpy/arrayobject.h':
cpdef deserialize(x)
-cdef factory(uintptr_t addr, bint incref, bint err_preamble=*)
+cdef factory(uintptr_t addr, bint incref, str name=*, bint err_preamble=*)
diff --git a/src/pykx/_wrappers.pyx b/src/pykx/_wrappers.pyx
index 7534c19..48f56db 100644
--- a/src/pykx/_wrappers.pyx
+++ b/src/pykx/_wrappers.pyx
@@ -93,14 +93,15 @@ cdef class _K:
return self.k.r
-def k_from_addr(cls, uintptr_t addr, bint incref):
+def k_from_addr(cls, uintptr_t addr, bint incref, str name=''):
instance = object.__new__(cls)
instance._addr = addr
instance._k = _K(addr, incref)
instance.__init__(None) # placeholder argument
+ if name != '':
+ instance._name = name
return instance
-
def k_str(self):
if not licensed:
return repr(self)
@@ -474,10 +475,37 @@ cdef inline object select_wrapper(core.K k):
_current_exception = {}
-cdef inline factory(uintptr_t addr, bint incref, bint err_preamble=0):
+cdef inline factory(uintptr_t addr, bint incref, str name='', bint err_preamble=0):
+ cdef core.K k = addr
+ cdef signed char ktype = k.t
wrapper = select_wrapper(addr)
if wrapper is QError:
- q_exception = wrapper(('Failed to serialize IPC message: ' if err_preamble else '') + str((addr).s, 'utf-8'))
+ err_string = str((addr).s, 'utf-8')
+ if err_string == 'nosocket':
+ err_string = 'nosocket: Cannot open or use a socket on a thread other than main.\n'\
+ 'Read https://code.kx.com/user-guide/advanced/threading.html for more information'
+ elif err_string == 'noupdate':
+ err_string = 'noupdate: Cannot update a global variable while using:\n\t- Multithreaded mode'\
+ '\n\t- peach with secondary threads\n\t- `-b` command line argument or reval code.\n'\
+ 'Read https://code.kx.com/user-guide/advanced/threading.html for more information'
+ elif err_string == 'par':
+ err_string = 'par: Cannot execute an unsupported operation on a partitioned table or its '\
+ 'constituent parts'
+ elif err_string == 'splay':
+ err_string = 'splay: Cannot execute an unsupported operation on a splayed table'
+ elif err_string == 's-fail':
+            # Message ends the first line with a newline only; the "Read" lead-in appears once.
+            err_string = 's-fail: Cannot set "sorted" attribute on an unsorted list\n'\
+                         'Read https://code.kx.com/q/ref/set-attribute/ for more information'
+ elif err_string == 'u-fail':
+ err_string = 'u-fail: Failed to do one of the following:\n\t- Set the "unique" '\
+ 'attribute on a non-unique list\n\t- Set the "parted" attribute on '\
+ 'list with non-adjacent repeated values.\n'\
+ 'Read https://code.kx.com/q/ref/set-attribute/ for more information'
+ elif err_string == 'insert':
+ err_string = 'insert: Cannot insert a record with an existing key into a keyed table'
+ elif err_string == 'assign':
+ err_string = 'assign: Cannot redefine a reserved word'
+ q_exception = wrapper(('Failed to serialize IPC message: ' if err_preamble else '') + err_string)
# `pop` the exception object out to prevent it from being handled multiple times.
_current_exception_in_thread = _current_exception.pop(threading.get_ident(), None)
if _current_exception_in_thread is None:
@@ -486,11 +514,11 @@ cdef inline factory(uintptr_t addr, bint incref, bint err_preamble=0):
raise q_exception
else:
raise q_exception from _current_exception_in_thread
- return k_from_addr(wrapper, addr, incref)
+ return(k_from_addr(wrapper, addr, incref, name))
-def _factory(addr: int, incref: bool):
- return factory(addr, incref)
+def _factory(addr: int, incref: bool, name: str = ''):
+ return factory(addr, incref, name)
def _pyfactory(addr: int, incref: bool, typenum: int, raw: bool = False):
diff --git a/src/pykx/compress_encrypt.py b/src/pykx/compress_encrypt.py
index b46fdf7..2bca324 100644
--- a/src/pykx/compress_encrypt.py
+++ b/src/pykx/compress_encrypt.py
@@ -1,17 +1,3 @@
-"""Functionality for the setting of compression and encryption configuration when
- handling on-disk data.
-
-!!! Warning
-
- This functionality is provided in it's present form as a BETA
- Feature and is subject to change. To enable this functionality
- for testing please following configuration instructions
- [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'`
-"""
-
-from . import beta_features
-from .config import _check_beta
-
from enum import Enum
from math import log2
import os
@@ -23,8 +9,6 @@
'Encrypt',
]
-beta_features.append('Compression and Encryption')
-
def _init(_q):
global q
@@ -37,9 +21,9 @@ def __dir__():
class CompressionAlgorithm(Enum):
"""
- The compression algorithm to be used when compressing a DB partition/column.
+ The compression algorithm used when compressing a DB partition/column.
- Presently the supported algorithms are qipc, gzip, snappy, lz4hc and zstd.
+ Supported algorithms are qipc, gzip, snappy, lz4hc and zstd.
These algorithms support different compression levels denoting the agressivness
of compression in each case.
@@ -70,22 +54,24 @@ class CompressionAlgorithm(Enum):
class Encrypt():
- def __init__(self, path=None, password=None):
+ def __init__(self, path: str = None, password: str = None) -> None:
"""
- Initialize a class object which is used to control the use of encryption with PyKX.
+ A class for controlling the use of encryption with PyKX.
Parameters:
- path: Location of a users encryption key file as an 'str' object
- password: Password which had been set for encryption file
+ path: Location of a user's encryption key file
+ password: Password for encryption file
+
+ Returns:
+ A `#!python None` object on successful invocation
Example:
- ```python
- >>> import pykx as kx
- >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword')
- ```
+ ```python
+ >>> import pykx as kx
+ >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword')
+ ```
"""
- _check_beta('Compression and Encryption')
self.loaded = False
path = Path(os.path.abspath(path))
if not os.path.isfile(path):
@@ -97,17 +83,22 @@ def __init__(self, path=None, password=None):
raise TypeError('Password must be supplied as a string')
self.password = password
- def load_key(self):
+ def load_key(self) -> None:
"""
- Load the encyption key within your process, note this will be a global load.
+ Load the encryption key from the file given during class initialization.
+ This overwrites the master key in the embedded q process. See
+ [here](https://code.kx.com/q/basics/internal/#-36-load-master-key) for details.
+
+ Returns:
+ A `#!python None` object on successful invocation
Example:
- ```python
- >>> import pykx as kx
- >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword')
- >>> encrypt.load_key()
- ```
+ ```python
+ >>> import pykx as kx
+ >>> encrypt = kx.Encrypt('/path/to/mykey.key', 'mySuperSecretPassword')
+ >>> encrypt.load_key()
+ ```
"""
q('{-36!(hsym x;y)}', self.path, bytes(self.password, 'UTF-8'))
self.loaded = True
@@ -115,39 +106,40 @@ def load_key(self):
class Compress():
def __init__(self,
- algo=CompressionAlgorithm.none,
- block_size=2**17,
- level=None):
+ algo: CompressionAlgorithm = CompressionAlgorithm.none,
+ block_size: int = 2**17,
+ level: int = None
+ ) -> None:
"""
- Initialize a class object which is used to control encryption within PyKX.
+ A class object for controlling q compression with PyKX.
Parameters:
- algo: Compression algorithm to be used when applying compression,
- this must be one of:
-
- - `kx.CompressionAlgorithm.none`
- - `kx.CompressionAlgorithm.ipc`
- - `kx.CompressionAlgorithm.gzip`
- - `kx.CompressionAlgorithm.snappy`
- - `kx.CompressionAlgorithm.lz4hc`
-
- block_size: Must be a port of 2 between 12 and 20 denoting the pageSize or
- allocation granularity to 1MB, see
- [here](https://code.kx.com/q/kb/file-compression/#compression-parameters)
+ algo: Compression algorithm to use. This must be one of:
+
+ - `#!python kx.CompressionAlgorithm.none`
+ - `#!python kx.CompressionAlgorithm.ipc`
+ - `#!python kx.CompressionAlgorithm.gzip`
+ - `#!python kx.CompressionAlgorithm.snappy`
+ - `#!python kx.CompressionAlgorithm.lz4hc`
+
+ block_size: Must be a power of 2 between 12 and 20 denoting the pageSize or
+ allocation granularity to 1MB. Read [compression
+ parameters](https://code.kx.com/q/kb/file-compression/#compression-parameters)
for more information.
- level: The degree to which compression will be applied, when non zero values
- are supported for a supported algorithm larger values will result in
- higher compression ratios.
+ level: Compression level for the `#!python algo` parameter. Algorithms that support
+ non-zero values have higher compression ratios as the provided level increases.
+
+ Returns:
+ A `#!python None` object on successful invocation
Example:
- ```python
- >>> import pykx as kx
- >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=5)
- ```
+ ```python
+ >>> import pykx as kx
+ >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=5)
+ ```
"""
- _check_beta('Compression and Encryption')
self.algorithm = algo
if block_size & (block_size - 1):
raise ValueError(f'block_size must be a power of 2, not {block_size}')
@@ -164,27 +156,32 @@ def __init__(self,
f'algorithm. Valid range is {compression_range}')
self.compression_level = level
- def global_init(self, encrypt=False):
+ def global_init(self, encrypt: bool = False) -> None:
"""
- Globally initialise compression settings, when completed any persistence
- operation making use of `kx.q.set` will be compressed based on the user
- specified compression settings
+ Globally initialise compression settings. Once run, using `#!python kx.q.set` to
+ persist data to disk compresses the data based on specified compression settings.
+ Refer to [compression by
+ default](https://code.kx.com/q/kb/file-compression/#compression-by-default)
+ for more details.
Parameters:
- encrypt: A `kx.Encrypt` object denoting if and using what credentials
+ encrypt: A `#!python kx.Encrypt` object denoting if and using what credentials
encryption is to be applied.
+ Returns:
+ A `#!python None` object on successful invocation
+
Example:
- ```python
- >>> import pykx as kx
- >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=2)
- >>> kx.q.z.zd
- pykx.Identity(pykx.q('::'))
- >>> comp.global_init()
- >>> kx.q.z.zd
- pykx.LongVector(pykx.q('17 2 2'))
- ```
+ ```python
+ >>> import pykx as kx
+ >>> comp = kx.Compress(kx.CompressionAlgorithm.gzip, level=2)
+ >>> kx.q.z.zd
+ pykx.Identity(pykx.q('::'))
+ >>> comp.global_init()
+ >>> kx.q.z.zd
+ pykx.LongVector(pykx.q('17 2 2'))
+ ```
"""
if not self.encrypt:
if isinstance(encrypt, Encrypt):
diff --git a/src/pykx/config.py b/src/pykx/config.py
index 09f9acf..fdc8112 100644
--- a/src/pykx/config.py
+++ b/src/pykx/config.py
@@ -60,21 +60,25 @@ def _is_set(envvar):
pykx_config_locs = [Path('.'), pykx_config_location, Path.home()]
+pykx_config_locs = [path / '.pykx-config' for path in pykx_config_locs]
+pykx_config_locs = [os.path.abspath(path) for path in pykx_config_locs if os.path.isfile(path)]
+# De-duplicate with dict.fromkeys rather than set(): the loop below stops at the first file
+# containing the requested profile, so the search order of the locations must be preserved.
+pykx_config_locs = list(dict.fromkeys(pykx_config_locs))
+
for path in pykx_config_locs:
- config_path = path / '.pykx-config'
- if os.path.isfile(config_path):
- _pykx_config_content = toml.load(config_path)
- try:
- _pykx_profile_content = _pykx_config_content[pykx_config_profile]
- break
- except KeyError:
- print("Unable to locate specified 'PYKX_PROFILE': '" + pykx_config_profile + "' in file '" + config_path + "'") # noqa E501
+ _pykx_config_content = toml.load(path)
+ try:
+ _pykx_profile_content = _pykx_config_content[pykx_config_profile]
+ break
+ except KeyError:
+ print("Unable to locate specified 'PYKX_PROFILE': '" + pykx_config_profile + "' in file '" + str(path) + "'") # noqa E501
pykx_dir = Path(__file__).parent.resolve(strict=True)
os.environ['PYKX_DIR'] = str(pykx_dir)
-os.environ['PYKX_EXECUTABLE'] = sys.executable
-pykx_libs_dir = Path(pykx_dir/'lib'/'4-1-libs') if _is_enabled('PYKX_4_1_ENABLED') else Path(pykx_dir/'lib') # noqa
+pykx_executable = sys.executable
+os.environ['PYKX_EXECUTABLE'] = pykx_executable
+pykx_4_1 = _is_enabled('PYKX_4_1_ENABLED')
+pykx_libs_dir = Path(pykx_dir/'lib'/'4-1-libs') if pykx_4_1 else Path(pykx_dir/'lib') # noqa
pykx_lib_dir = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_libs_dir))
pykx_platlib_dir = pykx_lib_dir/q_lib_dir_name
lib_prefix = '' if system == 'Windows' else 'lib'
@@ -84,16 +88,22 @@ def _is_set(envvar):
'Windows': 'dll',
}[system]
-try:
- qhome = Path(_get_config_value('QHOME', pykx_lib_dir)).resolve(strict=True)
-except FileNotFoundError: # nocov
- # If QHOME and its fallback weren't set/valid, then q/Python must be
- # running in the same directory as q.k (and presumably other stuff one
- # would expect to find in QHOME).
- qhome = Path().resolve(strict=True)
+
+def _get_qhome():
+    """Resolve the q home directory.
+
+    Looks up the `QHOME` configuration value (passing `pykx_lib_dir` as the default) and
+    resolves it strictly; if that path does not exist the current working directory is
+    returned instead.
+    """
+    try:
+        return Path(_get_config_value('QHOME', pykx_lib_dir)).resolve(strict=True)
+    except FileNotFoundError:  # nocov
+        # If QHOME and its fallback weren't set/valid, then q/Python must be
+        # running in the same directory as q.k (and presumably other stuff one
+        # would expect to find in QHOME).
+        return Path().resolve(strict=True)
+
+
+qhome = _get_qhome()
# License search
-_qlic = os.getenv('QLIC', '')
+_qlic = _get_config_value('QLIC', '')
_pwd = os.getcwd()
license_located = False
lic_path = ''
@@ -117,10 +127,29 @@ def _is_set(envvar):
if not license_located:
qlic = Path(qhome)
-qargs = tuple(shlex.split(_get_config_value('QARGS', '')))
+under_q = _is_enabled('PYKX_UNDER_Q')
+suppress_warnings = _is_enabled('PYKX_SUPPRESS_WARNINGS')
+
+_unsupported_qargs = {
+ '-p': 'PyKX running without a main loop, setting a port in this way is not supported',
+ '-t': 'PyKX running without a main loop, setting timers in this way has no effect'
+}
+
+def _check_qargs():
+    """Split the `QARGS` configuration value into a tuple of arguments.
+
+    Unless running under q or with warnings suppressed, a `RuntimeWarning` is issued for
+    each argument listed in `_unsupported_qargs` that appears in the parsed arguments.
+    """
+    parsed = shlex.split(_get_config_value('QARGS', ''))
+    if not (under_q or suppress_warnings):
+        for flag, reason in _unsupported_qargs.items():
+            if flag in parsed:
+                warn(f"'{flag}' argument unsupported in QARGS configuration: {reason}",
+                     RuntimeWarning)
+    return tuple(parsed)
-def _license_install_B64(license, license_type):
+
+qargs = _check_qargs()
+
+
+def _license_install_B64(license, license_type): # pragma: no cover
try:
lic = base64.b64decode(license)
except base64.binascii.Error:
@@ -132,7 +161,7 @@ def _license_install_B64(license, license_type):
return True
-def _license_check(lic_type, lic_encoding, lic_variable):
+def _license_check(lic_type, lic_encoding, lic_variable): # pragma: no cover
license_content = None
lic_name = lic_type + '.lic'
lic_file = qlic / lic_name
@@ -151,6 +180,32 @@ def _license_check(lic_type, lic_encoding, lic_variable):
else:
return _license_install_B64(lic_encoding, lic_name)
+
+def _unlicensed_config(unlicensed_message):
+    """Enable unlicensed mode, optionally persisting the choice to `~/.pykx-config`.
+
+    The user is prompted; on `y`, `Y` or empty input `PYKX_UNLICENSED = 'True'` is written
+    to the `default` profile of `.pykx-config` in the user's home directory, otherwise
+    `unlicensed_message` is printed.
+
+    Parameters:
+        unlicensed_message: Text printed when the user declines to persist the choice.
+
+    Raises:
+        PermissionError: If the configuration file exists but is not writable.
+    """
+    choice = input('\nWould you like us to remember this choice? [Y/n]: ')
+    if choice in ('y', 'Y', ''):
+        fpath = Path(os.path.expanduser('~')) / '.pykx-config'
+        if os.path.exists(fpath):
+            # `os.access` reports permissions through its boolean result and does not
+            # raise, so the result has to be tested explicitly.
+            if not os.access(fpath, os.W_OK):
+                raise PermissionError(
+                    f"You do not have sufficient permissions to write to: {fpath}")
+            with open(fpath, 'r') as file:
+                data = toml.load(file)
+        else:
+            data = {}
+        # An existing file may define other profiles only; create 'default' on demand.
+        data.setdefault('default', {})['PYKX_UNLICENSED'] = 'True'
+        with open(fpath, 'w') as file:
+            toml.dump(data, file)
+        print(f"\nConfiguration updated at: {fpath}.\n"
+              "Unlicensed mode now set as default behavior.")
+    else:
+        print(unlicensed_message)
+    # NOTE(review): indentation is lost in this patch; this is assumed to run for both
+    # branches so the current session is unlicensed either way - confirm.
+    os.environ['PYKX_UNLICENSED'] = 'true'
+
+
def _license_install(intro=None, return_value=False, license_check=False, license_error=None): # noqa:
if license_check:
@@ -176,90 +231,99 @@ def _license_install(intro=None, return_value=False, license_check=False, licens
print(install_message)
return True
- modes_url = "https://code.kx.com/pykx/user-guide/advanced/modes.html"
personal_url = "https://kx.com/kdb-insights-personal-edition-license-download"
commercial_url = "https://kx.com/book-demo"
unlicensed_message = '\nPyKX unlicensed mode enabled. To set this as your default behavior '\
- "set the following environment variable PYKX_UNLICENSED='true'"\
- '\n\nFor more information on PyKX modes of operation, visit '\
- f'{modes_url}.\nTo apply for a PyKX license visit '\
- f'\n\n Personal License: {personal_url}'\
- '\n Commercial License: Contact your KX sales representative '\
- f'or sales@kx.com or apply on {commercial_url}'
+ "set the following environment variable PYKX_UNLICENSED='true'"
first_user = '\nThank you for installing PyKX!\n\n'\
'We have been unable to locate your license for PyKX. '\
'Running PyKX in unlicensed mode has reduced functionality.\n'\
- 'Would you like to continue with license installation? [Y/n]: '
+ 'Would you like to install a license? [Y/n]: '
+ root = 'C:\\path\\to\\' if platform.system() == 'Windows' else '~/path/to/'
continue_license = input(first_user if intro is None else intro)
if continue_license in ('n', 'N'):
- os.environ['PYKX_UNLICENSED']='true'
- print(unlicensed_message)
+ _unlicensed_config(unlicensed_message)
if return_value:
return False
elif continue_license in ('y', 'Y', ''):
- commercial = input('\nIs the intended use of this software for:'
- '\n [1] Personal use (Default)'
- '\n [2] Commercial use'
- '\nEnter your choice here [1/2]: ').strip().lower()
- if commercial not in ('1', '2', ''):
- raise Exception('User provided option was not one of [1/2]')
-
- personal = commercial in ('1', '')
-
- lic_url = personal_url if personal else commercial_url
- lic_type = 'kc.lic' if personal else 'k4.lic'
-
- if personal:
- redirect = input(f'\nTo apply for your PyKX license, navigate to {lic_url}.\n'
- 'Shortly after you submit your application, you will receive a '
- 'welcome email containing your license information.\n'
- 'Would you like to open this page? [Y/n]: ')
+ existing_license = input('\nDo you have access to an existing license for PyKX '
+ 'that you would like to use? [N/y]: ')
+ if existing_license in ('N', 'n', ''):
+ commercial = input('\nIs the intended use of this software for:'
+ '\n [1] Personal use (Default)'
+ '\n [2] Commercial use'
+ '\nEnter your choice here [1/2]: ').strip().lower()
+ if commercial not in ('1', '2', ''):
+ raise Exception('User provided option was not one of [1/2]')
+
+ personal = commercial in ('1', '')
+
+ lic_url = personal_url if personal else commercial_url
+ lic_type = 'kc.lic' if personal else 'k4.lic'
+
+ if personal:
+ redirect = input(f'\nTo apply for your PyKX license, navigate to {lic_url}.\n'
+ 'Shortly after you submit your application, you will receive a '
+ 'welcome email containing your license information.\n'
+ 'Would you like to open this page? [Y/n]: ')
+ else:
+ redirect = input('\nTo apply for your PyKX license, contact your '
+ 'KX sales representative or sales@kx.com.\n'
+ f'Alternately apply through {lic_url}.\n'
+ 'Would you like to open this page? [Y/n]: ')
+
+ if redirect.lower() in ('y', ''):
+ try:
+ webbrowser.open(lic_url)
+ time.sleep(2)
+ except BaseException:
+ raise Exception('Unable to open web browser')
+
+            # Adjacent literals are concatenated verbatim, so the separating space between
+            # "your" and "license" must be part of the first literal.
+            install_type = input('\nPlease select the method you wish to use to activate your '
+                                 'license:\n [1] Download the license file provided in your '
+                                 'welcome email and input the file path (Default)'
+                                 '\n [2] Input the activation key (base64 encoded string) '
+                                 'provided in your welcome email'
+                                 '\n [3] Proceed with unlicensed mode'
+                                 '\nEnter your choice here [1/2/3]: ').strip().lower()
+
+ if install_type not in ('1', '2', '3', ''):
+ raise Exception('User provided option was not one of [1/2/3]')
+
+ if install_type in ('1', ''):
+ license = input('\nProvide the download location of your license '
+ f'(for example, {root}{lic_type}) : ').strip()
+ download_location = os.path.expanduser(Path(license))
+
+ if not os.path.exists(download_location):
+ err_msg = f'Download location provided {download_location} does not exist.'
+ raise Exception(err_msg)
+
+ shutil.copy(download_location, qlic)
+ print(f'\nPyKX license successfully installed to: {qlic / lic_type}\n')
+ elif install_type == '2':
+
+ license = input('\nProvide your activation key (base64 encoded string) '
+ 'provided with your welcome email : ').strip()
+
+ _license_install_B64(license, lic_type)
+
+                # f-string so the installed license path is interpolated, as in the file-path branch.
+                print(f'\nPyKX license successfully installed to: {qlic / lic_type}\n')  # noqa: E501
+ elif install_type == '3':
+ _unlicensed_config(unlicensed_message)
+ if return_value:
+ return False
else:
- redirect = input('\nTo apply for your PyKX license, contact your '
- 'KX sales representative or sales@kx.com.\n'
- f'Alternately apply through {lic_url}.\n'
- 'Would you like to open this page? [Y/n]: ')
-
- if redirect.lower() in ('y', ''):
- try:
- webbrowser.open(lic_url)
- time.sleep(2)
- except BaseException:
- raise Exception('Unable to open web browser')
-
- install_type = input('\nPlease select the method you wish to use to activate your license:'
- '\n [1] Download the license file provided in your welcome email and '
- 'input the file path (Default)'
- '\n [2] Input the activation key (base64 encoded string) provided in '
- 'your welcome email'
- '\n [3] Proceed with unlicensed mode'
- '\nEnter your choice here [1/2/3]: ').strip().lower()
-
- if install_type not in ('1', '2', '3', ''):
- raise Exception('User provided option was not one of [1/2/3]')
-
- if install_type in ('1', ''):
- license = input('\nProvide the download location of your license '
- f'(for example, ~/path/to/{lic_type}) : ').strip()
+ license = input('\nProvide the location of your license '
+ f'(for example, {root}) : ').strip()
download_location = os.path.expanduser(Path(license))
if not os.path.exists(download_location):
raise Exception(f'Download location provided {download_location} does not exist.')
shutil.copy(download_location, qlic)
- print('\nPyKX license successfully installed. Restart Python for this to take effect.\n') # noqa: E501
- elif install_type == '2':
-
- license = input('\nProvide your activation key (base64 encoded string) '
- 'provided with your welcome email : ').strip()
-
- _license_install_B64(license, lic_type)
-
- print('\nPyKX license successfully installed. Restart Python for this to take effect.\n') # noqa: E501
- elif install_type == '3':
- if return_value:
- return False
+            # `lic_type` is not assigned on this path in the visible code, so report the
+            # name of the file that `shutil.copy` placed in `qlic` instead.
+            print(f'\nPyKX license successfully installed to: {qlic / Path(download_location).name}\n')  # noqa: E501
else:
raise Exception('Invalid input provided please try again')
if return_value:
@@ -275,25 +339,25 @@ def _license_install(intro=None, return_value=False, license_check=False, licens
licensed = False
-under_q = _is_enabled('PYKX_UNDER_Q')
qlib_location = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_libs_dir))
pykx_threading = _is_enabled('PYKX_THREADING')
+
+_executable = 'q'
if platform.system() == 'Windows' and pykx_threading:
- pykx_threading = False
- warn('PYKX_THREADING is only supported on Linux / MacOS, it has been disabled.')
-no_sigint = _is_enabled('PYKX_NO_SIGINT', deprecated=True)
-no_pykx_signal = _is_enabled('PYKX_NO_SIGNAL')
+ _executable += '.exe'
+ if pykx_threading:
+ pykx_threading = False
+ warn('PYKX_THREADING is only supported on Linux / MacOS, it has been disabled.')
-if _is_enabled('PYKX_ENABLE_PANDAS_API', '--pandas-api'):
- warn('Usage of PYKX_ENABLE_PANDAS_API configuration variable was removed in '
- 'PyKX 2.0. Pandas API is permanently enabled. See: '
- 'https://code.kx.com/pykx/changelog.html#pykx-200')
+q_executable = _get_config_value('PYKX_Q_EXECUTABLE', shutil.which(_executable))
+no_pykx_signal = _is_enabled('PYKX_NO_SIGNAL')
-ignore_qhome = _is_enabled('IGNORE_QHOME', '--ignore-qhome', True) or _is_enabled('PYKX_IGNORE_QHOME') # noqa E501
-keep_local_times = _is_enabled('KEEP_LOCAL_TIMES', '--keep-local-times', True) or _is_enabled('PYKX_KEEP_LOCAL_TIMES') # noqa E501
+ignore_qhome = _is_enabled('PYKX_IGNORE_QHOME', '--ignore-qhome')
+keep_local_times = _is_enabled('PYKX_KEEP_LOCAL_TIMES')
max_error_length = int(_get_config_value('PYKX_MAX_ERROR_LENGTH', 256))
-if _is_enabled('PYKX_ALLOCATOR', '--pykxalloc'):
+allocator = _is_enabled('PYKX_ALLOCATOR', '--pykxalloc')
+if allocator:
if sys.version_info[1] <= 7:
raise PyKXWarning('A python version of at least 3.8 is required to use the PyKX allocators') # noqa nocov
k_allocator = False # nocov
@@ -305,14 +369,17 @@ def _license_install(intro=None, return_value=False, license_check=False, licens
k_gc = _is_enabled('PYKX_GC', '--pykxgc')
release_gil = _is_enabled('PYKX_RELEASE_GIL', '--release-gil')
use_q_lock = _get_config_value('PYKX_Q_LOCK', False)
-skip_under_q = _is_enabled('SKIP_UNDERQ', '--skip-under-q') or _is_enabled('PYKX_SKIP_UNDERQ')
+skip_under_q = _is_enabled('PYKX_SKIP_UNDERQ', '--skip-under-q')
no_qce = _is_enabled('PYKX_NOQCE', '--no-qce')
beta_features = _is_enabled('PYKX_BETA_FEATURES', '--beta')
load_pyarrow_unsafe = _is_enabled('PYKX_LOAD_PYARROW_UNSAFE', '--load-pyarrow-unsafe')
pykx_qdebug = _is_enabled('PYKX_QDEBUG', '--q-debug')
+pykx_debug_insights = _is_enabled('PYKX_DEBUG_INSIGHTS_LIBRARIES')
pandas_2 = pd.__version__.split('.')[0] == '2'
+jupyterq = _is_enabled('PYKX_JUPYTERQ')
+
def find_core_lib(name: str) -> Path:
suffix = '.dll' if system == 'Windows' else '.so'
@@ -335,6 +402,10 @@ def _set_keep_local_times(keep_local_times_):
keep_local_times = keep_local_times_
+def _get_qexecutable():
+    """Return the `PYKX_Q_EXECUTABLE` configuration value.
+
+    The result of `shutil.which(_executable)` is passed as the default.
+    """
+    located = shutil.which(_executable)
+    return _get_config_value('PYKX_Q_EXECUTABLE', located)
+
+
def _check_beta(feature_name, *, status=beta_features):
if status:
return None
diff --git a/src/pykx/constants.py b/src/pykx/constants.py
index edc3965..3f24260 100644
--- a/src/pykx/constants.py
+++ b/src/pykx/constants.py
@@ -5,3 +5,7 @@
INF_INT16 = 2**15 - 1
INF_INT32 = 2**31 - 1
INF_INT64 = 2**63 - 1
+
+INF_NEG_INT16 = -2**15 + 1
+INF_NEG_INT32 = -2**31 + 1
+INF_NEG_INT64 = -2**63 + 1
diff --git a/src/pykx/core.pyx b/src/pykx/core.pyx
index 546930a..79a7a39 100644
--- a/src/pykx/core.pyx
+++ b/src/pykx/core.pyx
@@ -8,7 +8,7 @@ import re
import sys
from . import beta_features
-from .util import num_available_cores
+from .util import add_to_config, num_available_cores
from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta, _get_config_value, pykx_lib_dir, ignore_qhome, lic_path
@@ -46,7 +46,6 @@ def _normalize_qargs(user_args: List[str]) -> Tuple[bytes]:
*(x.encode() for i, x in enumerate(user_args) if i not in skip_indexes)
)
-
cdef int _qinit(int (*qinit)(int, char**, char*, char*, char*), qhome_str: str, qlic_str: str, ignore_qhome: bool, args: List[str]) except *:
normalized_args = _normalize_qargs(args)
cdef int argc = len(normalized_args)
@@ -216,35 +215,58 @@ def _link_qhome():
return
# Avoid recursion, but allow for the effective merger via symlinks of the directories under the
# lib dir that come with PyKX.
- for subdir in subdirs:
- # Remove old symlinks:
- with os.scandir(pykx_lib_dir/subdir) as dir_iter:
- for dir_entry in dir_iter:
- if dir_entry.is_symlink():
- os.unlink(dir_entry)
- # Add new symlinks:
- try:
- with os.scandir(qhome/subdir) as dir_iter:
+ if not ignore_qhome:
+ for subdir in subdirs:
+ # Remove old symlinks:
+ with os.scandir(pykx_lib_dir/subdir) as dir_iter:
for dir_entry in dir_iter:
- try:
- os.symlink(
- dir_entry,
- pykx_lib_dir/subdir/dir_entry.name,
- target_is_directory=dir_entry.is_dir()
- )
- except FileExistsError:
- pass # Skip files/dirs that would overwrite those that come with PyKX.
- except OSError as ex: #nocov
- # Making this a warning instead of an error is particularly important for
- # Windows, which essentially only lets admins create symlinks.
- warn('Unable to connect user QHOME to PyKX QHOME via symlinks\n' # nocov
- f'{ex}', # nocov
- PyKXWarning) # nocov
- return # nocov
- except FileNotFoundError:
- pass # Skip subdirectories of $QHOME that don't exist.
+ if dir_entry.is_symlink():
+ os.unlink(dir_entry)
+ # Add new symlinks:
+ try:
+ with os.scandir(qhome/subdir) as dir_iter:
+ for dir_entry in dir_iter:
+ try:
+ os.symlink(
+ dir_entry,
+ pykx_lib_dir/subdir/dir_entry.name,
+ target_is_directory=dir_entry.is_dir()
+ )
+ except FileExistsError:
+ pass # Skip files/dirs that would overwrite those that come with PyKX.
+ except OSError as ex: #nocov
+ # Making this a warning instead of an error is particularly important for
+ # Windows, which essentially only lets admins create symlinks.
+ warn('Unable to connect user QHOME to PyKX QHOME via symlinks.\n' # nocov
+ 'To permanently disable attempts to create symlinks you can\n' # nocov
+ '\t1. Set the environment variable "PYKX_IGNORE_QHOME" = True.\n' # nocov
+ '\t2. Update the file ".pykx.config" using kx.util.add_to_config({\'PYKX_IGNORE_QHOME\': True})\n' # nocov
+ f'Error: {ex}\n', # nocov
+ PyKXWarning) # nocov
+ return # nocov
+ except FileNotFoundError:
+ pass # Skip subdirectories of $QHOME that don't exist.
update_marker.touch()
+def check_subprocess_init(lib_path, qhome, qlic):
+ qinit_check = subprocess.run(
+ (str(Path(sys.executable).as_posix()), '-c', 'import pykx'),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True,
+ env={
+ **os.environ,
+ 'PYKX_QINIT_CHECK': ';'.join((
+ str(lib_path),
+ qhome,
+ str(qlic),
+ # Use the env var directly because `config.qargs` has already split the args.
+ os.environ.get('QARGS', ''),
+ )),
+ }
+ )
+ return qinit_check
+
cdef void (*init_syms)(char* x)
if not pykx_threading:
@@ -262,36 +284,15 @@ if not pykx_threading:
_q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL)
licensed = False
else:
- if platform.system() == 'Windows': # nocov
- from ctypes.util import find_library # nocov
- if find_library("msvcr100.dll") is None: # nocov
- msvcrMSG = "Needed dependency msvcr100.dll missing. See: https://code.kx.com/pykx/getting-started/installing.html" # nocov
- if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): # nocov
- raise PyKXException(msvcrMSG) # nocov
- else: # nocov
- warn(msvcrMSG, PyKXWarning) # nocov
_core_q_lib_path = find_core_lib('q')
licensed = True
+ _qinit_unsuccessful = False
if not _is_enabled('PYKX_UNSAFE_LOAD', '--unsafeload'):
- _qinit_check_proc = subprocess.run(
- (str(Path(sys.executable).as_posix()), '-c', 'import pykx'),
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT,
- text=True,
- env={
- **os.environ,
- 'PYKX_QINIT_CHECK': ';'.join((
- str(_core_q_lib_path),
- final_qhome,
- str(qlic),
- # Use the env var directly because `config.qargs` has already split the args.
- os.environ.get('QARGS', ''),
- )),
- }
- )
+ _qinit_check_proc = check_subprocess_init(_core_q_lib_path, final_qhome, qlic)
_qinit_output = ' ' + ' '.join(_qinit_check_proc.stdout.strip().splitlines(True))
_license_message = False
- if _qinit_check_proc.returncode: # Fallback to unlicensed mode
+ _qinit_unsuccessful = _qinit_check_proc.returncode
+ if _qinit_unsuccessful: # Fallback to unlicensed mode
if _qinit_output != ' ':
_capout_msg = f'Captured output from initialization attempt:\n{_qinit_output}'
_lic_location = f'License location used:\n{lic_path}'
@@ -332,6 +333,14 @@ if not pykx_threading:
_q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL)
licensed = False
if licensed: # Start in licensed mode
+ if _qinit_unsuccessful and (not _is_enabled('PYKX_UNSAFE_LOAD', '--unsafeload')):
+ _qinit_check_proc = check_subprocess_init(_core_q_lib_path, final_qhome, qlic)
+ _qinit_output = ' ' + ' '.join(_qinit_check_proc.stdout.strip().splitlines(True))
+ licensed = False
+ if _qinit_check_proc.returncode: # Fallback to unlicensed mode
+ _qinit_args = {'qhome': final_qhome, 'qlic': qlic, 'ignore_qhome': ignore_qhome, 'qargs': list(qargs)}
+ raise PyKXException(f'Non-zero qinit following license install with configuration: {_qinit_args}\n'
+ f'failed with output: {_qinit_output}')
if 'QHOME' in os.environ and not ignore_qhome:
# Only link the user's QHOME to PyKX's QHOME if the user actually set $QHOME.
# Note that `pykx.qhome` has a default value of `./q`, as that is the behavior
@@ -363,7 +372,6 @@ else:
init_syms = dlsym(_q_handle, 'sym_init')
init_syms(_libq_path)
qinit = dlsym(_q_handle, 'q_init')
-
qinit_return_code = _qinit(qinit, final_qhome, str(qlic), ignore_qhome, list(qargs))
if qinit_return_code: # nocov
dlclose(_q_handle) # nocov
diff --git a/src/pykx/ctx.py b/src/pykx/ctx.py
index 1c0ac92..dc8266a 100644
--- a/src/pykx/ctx.py
+++ b/src/pykx/ctx.py
@@ -1,6 +1,4 @@
-"""Interface to q contexts and scripts which define a namespace.
-
-The context interface provides an easy to way access q contexts (also known as namespaces when at
+"""The context interface provides an easy way to access q contexts (also known as namespaces when at
the top level). For more information about contexts/namespaces in q please refer to
[Chapter 12 of Q for Mortals](https://code.kx.com/q4m3/12_Workspace_Organization/).
"""
@@ -37,7 +35,9 @@ def __dir__():
# still allowing the current directory to be removed from the search path by altering the module
# paths attribute.
class CurrentDirectory(type(Path())):
- """``pathlib.Path`` instance for the current directory regardless of directory changes."""
+ """`#!python pathlib.Path` instance for the current directory regardless
+ of directory changes.
+ """
def __init__(self):
super().__init__()
@@ -70,12 +70,14 @@ class QContext:
def __init__(self, q: Q, name: str, parent: QContext, no_ctx=False):
"""Interface to a q context.
- Members of the context be accessed as if the `QContext` object was a dictionary, or by
- dotting off of the `QContext` object.
+ Members of the context can be accessed as if the `#!python QContext` object was a
+ dictionary, or by dotting off of the `#!python QContext` object.
+ Parameters:
q: The q instance in which the context exists.
name: The name of the context.
- parent: The parent context as a `QContext`, or `None` in the case of the global context.
+ parent: The parent context as a `#!python QContext`,
+ or `#!python None` in the case of the global context.
"""
super().__setattr__('_q', q)
super().__setattr__('_name', name)
@@ -118,12 +120,20 @@ def __getattr__(self, key): # noqa
return ZContext(proxy(self))
elif self._fqn in {'', '.q'} and key in self._unsupported_keys_with_msg:
raise AttributeError(f'{key}: {self._unsupported_keys_with_msg[key]}')
- if self._fqn in {'', '.q'} and key in self._q.reserved_words:
- # Reserved words aren't actually part of the `.q` context dict
- if 'QConnection' in str(self._q._call):
- return lambda *args: self._q._call(key, *args, wait=True)
- else:
- return self._q._call(key, wait=True)
+ if self._fqn in {'', '.q'}:
+ func = None
+ if key in self._q.reserved_words:
+ func = key
+ elif key in list(self._q.operators.keys()):
+ func = self._q.operators[key]
+ if func is not None:
+ # Reserved words aren't actually part of the `.q` context dict
+ if 'QConnection' in str(self._q._call):
+ return lambda *args: self._q._call(func, *args, wait=True)
+ else:
+ fn = self._q._call(func, wait=True)
+ fn._name = key
+ return fn
if 'no_ctx=True' in str(self.__dict__['_q']) or self.no_ctx:
raise PyKXException('Attempted to use context interface after disabling it.')
fqn_with_key = f'{self._fqn}.{key}'
@@ -191,10 +201,10 @@ def __dir__(self):
class ZContext(QContext):
- """Special interface to handle the .z context.
+ """Special interface to handle the `#!q .z` context.
- The .z context in q is not a normal context; it lacks a dictionary. To access it one must
- access its attributes directly.
+ The `#!q .z` context in q is not a normal context; it lacks a dictionary. To access it users
+ must access its attributes directly.
"""
_no_default = ('ac', 'bm', 'exit', 'pc', 'pd', 'ph', 'pi', 'pm',
'po', 'pp', 'pq', 'ps', 'pw', 'vs', 'wc', 'wo', 'ws', 'zd')
diff --git a/src/pykx/db.py b/src/pykx/db.py
index dfc0f1a..40d6322 100644
--- a/src/pykx/db.py
+++ b/src/pykx/db.py
@@ -1,30 +1,21 @@
-"""Functionality for the interaction with and management of databases.
-
-!!! Warning
-
- This functionality is provided in it's present form as a BETA
- Feature and is subject to change. To enable this functionality
- for testing please following configuration instructions
- [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'`
+"""
+_This page documents the API for managing kdb+ databases using PyKX._
"""
from .exceptions import QError
from . import wrappers as k
-from . import beta_features
-from .config import _check_beta
+from .config import pykx_4_1
from .compress_encrypt import Compress, Encrypt
import os
from pathlib import Path
-from typing import Union
+from typing import Any, Optional, Union
from warnings import warn
__all__ = [
'DB',
]
-beta_features.append('Database Management')
-
def _init(_q):
global q
@@ -83,61 +74,131 @@ class _TABLES:
class DB(_TABLES):
- """Singleton class used for the management of kdb+ Databases"""
_instance = None
_init_tabs = None
+ _dir_cache = None
+ _change_dir = True
+ _load_script = True
path = None
tables = None
table = _TABLES
loaded = False
- def __new__(cls, *, path=None):
+ def __new__(cls,
+ *,
+ path: Optional[Union[str, Path]] = None,
+ change_dir: Optional[bool] = True,
+ load_scripts: Optional[bool] = True
+ ) -> None:
+ if cls._dir_cache is None:
+ cls._dir_cache = dir(cls)
if cls._instance is None:
cls._instance = super(DB, cls).__new__(cls)
return cls._instance
- def __init__(self, *, path=None):
- _check_beta('Database Management')
+ def __init__(self,
+ *,
+ path: Optional[Union[str, Path]] = None,
+ change_dir: Optional[bool] = True,
+ load_scripts: Optional[bool] = True
+ ) -> None:
+ """
+ Initialize a database class used within your process. This is a singleton class from
+ which all interactions with your database will be made. On load if supplied
+ with a 'path' this functionality will attempt to load the database at this location.
+ If no database exists at this location the path supplied will be used when a new
+ database is created.
+
+ Parameters:
+ path: The location at which your database is/will be located.
+ change_dir: Should the working directory be changed to the location of the
+ loaded database, for q 4.0 this is the only supported behavior, please
+ set `PYKX_4_1_ENABLED` to allow use of this functionality.
+ load_scripts: Should any q scripts found in the database directory be loaded,
+ for q 4.0 this is the only supported behavior, please set
+ `PYKX_4_1_ENABLED` to allow use of this functionality.
+
+ Returns:
+ A database class which can be used to interact with a partitioned database.
+
+ Examples:
+
+ Load a partitioned database at initialization
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path = '/tmp/db')
+ >>> db.tables
+ ['quote', 'trade']
+ ```
+
+ Define the path to be used for a database which does not initially exist
+
+ ```python
+ >>> import pykx as kx
+ >>> db = kx.DB(path = 'db')
+ >>> db.tables
+ >>> db.path
+ PosixPath('/usr/projects/pykx/db')
+ ```
+ """
+ self._change_dir = change_dir
+ self._load_scripts = load_scripts
+ if not pykx_4_1:
+ if not change_dir:
+ raise QError("'change_dir' behavior only supported with PYKX_4_1_ENABLED")
+ if not load_scripts:
+ raise QError("'load_scripts' behavior only supported with PYKX_4_1_ENABLED")
if path is not None:
try:
- self.load(path)
+ self.load(path, change_dir=self._change_dir, load_scripts=self._load_scripts)
except BaseException:
self.path = Path(os.path.abspath(path))
pass
- def create(self, table, table_name, partition, *, # noqa: C901
- by_field=None, sym_enum=None, log=True,
- compress=None, encrypt=None):
+ def create(self,
+ table: k.Table,
+ table_name: str,
+ partition: Union[int, str, k.DateAtom],
+ *, # noqa: C901
+ by_field: Optional[str] = None,
+ sym_enum: Optional[str] = None,
+ log: Optional[bool] = True,
+ compress: Optional[Compress] = None,
+ encrypt: Optional[Encrypt] = None,
+ change_dir: Optional[bool] = True,
+ load_scripts: Optional[bool] = True
+ ) -> None:
"""
Create an on-disk partitioned table within a kdb+ database from a supplied
- `pykx.Table` object. Once generated this table will be accessible
- as an attribute of the `DB` class or a sub attribute of `DB.table`.
+ `#!python pykx.Table` object. Once generated this table will be accessible
+ as an attribute of the `#!python DB` class or a sub attribute of `#!python DB.table`.
Parameters:
- table: The `pykx.Table` object which is to be persisted to disk
+ table: The `#!python pykx.Table` object which is to be persisted to disk
table_name: The name with which the table will be persisted and accessible
- once loaded and available as a `pykx.PartitionedTable`
+ once loaded and available as a `#!python pykx.PartitionedTable`
partition: The name of the column which is to be used to partition the data if
- supplied as a `str` or if supplied as non string object this will be used as
- the partition to which all data is persisted
+ supplied as a `#!python str` or if supplied as non string object this is
+ used as the partition to which all data is persisted.
by_field: A field of the table to be used as a by column, this column will be
the second column in the table (the first being the virtual column determined
by the partitioning column)
sym_enum: The name of the symbol enumeration table to be associated with the table
- log: Print information about status of partitioned datab
- compress: `pykx.Compress` initialized class denoting the
- compression settings to be used when persisting a partition/partitions
- encrypt: `pykx.Encrypt` initialized class denoting the encryption setting to be used
- when persisting a partition/partitions
+ log: Print information about status while persisting the partitioned database
+ compress: `#!python pykx.Compress` initialized class denoting the compression settings
+ to be used when persisting a partition/partitions
+ encrypt: `#!python pykx.Encrypt` initialized class denoting the encryption setting
+ to be used when persisting a partition/partitions
Returns:
- A `None` object on successful invocation, the database class will be
+ A `#!python None` object on successful invocation, the database class is
updated to contain attributes associated with the available created table
Examples:
- Generate a partitioned table from a table containing multiple partitions
+ Generate a partitioned database from a table containing multiple partitions.
```python
>>> import pykx as kx
@@ -165,8 +226,7 @@ def create(self, table, table_name, partition, *, # noqa: C901
'))
```
- Add a table as a partition to an on-disk database, in the example below we are adding
- a partition to the table generated above
+ Add a table as a partition to an on-disk database.
```python
>>> import pykx as kx
@@ -193,8 +253,8 @@ def create(self, table, table_name, partition, *, # noqa: C901
'))
```
- Add a table as a partition to an on-disk database, in the example below we are
- additionally applying gzip compression to the persisted table
+ Add a table as a partition to an on-disk database and apply gzip
+ compression to the persisted table
```python
>>> import pykx as kx
@@ -263,23 +323,41 @@ def create(self, table, table_name, partition, *, # noqa: C901
raise QError(err)
q('{![`.;();0b;enlist x]}', table_name)
q.z.zd = compression_cache
- self.load(self.path, overwrite=True)
+ if change_dir is None:
+ change_dir = self._change_dir
+ if load_scripts is None:
+ load_scripts = self._load_scripts
+ self.load(self.path, overwrite=True, change_dir=change_dir, load_scripts=load_scripts)
return None
- def load(self, path: Union[Path, str], *, overwrite=False, encrypt=None):
+ def load(self,
+ path: Union[Path, str],
+ *,
+ change_dir: Optional[bool] = True,
+ load_scripts: Optional[bool] = True,
+ overwrite: Optional[bool] = False,
+ encrypt: Optional[Encrypt] = None
+ ) -> None:
"""
- Load the tables associated with a kdb+ Database, once loaded a table
- is accessible as an attribute of the `DB` class or a sub attribute
- of `DB.table`. Note that can alternatively be called when providing a path
+ Load the tables associated with a kdb+ database. Once loaded, a table
+ is accessible as an attribute of the `#!python DB` class or a sub-attribute
+ of `#!python DB.table`. Note this can alternatively be called when providing a path
on initialisation of the DB class.
Parameters:
path: The file system path at which your database is located
+ change_dir: Should the working directory be changed to the location of the
+ loaded database, for q 4.0 this is the only supported behavior, please
+ set `PYKX_4_1_ENABLED` to allow use of this functionality.
+ load_scripts: Should any q scripts found in the database directory be loaded,
+ for q 4.0 this is the only supported behavior, please set
+ `PYKX_4_1_ENABLED` to allow use of this functionality.
overwrite: Should loading of the database overwrite any currently
loaded databases
+ encrypt: The encryption key object to be loaded prior to database load
Returns:
- A `None` object on successful invocation, the database class will be
+ A `#!python None` object on successful invocation, the database class is
updated to contain attributes associated with available tables
Examples:
@@ -365,19 +443,35 @@ def load(self, path: Union[Path, str], *, overwrite=False, encrypt=None):
raise ValueError('Supplied encrypt object not an instance of pykx.Encrypt')
if not encrypt.loaded:
encrypt.load_key()
- q('''
- {[dbpath]
- @[system"l ",;
- 1_string dbpath;
- {'"Failed to load Database with error: ",x}
- ]
- }
- ''', load_path)
+ if pykx_4_1:
+ q('''
+ {[path;cd;ld]
+ .[.Q.lo;
+ (`$1_string path;cd;ld);
+ {'"Failed to load Database with error: ",x}
+ ]
+ }
+ ''', load_path, change_dir, load_scripts)
+ else:
+ if not change_dir:
+ raise QError("'change_dir' behavior only supported with PYKX_4_1_ENABLED")
+ if not load_scripts:
+ raise QError("'load_scripts' behavior only supported with PYKX_4_1_ENABLED")
+ db_path = load_path.parent
+ db_name = os.path.basename(load_path)
+ q('''
+ {[dbpath;dbname]
+ .[.pykx.util.loadfile;
+ (1_string dbpath;string dbname);
+ {'"Failed to load Database with error: ",x}
+ ]
+ }
+ ''', db_path, db_name)
self.path = load_path
self.loaded = True
self.tables = q.Q.pt.py()
for i in self.tables:
- if hasattr(self, i):
+ if i in self._dir_cache:
warn(f'A database table "{i}" would overwrite one of the pykx.DB() methods, please access your table via the table attribute') # noqa: E501
else:
setattr(self, i, q[i])
@@ -386,19 +480,26 @@ def load(self, path: Union[Path, str], *, overwrite=False, encrypt=None):
def _reload(self):
_check_loading(self, None, None)
- return self.load(self.path, overwrite=True)
-
- def rename_column(self, table, original_name, new_name):
+ return self.load(self.path,
+ overwrite=True,
+ change_dir=self._change_dir,
+ load_scripts=self._load_scripts)
+
+ def rename_column(self,
+ table: str,
+ original_name: str,
+ new_name: str
+ ) -> None:
"""
Rename a column within a loaded kdb+ Database
Parameters:
- table: The name of the table within which a column is to be renamed
+ table: The name of the table containing the column to be renamed
original_name: Name of the column which is to be renamed
- new_name: Column name which will be used as the new column name
+ new_name: Updated column name
Returns:
- A `None` object on successful invocation, the database class will be
+ A `#!python None` object on successful invocation, the database class is
updated and column rename actioned.
Examples:
@@ -414,17 +515,8 @@ def rename_column(self, table, original_name, new_name):
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.rename_column('testTable', 'sym', 'symbol')
- >>> db.testTable
- pykx.PartitionedTable(pykx.q('
- month symbol time price size
- ---------------------------------------
- 2020.01 FDP 00:00:00.004 90.94738 12
- 2020.01 FDP 00:00:00.005 33.81127 15
- 2020.01 FDP 00:00:00.027 88.89853 16
- 2020.01 FDP 00:00:00.035 78.33244 9
- 2020.01 JPM 00:00:00.055 68.65177 1
- ..
- '))
+ >>> db.list_columns('testTable')
+ ['month', 'symbol', 'time', 'price', 'size']
```
"""
_check_loading(self, table, 'Column rename')
@@ -433,16 +525,16 @@ def rename_column(self, table, original_name, new_name):
self._reload()
return None
- def delete_column(self, table, column):
+ def delete_column(self, table: str, column: str) -> None:
"""
- Delete the column of a loaded kdb+ Database
+ Delete a column from a loaded kdb+ Database.
Parameters:
- table: The name of the table within which a column is to be deleted
- column: Column which is to be deleted from the database
+ table: The name of the table containing the column to be deleted
+ column: Name of the column which is to be deleted from the table
Returns:
- A `None` object on successful invocation, the database class will be
+ A `#!python None` object on successful invocation, the database class is
updated and specified column deleted
Examples:
@@ -458,17 +550,8 @@ def delete_column(self, table, column):
>>> db.list_columns('testTable')
['month', 'sym', 'time', 'price', 'size']
>>> db.delete_column('testTable', 'size')
- >>> db.testTable
- pykx.PartitionedTable(pykx.q('
- month symbol time price
- -------------------------------------
- 2020.01 FDP 00:00:00.004 90.94738
- 2020.01 FDP 00:00:00.005 33.81127
- 2020.01 FDP 00:00:00.027 88.89853
- 2020.01 FDP 00:00:00.035 78.33244
- 2020.01 JPM 00:00:00.055 68.65177
- ..
- '))
+ >>> db.list_columns('testTable')
+ ['month', 'sym', 'time', 'price']
```
"""
_check_loading(self, table, 'Column deletion')
@@ -477,7 +560,7 @@ def delete_column(self, table, column):
self._reload()
return None
- def rename_table(self, original_name, new_name):
+ def rename_table(self, original_name: str, new_name: str) -> None:
"""
Rename a table within a loaded kdb+ Database
@@ -486,7 +569,7 @@ def rename_table(self, original_name, new_name):
new_name: Updated table name
Returns:
- A `None` object on successful invocation, the database class will be
+ A `#!python None` object on successful invocation, the database class is
updated, original table name deleted from q memory and new table
accessible
@@ -512,15 +595,15 @@ def rename_table(self, original_name, new_name):
self._reload()
return None
- def list_columns(self, table):
+ def list_columns(self, table: str) -> None:
"""
List the columns of a table within a loaded kdb+ Database
Parameters:
- table: The name of the table whose columns are listed
+ table: The name of the table whose columns are to be listed
Returns:
- A list of strings defining the columns of a table
+ A list of strings defining the columns of the table
Examples:
@@ -539,7 +622,11 @@ def list_columns(self, table):
_check_loading(self, table, 'Column listing')
return q.dbmaint.listcols(self.path, table).py()
- def add_column(self, table, column_name, default_value):
+ def add_column(self,
+ table: str,
+ column_name: str,
+ default_value: Any
+ ) -> None:
"""
Add a column to a table within a loaded kdb+ Database
@@ -554,7 +641,8 @@ def add_column(self, table, column_name, default_value):
Examples:
- Add a column to a table within a partitioned database
+ Add a column to a table within a partitioned database where all items are
+ an integer null
```python
>>> import pykx as kx
@@ -574,7 +662,7 @@ def add_column(self, table, column_name, default_value):
self._reload()
return(None)
- def find_column(self, table, column_name):
+ def find_column(self, table: str, column_name: str) -> None:
"""
Functionality for finding a column across partitions within a loaded kdb+ Database
@@ -583,9 +671,9 @@ def find_column(self, table, column_name):
column_name: The name of the column to be found within a table
Returns:
- A `None` object on successful invocation printing search status per partition,
- if a column does not exist in a specified partition an error will be raised
- and the logs will indicate which columns did not have the specified column.
+ A `#!python None` object on successful invocation printing search status per partition.
+ If a column does not exist in a specified partition, an error is raised
+ and the logs indicate which partitions did not contain the specified column.
Examples:
@@ -625,7 +713,7 @@ def find_column(self, table, column_name):
_check_loading(self, table, 'Finding columns')
return q.dbmaint.findcol(self.path, table, column_name).py()
- def reorder_columns(self, table, new_order):
+ def reorder_columns(self, table: str, new_order: list) -> None:
"""
Reorder the columns of a persisted kdb+ database
@@ -634,7 +722,7 @@ def reorder_columns(self, table, new_order):
new_order: The ordering of the columns following update
Returns:
- A `None` object on successfully updating the columns of the database
+ A `#!python None` object on successfully updating the columns of the database
Examples:
@@ -662,7 +750,7 @@ def reorder_columns(self, table, new_order):
q.dbmaint.reordercols(self.path, table, new_order)
return None
- def set_column_attribute(self, table, column_name, new_attribute):
+ def set_column_attribute(self, table: str, column_name: str, new_attribute: str) -> None:
"""
Set an attribute associated with a column for an on-disk database
@@ -670,10 +758,11 @@ def set_column_attribute(self, table, column_name, new_attribute):
table: The name of the table within which an attribute will be set
column_name: Name of the column to which the attribute will be applied
new_attribute: The attribute which is to be applied, this can be one of
- 'sorted'/'u', 'partitioned'/'p', 'unique'/'u' or 'grouped'/'g'.
+ `#!python 'sorted'`/`#!python 's'`, `#!python 'partitioned'`/`#!python 'p'`,
+ `#!python 'unique'`/`#!python 'u'` or `#!python 'grouped'`/`#!python 'g'`.
Returns:
- A `None` object on successfully setting the attribute for a column
+ A `#!python None` object on successfully setting the attribute for a column
Examples:
@@ -720,7 +809,7 @@ def set_column_attribute(self, table, column_name, new_attribute):
q.dbmaint.setattrcol(self.path, table, column_name, new_attribute)
return None
- def set_column_type(self, table, column_name, new_type):
+ def set_column_type(self, table: str, column_name: str, new_type: k.K) -> None:
"""
Convert/set the type of a column to a specified type
@@ -730,7 +819,7 @@ def set_column_type(self, table, column_name, new_type):
new_type: PyKX type to which a column is to be converted
Returns:
- A `None` object on successfully updating the type of the column
+ A `#!python None` object on successfully updating the type of the column
Examples:
@@ -777,7 +866,7 @@ def set_column_type(self, table, column_name, new_type):
self._reload()
return None
- def clear_column_attribute(self, table, column_name):
+ def clear_column_attribute(self, table: str, column_name: str) -> None:
"""
Clear an attribute associated with a column of an on-disk database
@@ -786,7 +875,7 @@ def clear_column_attribute(self, table, column_name):
column_name: Name of the column from which an attribute will be removed
Returns:
- A `None` object on successful removal of the attribute of a column
+ A `#!python None` object on successful removal of the attribute of a column
Examples:
@@ -824,7 +913,7 @@ def clear_column_attribute(self, table, column_name):
q.dbmaint.clearattrcol(self.path, table, column_name)
return None
- def copy_column(self, table, original_column, new_column):
+ def copy_column(self, table: str, original_column: str, new_column: str) -> None:
"""
Create a copy of a column within a table
@@ -834,7 +923,7 @@ def copy_column(self, table, original_column, new_column):
new_column: Name of the copied column
Returns:
- A `None` object on successful column copy, reloading the
+ A `#!python None` object on successful column copy, reloading the
database following column copy
Examples:
@@ -857,18 +946,18 @@ def copy_column(self, table, original_column, new_column):
self._reload()
return None
- def apply_function(self, table, column_name, function):
+ def apply_function(self, table: str, column_name: str, function: callable) -> None:
"""
Apply a function per partition on a column of a persisted kdb+ database
Parameters:
table: Name of the table
column_name: Name of the column on which the function is to be applied
- function: Callable function to be applied on a column vector per column
+ function: Callable function to be applied on a column vector per partition
Returns:
- A `None` object on successful application of a function to the column
- and the reloading of the database
+ A `#!python None` object on successful application of a function to the column
+ and reloading of the database
Examples:
@@ -942,14 +1031,14 @@ def apply_function(self, table, column_name, function):
self._reload()
return None
- def fill_database(self):
+ def fill_database(self) -> None:
"""
Fill missing tables from partitions within a database using the
most recent partition as a template, this will report the
partitions but not the tables which are being filled.
Returns:
- A `None` object on successful filling of missing tables in
+ A `#!python None` object on successful filling of missing tables in
partitioned database
Examples:
@@ -979,25 +1068,25 @@ def fill_database(self):
self._reload()
return None
- def partition_count(self, *, subview=None):
+ def partition_count(self, *, subview: Optional[list] = None) -> k.Dictionary:
"""
Count the number of rows per partition for the presently loaded database.
- Use of the parameter `subview` can allow users to count only the rows
- in specifies partitions.
+ Use of the parameter `#!python subview` can allow users to count only the rows
+ in specified partitions.
Parameters:
subview: An optional list of partitions from which to retrieve the per partition
count
Returns:
- A `pykx.Dictionary` object showing the count of data in each table within
+ A `#!python pykx.Dictionary` object showing the count of data in each table within
the presently loaded partioned database.
!!! Warning
- Using this function will result in any specified `subview` of the data being reset,
- if you require the use of a subview for queries please reset using the database
- `subview` command.
+ Using this function results in any specified `#!python subview` of the data
+ being reset, if you require the use of a subview for queries please set using
+ the database `#!python subview` command.
Examples:
@@ -1041,18 +1130,18 @@ def partition_count(self, *, subview=None):
q.Q.view()
return res
- def subview(self, view=None):
+ def subview(self, view: list = None) -> None:
"""
Specify the subview to be used when querying a partitioned table
Parameters:
view: A list of partition values which will serve as a filter
for all queries against any partitioned table within the
- database. If view is supplied as `None` this will reset
+ database. If view is supplied as `#!python None` this resets
the query view to all partitions
Returns:
- A `None` object on successful setting of the view state
+ A `#!python None` object on successful setting of the view state
Examples:
@@ -1095,24 +1184,23 @@ def subview(self, view=None):
q.Q.view(view)
return None
- def enumerate(self, table, *, sym_file=None):
+ def enumerate(self, table: str, *, sym_file: Optional[str] = None) -> k.Table:
"""
Perform an enumeration on a user specified table against the
current sym files associated with the database
Parameters:
- path: The folder location to which your table will be persisted
- table: The `pykx.Table` object which is to be persisted to disk
+ table: The `#!python pykx.Table` object which is to be persisted to disk
and which is to undergo enumeration
sym_file: The name of the sym file contained in the folder specified by
- the `path` parameter against which enumeration will be completed
+ the `#!python path` parameter against which enumeration will be completed
Returns:
The supplied table with enumeration applied
Examples:
- Enumerate the symbol columns of a table without specifying the `sym` file
+ Enumerate the symbol columns of a table without specifying the `#!python sym_file`
```python
>>> import pykx as kx
@@ -1128,7 +1216,7 @@ def enumerate(self, table, *, sym_file=None):
pykx.EnumVector(pykx.q('`sym$`a`b`a`c`b..'))
```
- Enumerate the symbol columns of a table specifying the `sym` file used
+ Enumerate the symbol columns of a table specifying the `#!python sym_file`
```python
>>> import pykx as kx
diff --git a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py
index 86b6d81..68399ce 100644
--- a/src/pykx/embedded_q.py
+++ b/src/pykx/embedded_q.py
@@ -2,6 +2,7 @@
import os
from pathlib import Path
import sys
+import platform
from typing import Any, Optional, Union
from warnings import warn
@@ -11,7 +12,7 @@
from . import toq
from . import wrappers
from . import schema
-from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_qdebug, pykx_threading, qargs, skip_under_q # noqa
+from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_qdebug, pykx_threading, qargs, skip_under_q, suppress_warnings, pykx_debug_insights # noqa
from .core import keval as _keval
from .exceptions import FutureCancelled, LicenseException, NoResults, PyKXException, PyKXWarning, QError # noqa
from ._wrappers import _factory as factory
@@ -114,9 +115,8 @@ def __call__(self):
class EmbeddedQ(Q, metaclass=ABCMetaSingleton):
- """Interface for q within the current process; can be called to execute q code."""
+ """Interface for using q within the current python process. Call this to execute q code."""
def __init__(self): # noqa
-
if licensed:
kxic_path = (pykx_dir/'lib').as_posix()
kxic_file = 'kxic.k'
@@ -125,6 +125,8 @@ def __init__(self): # noqa
code = ''
code += '''
.pykx.util.loadfile:{[folder;file]
+ path:$[.z.o like "w*";"\\\\";"/"] sv ((),folder;(),file);
+ if[not " " in path;:system"l ",path];
cache:system"cd";
system"cd ",folder;
folder:system"cd";
@@ -136,6 +138,7 @@ def __init__(self): # noqa
$[res[0];'res[1];res[1]]
};
'''
+ code += f'.pykx.util.loadfile["{kxic_path}";"html.q"];'
if not no_qce:
code += f'''
if[not `comkxic in key `;
@@ -146,21 +149,20 @@ def __init__(self): # noqa
os.environ['PYKX_UNDER_PYTHON'] = 'true'
code += 'setenv[`PYKX_UNDER_PYTHON;"true"];'
code += f'2:[`$"{pykx_qlib_path}";(`k_pykx_init; 2)][`$"{find_core_lib("q").as_posix()}";{"1b" if pykx_threading else "0b"}];' # noqa: E501
- code += f'`.pykx.i.pyfunc set (`$"{pykx_qlib_path}") 2: (`k_pyfunc; 2);'
code += f'`.pykx.modpow set {{((`$"{pykx_qlib_path}") 2: (`k_modpow; 3))["j"$x;"j"$y;$[z~(::);(::);"j"$z]]}};' # noqa: E501
else:
code += f'2:[`$"{pykx_qlib_path}q";(`k_pykx_init; 2)][`$"{find_core_lib("q").as_posix()}";{"1b" if pykx_threading else "0b"}];' # noqa: E501
code += f'`.pykx.modpow set {{((`$"{pykx_qlib_path}q") 2: (`k_modpow; 3))["j"$x;"j"$y;$[z~(::);(::);"j"$z]]}};' # noqa: E501
- if pykx_threading:
+ if pykx_threading and (not suppress_warnings):
warn('pykx.q is not supported when using PYKX_THREADING.')
code += '@[get;`.pykx.i.kxic.loadfailed;{()!()}]'
kxic_loadfailed = self._call(code, skip_debug=True).py()
- if (not no_qce) and ('--no-sql' not in qargs):
+ if (not platform.system() == "Linux") and (not no_qce) and ('--no-sql' not in qargs):
sql = self._call('$[("insights.lib.sql" in " " vs .z.l 4)&not `s in key`; @[system; "l s.k_";{x}];::]', skip_debug=True).py() # noqa: E501
if sql is not None:
kxic_loadfailed['s.k'] = sql
for lib, msg in kxic_loadfailed.items():
- if os.getenv('PYKX_DEBUG_INSIGHTS_LIBRARIES'):
+ if pykx_debug_insights:
warn(f'Failed to load KX Insights Core library {lib!r}: {msg.decode()}',
PyKXWarning)
else:
@@ -208,21 +210,17 @@ def __call__(self,
skip_debug: bool = False,
**kwargs # since sync got removed this is added to ensure it doesn't break
) -> wrappers.K:
- """Run code in the q instance.
+ """Run q code in the q instance.
Parameters:
query: The code to run in the q instance.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K` object.
- Up to 8 arguments can be provided, as that is the maximum supported by q.
- wait: Keyword argument provided solely for conformity with `pykx.QConnection`. All
- queries against the embedded q instance are synchronous regardless of what this
- parameter is set to. Setting this keyword argument to `False` results in q generic
- null (`::`) being returned, so as to conform with `pykx.QConnection`. This
- conformity enables one to call any `pykx.Q` instance the same way regardless of
- whether it is a `pykx.EmbeddedQ` or `pykx.QConnection` instance. For cases where
- the query executing asynchronously (and returning after it has been issued, rather
- than after is is done executing) is actually important, one can discriminate
- between `pykx.Q` instances using `isinstance` as normal.
+ *args: Arguments to the q query. Each argument is converted into a
+ `#!python pykx.K` object. This parameter supports up to 8 args (the maximum amount
+ supported by q functions), providing more causes an error.
+ wait: A keyword to allow users to call any `#!python pykx.EmbeddedQ` or
+ `#!python pykx.QConnection` instance the same way. All queries executed by this
+ function are synchronous on the embedded q instance. Using a `#!python False`
+ argument for this parameter returns a q generic null (`#!q ::`).
Returns:
The value obtained by evaluating the `query` within the current process.
@@ -232,7 +230,6 @@ def __call__(self,
TypeError: Too many arguments were provided - q queries cannot have more than 8
parameters.
"""
-
if not licensed:
raise LicenseException("run q code via 'pykx.q'")
if len(args) > 8:
@@ -248,7 +245,7 @@ def __call__(self,
else:
result = _keval(bytes(query), *[wrappers.K(x) for x in args])
if wait is None or wait:
- return factory(result, False)
+ return factory(result, False, name=query.__str__())
return self('::', wait=True)
# Asynchronous q calls internally use a _call method to run q code synchronously, so this has
diff --git a/src/pykx/exceptions.py b/src/pykx/exceptions.py
index eb1038c..cebdb6e 100644
--- a/src/pykx/exceptions.py
+++ b/src/pykx/exceptions.py
@@ -82,7 +82,8 @@ def __init__(self, msg=None, *args, **kwargs):
class QError(PyKXException):
- """Exception type for q errors.
+ """
+ Exception type for q errors.
Refer to https://code.kx.com/q/basics/errors/ for clarification about error messages.
"""
diff --git a/src/pykx/extensions/chained_tick.q b/src/pykx/extensions/chained_tick.q
new file mode 100644
index 0000000..e1075bd
--- /dev/null
+++ b/src/pykx/extensions/chained_tick.q
@@ -0,0 +1,39 @@
+\d .u
+
+d:.z.D
+
+init:{w::t!(count t::tables`.)#()}
+
+del:{w[x]_:w[x;;0]?y};.z.pc:{del[;x]each t};
+
+sel:{$[`~y;x;select from x where sym in y]}
+
+pub:{[t;x]{[t;x;w]if[count x:sel[x]w 1;(neg first w)(`upd;t;x)]}[t;x]each w t}
+
+add:{$[(count w x)>i:w[x;;0]?.z.w;.[`.u.w;(x;i;1);union;y];w[x],:enlist(.z.w;y)];(x;$[99=type v:value x;sel[v]y;@[0#v;`sym;`g#]])}
+
+sub:{if[x~`;:sub[;y]each t];if[not x in t;'x];del[x].z.w;add[x;y]}
+
+\d .
+
+if[system"t";
+ .z.ts:{.u.pub'[.u.t;value each .u.t];@[`.;.u.t;@[;`sym;`g#]0#]};
+ upd:{[t;x] t insert x;}
+ ]
+
+if[not system"t";
+ system"t 1000";
+ upd:{[t;x] .u.pub[t;x];}
+ ]
+
+/ get the ticker plant port, default is 5010
+.u.x:.z.x,(count .z.x)_enlist":5010"
+
+/ init schema
+.u.rep:{(.[;();:;].)each x;}
+
+.tick.init:{[ports]
+ conn_port:enlist[`tickerplant]!enlist":5010";
+ conn_ports,:string ports;
+ .u.init .u.rep(.u.m:hopen`$":",conn_ports`tickerplant)".u.sub[`;`]"
+ }
diff --git a/src/pykx/extensions/gateway.q b/src/pykx/extensions/gateway.q
new file mode 100644
index 0000000..dc4282e
--- /dev/null
+++ b/src/pykx/extensions/gateway.q
@@ -0,0 +1,44 @@
+@[get;`.gw.ports;{.gw.ports:()!()}]
+
+\d .tick
+
+ports:([name:`$()]details:();connection:`int$())
+
+init:{[config]
+ if[0~count .gw.ports;
+ '"'connections' not set in gateway configuration"
+ ];
+ if[99h<>type config;
+ '"Supplied configuration must be a dictionary"
+ ];
+ addPort[.gw.ports];
+ if[0=count config;:()];
+ }
+
+getPort:{[name]
+ port:.tick.ports[name;`connection];
+ if[()~port;
+ '"Unable to retrieve specified port"
+ ];
+ port
+ }
+
+callPort:{[name;arguments]
+ port:.tick.getPort[name];
+ .[port;
+ arguments;
+ {[x;y;z]x . enlist y}[port;arguments]
+ ]
+ }
+
+addPort:{[portInfo]
+ {.[{`.tick.ports upsert (x;y;hopen hsym y)};
+ (x;y);
+ {-2"Failed to establish connection to port: ",x;}];
+ }'[key portInfo;value portInfo]
+ }
+
+.pykx.pyexec"import pykx as kx\n",
+ "class gateway:\n",
+ " def call_port(name, *args):\n",
+ " return kx.q('.tick.callPort', name, args)\n"
diff --git a/src/pykx/extensions/hdb.q b/src/pykx/extensions/hdb.q
new file mode 100644
index 0000000..7461159
--- /dev/null
+++ b/src/pykx/extensions/hdb.q
@@ -0,0 +1,9 @@
+.tick.init:{[config]
+ if[99h<>type config;
+ '"Supplied configuration must be a dictionary"
+ ];
+ if[not `database in key config;
+ '"Supplied configuration must contain a 'database' location"
+ ];
+ @[{system"l ",x;-1"Successfully loaded database: ",x;};string config`database;{-1"Database not loaded"}];
+ }
diff --git a/src/pykx/extensions/plant.q b/src/pykx/extensions/plant.q
new file mode 100644
index 0000000..582eacf
--- /dev/null
+++ b/src/pykx/extensions/plant.q
@@ -0,0 +1,77 @@
+\d .u
+
+d:.z.D
+
+init:{w::t!(count t::tables`.)#()}
+
+del:{w[x]_:w[x;;0]?y};.z.pc:{del[;x]each t};
+
+sel:{$[`~y;x;select from x where sym in y]}
+
+pub:{[t;x]{[t;x;w]if[count x:sel[x]w 1;(neg first w)(`upd;t;x)]}[t;x]each w t}
+
+add:{$[(count w x)>i:w[x;;0]?.z.w;.[`.u.w;(x;i;1);union;y];w[x],:enlist(.z.w;y)];(x;$[99=type v:value x;sel[v]y;@[0#v;`sym;`g#]])}
+
+sub:{if[x~`;:sub[;y]each t];if[not x in t;'x];del[x].z.w;add[x;y]}
+
+end:{(neg union/[w[;;0]])@\:(`.u.end;x)}
+
+ld:{
+ L::`$(-10_string L),string x;
+ if[a~key a:hsym L;
+ if[.tick.hardReset;@[hdel;a;{x}]]
+ ];
+ if[not type key L;
+ .[L;();:;()]
+ ];
+ i::j::-11!(-2;L);
+ if[0<=type i;
+ -2 (string L)," is a corrupt log. Truncate to length ",
+ (string last i),
+ " and restart";exit 1
+ ];
+ hopen L
+ };
+
+tick:{
+ init[];
+ @[;`sym;`g#]each t;
+ d::.z.D;
+ if[l::count y;
+ y:$[0h=type y;raze;]y;
+ L::`$":",y,"/",x,10#".";
+ l::ld d
+ ]
+ };
+
+endofday:{end d;d+:1;if[l;hclose l;l::0(`.u.ld;d)]};
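+ts:{if[d<x;if[d<x-1;system"t 0";'"more than one day?"];endofday[]]};
+
+if[system"t";
+ .z.ts:{pub'[t;value each t];@[`.;t;@[;`sym;`g#]0#];i::j;ts .z.D};
+ upd:{[t;x]
+ if[not -16=type first first x;if[d<"d"$a:.z.P;.z.ts[]];a:"n"$a;x:$[0>type first x;a,x;(enlist(count first x)#a),x]];
+ t insert x;if[l;l enlist (`upd;t;x);j+:1];}];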
+
+if[not system"t";
+ system"t 100";
+ .z.ts:{ts .z.D};
+ upd:{[t;x]ts"d"$a:.z.P;
+ if[not -16=type first first x;a:"n"$a;x:$[0>type first x;a,x;(enlist(count first x)#a),x]];
+ f:key flip value t;pub[t;$[0>type first x;enlist f!x;flip f!x]];if[l;l enlist (`upd;t;x);i+:1];}];
+
+\d .
+
+.tick.tabs:()
+
+.tick.init:{[config]
+ show config;
+ tick_config:enlist[`log_prefix]!enlist "log";
+ tick_config,:config;
+ .u.tick[tick_config`log_prefix;.tick.logdir];
+ }
+
+.tick.set_tables:{[tabname;schema]
+ .tick.tabs,:enlist[tabname];
+ tabname set schema
+ }
diff --git a/src/pykx/extensions/rdb.q b/src/pykx/extensions/rdb.q
new file mode 100644
index 0000000..0c8cb5e
--- /dev/null
+++ b/src/pykx/extensions/rdb.q
@@ -0,0 +1,42 @@
+/ end of day: save, clear, hdb reload
+.u.end:{
+ t:tables`.;
+ t@:where `g=attr each t@\:`sym;
+ .Q.hdpf[`$":",.u.x 1;`:.;x;`sym];
+ @[;`sym;`g#] each t;
+ };
+
+/ init schema and sync up from log file;cd to hdb(so client save can run)
+.u.rep:{
+ (.[;();:;].)each y;
+ if[null first z;:()];
+ -11!z;
+ system "cd ",x
+ };
+
+upd:$[.tick.vanilla;
+ insert;
+ {
+ pre:.tick.RTPPreProc[x;y];
+ if[pre~(::);:()];
+ ret:x insert pre;
+ .tick.RTPPostProc[x;y];
+ ret
+ }
+ ];
+
+\d .tick
+
+loaded:"RDB"
+subscriptions:`
+
+// Default analytic operates as a pass through
+RTPPreProc:{[x;y]y}
+RTPPostProc:{[x;y]:()}
+
+init:{[config]
+ rdb_config:`tickerplant`hdb`database!(":5010";":5012";"db");
+ rdb_config,:string config;
+ .u.x:rdb_config`tickerplant`hdb;
+ .u.rep[rdb_config`database] . hopen[`$":",.u.x 0]("{(.u.sub[;`]each x;`.u `i`L)}";.tick.subscriptions)
+ }
diff --git a/src/pykx/help.py b/src/pykx/help.py
new file mode 100644
index 0000000..e77b0b0
--- /dev/null
+++ b/src/pykx/help.py
@@ -0,0 +1,193 @@
+import importlib.util
+import warnings
+import os
+
+__all__ = ["help"]
+_filepath = os.path.join(os.path.dirname(__file__), 'docs', 'api', 'pykx-execution', 'q.md')
+
+
+def __dir__():
+ return __all__
+
+
+def _init(_q):
+ global q
+ q = _q
+
+
+def _load_markdown_section(keyword, file_path=_filepath):
+ with open(file_path, 'r') as f:
+ lines = f.readlines()
+
+ # Extract the specific section
+ in_section = False
+ section_lines = []
+ for line in lines:
+ if line.startswith(f'### [{keyword}]'):
+ in_section = True
+ section_lines.append(line)
+ elif line.startswith('###') and in_section:
+ break
+ elif in_section:
+ section_lines.append(line)
+
+ if not section_lines:
+ return "No description available for this function."
+
+ # Convert the section to HTML
+ section_markdown = ''.join(section_lines)
+
+ md2_spec = importlib.util.find_spec("markdown2")
+ if md2_spec is None:
+ warnings.warn("The 'markdown2' package is required to use the 'qhelp' function.")
+ return ""
+ else:
+ import markdown2
+ section_html = markdown2.markdown(section_markdown, extras=["fenced-code-blocks"])
+
+ return section_html
+
+
+def qhelp(keyword):
+ """Parse and format the documentation for a given keyword."""
+ bs4_spec = importlib.util.find_spec("bs4")
+ if bs4_spec is None:
+ warnings.warn("The 'bs4' package is required to use the 'qhelp' function.")
+ return ""
+ else:
+ from bs4 import BeautifulSoup
+
+ html_content = _load_markdown_section(keyword)
+ if html_content:
+ soup = BeautifulSoup(html_content, "html.parser")
+ tag = soup.find("h3", string=keyword)
+
+ if tag is None:
+ return None
+
+ text = " • " + tag.text + "\n\n"
+ for x in tag.findAllNext():
+ if x.name == "h2":
+ break
+
+ elif x.name == "h3":
+ break
+
+ elif x.name == "p":
+ only_link = True
+ for w in x:
+ if w.name != "a":
+ only_link = False
+ if not only_link:
+ text += x.text + "\n"
+
+ elif x.name == "pre":
+ text += "\n"
+ for line in x.text.split("\n"):
+ text += " " + line + "\n"
+
+ elif x.name == "li":
+ text += " - " + x.text + "\n"
+
+ elif x.name == "thead":
+ table = []
+ tmp = []
+ for cell in x.text.split("\n"):
+ if cell != "":
+ tmp += [cell]
+ table += [tmp]
+
+ elif x.name == "tbody":
+ for row in x.text.split("\n\n\n"):
+ tmp = []
+ for cell in row.split("\n"):
+ if cell != "":
+ tmp += [cell]
+ table += [tmp]
+ text += __ascii_table(table[1:], border=True, header=table[0]) + "\n"
+
+ return text
+ else:
+ print("html error")
+ return None
+
+
+def __ascii_table(table: list, header: list = None, align='left', border=False):
+ """Converts a list of lists into an ASCII table."""
+ if header is None:
+ header = []
+ widths = []
+ for i in range(max(map(len, table))):
+ widths.append(max(max(map(len, [row[i] for row in table if len(row) > i])),
+ len(header[i]) if len(header) > i else 0))
+
+ printable = []
+
+ if border:
+ print_row = []
+ for i in range(max(map(len, table))):
+ if i > 0 and i < max(map(len, table)) - 1:
+ print_row.append('─' * (widths[i] + 2))
+ else:
+ print_row.append('─' * (widths[i] + 1))
+ printable.append('┌─' + '┬'.join(print_row) + '─┐')
+
+ # header formatting
+ if len(header) > 0:
+ print_row = []
+ for i in range(len(header)):
+ assert header[i]
+ if align == 'center':
+ print_row.append(header[i].center(widths[i]))
+ elif align == 'left':
+ print_row.append(header[i].ljust(widths[i]))
+ elif align == 'right':
+ print_row.append(header[i].rjust(widths[i]))
+
+ if border:
+ printable.append('│ ' + ' │ '.join(print_row) + ' │')
+ else:
+ printable.append(' │ '.join(print_row))
+
+ print_row = []
+ for i in range(len(header)):
+ if i > 0 and i < len(header) - 1:
+ print_row.append('─' * (widths[i] + 2))
+ else:
+ print_row.append('─' * (widths[i] + 1))
+
+ if border:
+ printable.append('├─' + '┼'.join(print_row) + '─┤')
+ else:
+ printable.append('┼'.join(print_row))
+
+ # table formatting
+ for row in table:
+ print_row = []
+ for _ in range(len(widths) - len(row)):
+ row.append('')
+
+ for i in range(len(row)):
+ if align == 'center':
+ print_row.append(row[i].center(widths[i]))
+ elif align == 'left':
+ print_row.append(row[i].ljust(widths[i]))
+ elif align == 'right':
+ print_row.append(row[i].rjust(widths[i]))
+
+ if border:
+ printable.append('│ ' + ' │ '.join(print_row) + ' │')
+ else:
+ printable.append(' │ '.join(print_row))
+
+ if border:
+ print_row = []
+ for i in range(max(map(len, table))):
+ if i > 0 and i < max(map(len, table)) - 1:
+ print_row.append('─' * (widths[i] + 2))
+ else:
+ print_row.append('─' * (widths[i] + 1))
+ printable.append('└─' + '┴'.join(print_row) + '─┘')
+
+ result = '\n'.join(printable)
+ return result
diff --git a/src/pykx/include/py.h b/src/pykx/include/py.h
index e2f77b3..809b7c3 100644
--- a/src/pykx/include/py.h
+++ b/src/pykx/include/py.h
@@ -73,7 +73,6 @@ typedef struct{
X(int,PyGILState_Ensure,())\
X(void,PyGILState_Release,(int))\
X(int,PyGILState_Check,())\
- X(void,PyEval_InitThreads,())\
X(int,PyEval_ThreadsInitialized,())\
X(void*,PyGILState_GetThisThreadState,())\
X(void*,PyEval_SaveThread,())\
@@ -134,6 +133,7 @@ typedef struct{
X(P,Py_BuildValue,(char*,...))\
X(P,PyTuple_New,(L))\
X(P,PyList_New,(L))\
+ X(int, PyList_SetItem, (P,size_t,P))\
X(P,PyDict_New,())\
X(size_t,PySequence_Size,(P))\
X(P,PySequence_GetItem,(P, size_t))\
diff --git a/src/pykx/ipc.py b/src/pykx/ipc.py
index 069e726..da3cc15 100644
--- a/src/pykx/ipc.py
+++ b/src/pykx/ipc.py
@@ -1,25 +1,5 @@
-"""PyKX q IPC interface.
-
-The IPC communication module provided here works differently than may be expected for users
-familiar with the KX IPC interfaces provided for Java and C#. Unlike these interfaces it does not
-directly convert the encoded data received over the q IPC protocol to an analogous type in Python,
-but rather stores the object within q memory space as a `pykx.K` object for deferred conversion.
-
-This has major benefits with regards to the flexibility of the interface. In particular, the
-[`pykx.K`][pykx.K] conversion methods (i.e. `py`, `np`, `pd`, and `pa`), use the same logic as they
-do when converting `pykx.K` objects that were created by an embedded q instance.
-
-The IPC interface works when running with or without a q license. Refer to
-[the modes of operation documentation](../user-guide/advanced/modes.md) for more details.
-
-The IPC Interface is split between two classes `pykx.AsyncQConnection` and `pykx.SyncQConnection`.
-Both of which extend the base `QConnection` class, instantiating a `QConnection` directly remains
-possible for backward compatibility but will now return an instance of `pykx.SyncQConnection`. There
-is also the `pykx.RawQConnection` class that is a superset of the `pykx.AsyncQConnection` class that
-has extra functionality around manually polling the send an receive message queues.
-
-For more examples of usage of the IPC interface you can look at the
-[`interface overview`](../getting-started/PyKX%20Introduction%20Notebook.ipynb#ipc-communication).
+"""
+_This page documents the API functions for using q IPC within PyKX._
"""
from enum import Enum
@@ -32,7 +12,6 @@
from threading import Lock as threading_lock
from time import monotonic_ns, sleep
from typing import Any, Callable, Optional, Union
-from warnings import warn
from weakref import finalize, WeakMethod
import sys
@@ -41,7 +20,7 @@
from .core import licensed
from .exceptions import FutureCancelled, NoResults, PyKXException, QError, UninitializedConnection
from .util import get_default_args, normalize_to_bytes, normalize_to_str
-from .wrappers import CharVector, Composition, Foreign, Function, K, List, SymbolAtom, SymbolicFunction # noqa : E501
+from .wrappers import CharVector, Composition, Foreign, Function, K, List, SymbolAtom, SymbolicFunction, Table # noqa : E501
from . import _wrappers
from . import _ipc
@@ -64,9 +43,17 @@ def _init(_q):
global q
q = _q
+def reconnection_function(reconnection_delay):
+ return reconnection_delay * 2
class MessageType(Enum):
- """The message types available to q."""
+ """
+ The message types available to q.
+
+ - 0 = async message
+ - 1 = sync message
+ - 2 = response message
+ """
async_msg = 0
sync_msg = 1
resp_msg = 2
@@ -74,13 +61,15 @@ class MessageType(Enum):
class QFuture(asyncio.Future):
"""
- A Future object to be returned by calls to q from an instance of `pykx.AsyncQConnection`.
+ A Future object to be returned by calls to q from an instance of
+ [pykx.AsyncQConnection][pykx.AsyncQConnection].
This object can be awaited to receive the resulting value.
Examples:
- Awaiting an instance of this class to receive the return value of an `AsyncQConnection` call.
+ Await an instance of this class to receive the return value of an
+ `#!python AsyncQConnection` call.
```python
async with pykx.AsyncQConnection('localhost', 5001) as q:
@@ -103,10 +92,10 @@ def __init__(self, q_connection, timeout, debug, poll_recv=None):
super().__init__()
def __await__(self) -> Any:
- """Await the result of the `QFuture`.
+ """Await the result of the `#!python QFuture`.
Returns:
- The result of the `QFuture`.
+ The result of the `#!python QFuture`.
Raises:
FutureCancelled: This QFuture instance has been cancelled and cannot be awaited.
@@ -132,7 +121,8 @@ def __await__(self) -> Any:
self.q_connection._cancel_all_futures()
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self.q_connection._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self.q_connection._connection_info['reconnection_delay']
+ reconnection_function = self.q_connection._connection_info['reconnection_function']
while True:
try:
self.q_connection._create_connection_to_server()
@@ -152,8 +142,12 @@ def __await__(self) -> Any:
'seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
break
@@ -187,7 +181,8 @@ async def __async_await__(self) -> Any:
self.q_connection._cancel_all_futures()
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self.q_connection._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self.q_connection._connection_info['reconnection_delay']
+ reconnection_function = self.q_connection._connection_info['reconnection_function']
while True:
try:
self.q_connection._create_connection_to_server()
@@ -207,8 +202,12 @@ async def __async_await__(self) -> Any:
'seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
break
@@ -231,7 +230,8 @@ def _await(self) -> Any:
# TODO: Clear call stack futures
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self.q_connection._connection_info['reconnection_delay']
+ reconnection_function = self.q_connection._connection_info['reconnection_function']
while True:
try:
self._create_connection_to_server()
@@ -250,8 +250,12 @@ def _await(self) -> Any:
f'Failed to reconnect, trying again in {reconnection_delay} seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
break
@@ -260,13 +264,13 @@ def _await(self) -> Any:
return self.result()
def set_result(self, val: Any) -> None:
- """Set the result of the `QFuture` and mark it as done.
+ """Set the result of the `#!python QFuture` and mark it as done.
- If there are functions in the callback list they will be called using this `QFuture`
- instance as the only parameter after the result is set.
+ The result is set first, then any functions in the callback list will execute
+ with this `#!python QFuture` as the only parameter input.
Parameters:
- val: The value to set as the result of the `QFuture`.
+ val: The value to set as the result of the `#!python QFuture`.
"""
self._result = val
for _ in self._callbacks:
@@ -275,22 +279,23 @@ def set_result(self, val: Any) -> None:
self._done = True
def set_exception(self, err: Exception) -> None:
- """Set the exception of the `QFuture` and mark it as done.
+ """Set the exception of the `#!python QFuture` and mark it as done.
Parameters:
- err: The exception to set as the exception of the `QFuture`.
+ err: The exception to set as the exception of the `#!python QFuture`.
"""
self._done = True
self._exception = err
def result(self) -> Any:
- """Get the result of the `QFuture`.
+ """Get the result of the `#!python QFuture`.
Returns:
- The result of the `QFuture`.
+ The result of the `#!python QFuture`.
Raises:
- FutureCancelled: This QFuture instance has been cancelled and cannot be awaited.
+ FutureCancelled: This `#!python QFuture` instance has been cancelled and
+ cannot be awaited.
NoResults: The result is not ready.
"""
if self._exception is not None:
@@ -322,31 +327,32 @@ def _disconnected(self):
def done(self) -> bool:
"""
Returns:
- `True` if the `QFuture` is done or if it has been cancelled.
+ `#!python True` if the `#!python QFuture` is done or if it has been cancelled.
"""
return self._done or self._cancelled
def cancelled(self) -> bool:
"""
Returns:
- `True` if the `QFuture` has been cancelled.
+ `#!python True` if the `#!python QFuture` has been cancelled.
"""
return self._cancelled
def cancel(self, msg: str = '') -> None:
- """Cancel the `QFuture`.
+ """Cancel the `#!python QFuture`.
Parameters:
- msg: An optional message to append to the end of the `pykx.FutureCancelled` exception.
+ msg: An optional message to append to the end of the
+ `#!python pykx.FutureCancelled` exception.
"""
self._cancelled = True
self._cancelled_message = msg
def exception(self) -> None:
- """Get the exception of the `QFuture`.
+ """Get the exception of the `#!python QFuture`.
Returns:
- The excpetion of the `QFuture` object.
+ The exception of the `#!python QFuture` object.
"""
if self._cancelled:
return FutureCancelled(self._cancelled_message)
@@ -355,14 +361,17 @@ def exception(self) -> None:
return self._exception
def add_done_callback(self, callback: Callable):
- """Add a callback function to be ran when the `QFuture` is done.
-
- Note: The callback should expect one parameter that is the current instance of this class.
- The functions are called when the result of the future is set and therefore can use the
- result and modify it.
+ """Add a callback function to the list of callback functions which will be executed after
+ the `#!python QFuture` result is set.
Parameters:
- callback: The callback function to be called when the result is set.
+ callback: A callback function to append to the list of callback functions which will be
+ executed after the `#!python QFuture` result is set.
+
+ Note: The callback parameter must accept one parameter.
+ When it is executed the callback function will be passed the current instance of this
+ class. The callback function is executed after the result of the future is set,
+ allowing the use and modification of the result itself.
"""
self._callbacks.append(callback)
@@ -426,52 +435,61 @@ def __init__(self,
wait: bool = True,
lock: Optional[Union[threading_lock, multiprocessing_lock]] = None,
no_ctx: bool = False,
- reconnection_attempts: int = -1
+ reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function
):
"""Interface with a q process using the q IPC protocol.
- Note: Creating an instance of this class returns an instance of `pykx.SyncQConnection`.
- Directly instantiating an instance of `pykx.SyncQConnection` is recommended, but
- this behavior will remain for backwards compatibility.
+ Users are recommended to instantiate an object of
+ [pykx.SyncQConnection][pykx.SyncQConnection] instead of using this class
+ directly.
Parameters:
- host: The host name to which a connection is to be established.
- port: The port to which a connection is to be established.
+ host: The hostname to connect to.
+ port: The port to connect to.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
- will be non-blocking.
- large_messages: Whether support for messages >2GB should be enabled.
- tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
+ timeout: Timeout for blocking socket operations in seconds. If set to 0,
+ the socket will be non-blocking.
+ large_messages: Flag to enable support for messages >2GB.
+ tls: Flag to enable TLS.
+ unix: Flag to enable Unix domain socket instead of TCP socket. If set to
+ `#!python True`, the `#!python host` parameter is ignored. Does not work on Windows.
wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
- no_ctx: This parameter determines whether or not the context interface will be disabled.
- disabling the context interface will stop extra q queries being sent but will
- disable the extra features around the context interface.
- reconnection_attempts: This parameter specifies how many attempts will be made to
- reconnect to the server if the connection is lost. The query will be resent if the
- reconnection is successful. The default is -1 which will not attempt to reconnect, 0
- will continuosly attempt to reconnect to the server with no stop and an exponential
- backoff between successive attempts. Any positive integer will specify the maximum
- number of tries to reconnect before throwing an error if a connection can not be
- made.
-
- Note: The `username` and `password` parameters are not required.
- The `username` and `password` parameters are only required if the q server requires
- authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more
- information.
-
- Note: The `timeout` argument may not always be enforced when making successive queries.
- When making successive queries if one query times out the next query will wait until a
- response has been received from the previous query before starting the timer for its own
- timeout. This can be avoided by using a separate `QConnection` instance for each query.
-
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ will wait to receive). Can be overridden on a per-call basis. If `#!python True`,
+ Python will wait for the q server to execute the query, and respond with the
+ results. If `#!python False`, the q server will respond immediately to every query
+ with generic null (`#!q ::`), then execute them at some point in the future.
+ no_ctx: Flag to disable the context interface. Disabling the context interface will
+ stop extra q queries being sent, but will disable the extra features around the
+ context interface.
+ reconnection_attempts: The number of attempts to reconnect to the q server when there is
+ a disconnect. A negative value will disable reconnect attempts.
+ A value of 0 indicates no limit on reconnect attempts, with each attempt applying
+ `#!python reconnection_function`. Positive integers specify the maximum number of
+ attempts to reconnect. Hitting the maximum without a reconnect will throw an error.
+ reconnection_function: A function to execute on each attempt to reconnect. This function
+ must take one parameter that must be a `#!python float` type. When this function is
+ executed it will be passed the `#!python reconnection_delay` parameter. The default
+ implementation is a function which modifies `#!python reconnection_delay` to
+ increase its value exponentially (delay*2).
+ reconnection_delay: A `#!python float` for the initial delay between reconnect attempts
+ (in seconds). This is passed to the provided `#!python reconnection_function` that
+ is executed on reconnect attempt.
+
+ Note: The `#!python username` and `#!python password` parameters are not required.
+ The `#!python username` and `#!python password` parameters are only required if the
+ q server requires authorization. Refer to
+ [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information.
+
+ Note: The `#!python timeout` argument may not always be enforced.
+ When making successive queries if one query times out the next query will wait until
+ a response has been received from the previous query before starting the timer for its
+ own timeout. This can be avoided by using a separate `#!python QConnection` instance
+ for each query.
+
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Raises:
PyKXException: Using both tls and unix is not possible with a QConnection.
@@ -523,7 +541,9 @@ def _init(self,
no_ctx: bool = False,
as_server: bool = False,
conn_gc_time: float = 0.0,
- reconnection_attempts: int = -1
+ reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function
):
credentials = f'{normalize_to_str(username, "Username")}:' \
f'{normalize_to_str(password, "Password")}'
@@ -543,6 +563,8 @@ def _init(self,
'as_server': as_server,
'conn_gc_time': conn_gc_time,
'reconnection_attempts': reconnection_attempts,
+ 'reconnection_delay': reconnection_delay,
+ 'reconnection_function': reconnection_function
})
if system == 'Windows' and unix: # nocov
raise TypeError('Unix domain sockets cannot be used on Windows')
@@ -557,15 +579,20 @@ def _init(self,
object.__setattr__(self, '_handle', server_sock.fileno())
object.__setattr__(self, '_finalizer', lambda: server_sock.close())
else:
- object.__setattr__(self,
- '_handle',
- _ipc.init_handle(host,
- port,
- credentials,
- unix,
- tls,
- timeout,
- large_messages))
+ try:
+ handle = _ipc.init_handle(host,
+ port,
+ credentials,
+ unix,
+ tls,
+ timeout,
+ large_messages)
+ except BaseException as e:
+ if isinstance(e, QError):
+ if 'access' == str(e):
+ raise QError('access: Failed to connect to server with invalid username/password')
+ raise e
+ object.__setattr__(self, '_handle', handle)
if licensed:
object.__setattr__(
self,
@@ -726,8 +753,8 @@ def _send_sock(self,
# can be sent elsewhere, we just need to wait a moment until more data can be
# sent to the sockets buffer
pass
- except BaseException: # nocov
- raise RuntimeError("Failed to send query on IPC socket")
+ except BaseException as e: # nocov
+ raise RuntimeError(f"Failed to send query on IPC socket: '{e}'")
if isinstance(self, SyncQConnection) or isinstance(self, RawQConnection):
return
if wait:
@@ -862,6 +889,45 @@ def _create_result(self, buff):
q_future = self._call_stack.pop(0)
q_future.set_result(deserialize(memoryview(buff).obj))
+ def upd(self, table: str, data:Union[list, Table]) -> None:
+ """
+ Execute `#!q .u.upd` on a remote q process. This function assumes the definition of
+ `#!q .u.upd` on the remote q process takes the same count and data type of arguments
+ as the default implementation (q keyword `#!q insert`). The `#!python data` argument
+ will be converted to a list if it is a PyKX `#!python Table`.
+
+ Parameters:
+ table: The name of the global variable on the q process to update.
+ data: The contents of the update.
+
+ Returns:
+ On successful execution this function will return `#!python None`.
+
+ Example:
+
+ Successfully execute `#!q .u.upd` on connected process
+
+ ```python
+ >>> import pykx as kx
+ >>> with kx.SyncQConnection(port=5050) as q:
+ ... q.upd('trade', [kx.TimespanAtom('now'), 'AAPL', 1.0])
+ >>> trades = kx.Table(data = {
+ ... 'time': kx.TimespanAtom('now'),
+ ... 'sym': kx.random.random(N, ['AAPL', 'MSFT', 'GOOG']),
+ ... 'price': kx.random.random(N, 10.0)})
+ >>> with kx.SyncQConnection(port=5050) as q:
+ ... q.upd('trade', trades)
+ ```
+ """
+ if isinstance(data, Table):
+ data = data._values
+ try:
+ self(b'.u.upd', table, data)
+ except QError as err:
+ if '.u.upd' in str(err):
+ raise QError("Update function '.u.upd' not defined on connected process")
+ raise err
+
def file_execute(
self,
file_path: str,
@@ -932,6 +998,8 @@ def __init__(self,
lock: Optional[Union[threading_lock, multiprocessing_lock]] = None,
no_ctx: bool = False,
reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function
):
"""Interface with a q process using the q IPC protocol.
@@ -943,40 +1011,48 @@ def __init__(self,
port: The port to which a connection is to be established.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
+ timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket
will be non-blocking.
large_messages: Whether support for messages >2GB should be enabled.
tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
+ unix: Whether a Unix domain socket should be used instead of TCP. If set to
+ `#!python True`, the host parameter is ignored. Does not work on Windows.
wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
+ will wait to receive). Can be overridden on a per-call basis. If `#!python True`,
+ Python will wait for the q server to execute the query, and respond with
+ the results. If `#!python False`, the q server will respond immediately to every
+ query with generic null (`#!q ::`), then execute them at some point in the future.
no_ctx: This parameter determines whether or not the context interface will be disabled.
disabling the context interface will stop extra q queries being sent but will
disable the extra features around the context interface.
reconnection_attempts: This parameter specifies how many attempts will be made to
reconnect to the server if the connection is lost. The query will be resent if the
- reconnection is successful. The default is -1 which will not attempt to reconnect, 0
- will continuosly attempt to reconnect to the server with no stop and an exponential
- backoff between successive attempts. Any positive integer will specify the maximum
- number of tries to reconnect before throwing an error if a connection can not be
- made.
-
- Note: The `username` and `password` parameters are not required.
- The `username` and `password` parameters are only required if the q server requires
- authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more
- information.
-
- Note: The `timeout` argument may not always be enforced when making successive queries.
- When making successive queries if one query times out the next query will wait until a
- response has been received from the previous query before starting the timer for its own
- timeout. This can be avoided by using a separate `SyncQConnection` instance for each
- query.
-
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ reconnection is successful. The default is -1 which will not attempt to
+ reconnect, 0 will continuously attempt to reconnect to the server using the backoff
+ `#!python reconnection_function`. Any positive integer will specify the maximum
+ number of tries to reconnect before throwing an error if a connection can not
+ be made.
+ reconnection_delay: This parameter outlines the initial delay between reconnection
+ attempts, by default this is set to 0.5 seconds and is passed to the function
+ defined by the `#!python reconnection_function` parameter which takes this delay as
+ its only parameter.
+ reconnection_function: This parameter defines the function which is used to modify the
+ `#!python reconnection_delay` on successive attempts to reconnect to the server. By
+ default this is an exponential backoff where the `#!python reconnection_delay` is
+ multiplied by two on each invocation.
+
+ Note: The `#!python username` and `#!python password` parameters are not required.
+ The `#!python username` and `#!python password` parameters are only required if the
+ q server requires authorization. Refer to
+ [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information.
+
+ Note: The `#!python timeout` argument may not always be enforced when making successive
+ queries. When making successive queries if one query times out the next query will
+ wait until a response has been received from the previous query before starting the
+ timer for its own timeout. This can be avoided by using a separate
+ `#!python SyncQConnection` instance for each query.
+
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Raises:
PyKXException: Using both tls and unix is not possible with a QConnection.
@@ -1029,6 +1105,8 @@ def __init__(self,
lock=lock,
no_ctx=no_ctx,
reconnection_attempts=reconnection_attempts,
+ reconnection_delay=reconnection_delay,
+ reconnection_function=reconnection_function
)
super().__init__()
@@ -1043,14 +1121,16 @@ def __call__(self,
Parameters:
query: A q expression to be evaluated.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K` object.
- Up to 8 arguments can be provided, as that is the maximum supported by q.
- wait: Whether the q server should execute the query before responding. If `True`,
- Python will wait for the q server to execute the query, and respond with the
- results. If `False`, the q server will respond immediately to the query with
- generic null (`::`), then execute them at some point in the future. Defaults to
- whatever the `wait` keyword argument was for the `SyncQConnection` instance (i.e.
- this keyword argument overrides the instance-level default).
+ *args: Arguments to the q query. Each argument will be converted into a
+ `#!python pykx.K` object. Up to 8 arguments can be provided, as that is the
+ maximum supported by q.
+ wait: Whether the q server should execute the query before responding.
+ If `#!python True`, Python will wait for the q server to execute the query, and
+ respond with the results.
+ If `#!python False`, the q server will respond immediately to the query with
+ generic null (`#!q ::`), then execute them at some point in the future. Defaults to
+ whatever the `#!python wait` keyword argument was for the `#!python SyncQConnection`
+ instance (i.e. this keyword argument overrides the instance-level default).
Raises:
@@ -1138,7 +1218,8 @@ def _call(self,
if self._connection_info['reconnection_attempts'] != -1:
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self._connection_info['reconnection_delay']
+ reconnection_function = self._connection_info['reconnection_function']
while True:
try:
self._create_connection_to_server()
@@ -1157,8 +1238,12 @@ def _call(self,
f'Failed to reconnect, trying again in {reconnection_delay} seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
return self._call(query, *args, wait=wait, debug=debug)
@@ -1226,6 +1311,8 @@ def __init__(self,
event_loop: Optional[asyncio.AbstractEventLoop] = None,
no_ctx: bool = False,
reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function
):
"""Interface with a q process using the q IPC protocol.
@@ -1237,48 +1324,58 @@ def __init__(self,
port: The port to which a connection is to be established.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
+ timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket
will be non-blocking.
large_messages: Whether support for messages >2GB should be enabled.
tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
+ unix: Whether a Unix domain socket should be used instead of TCP. If set to
+ `#!python True`, the host parameter is ignored. Does not work on Windows.
wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
- event_loop: If running an event loop that supports the `create_task()` method then
- you can provide the event loop here and the returned future object will be an
- instance of the loops future type. This will allow the current event loop to manage
- awaiting `QFuture` objects as well as any other async tasks that may be running.
+ will wait to receive). Can be overridden on a per-call basis. If `#!python True`,
+ Python will wait for the q server to execute the query, and respond with
+ the results. If `#!python False`, the q server will respond immediately to every
+ query with generic null (`#!q ::`), then execute them at some point in the future.
+ event_loop: If running an event loop that supports the `#!python create_task()`
+ method then you can provide the event loop here and the returned future object will
+ be an instance of the loops future type. This will allow the current event loop
+ to manage awaiting `#!python QFuture` objects as well as any other async tasks that
+ may be running.
no_ctx: This parameter determines whether or not the context interface will be disabled.
disabling the context interface will stop extra q queries being sent but will
disable the extra features around the context interface.
reconnection_attempts: This parameter specifies how many attempts will be made to
- reconnect to the server if the connection is lost. The query will not be resent if
- the reconnection is successful. The default is -1 which will not attempt to
- reconnect, 0 will continuosly attempt to reconnect to the server with no stop and an
- exponential backoff between successive attempts. Any positive integer will specify
- the maximum number of tries to reconnect before throwing an error if a connection
- can not be made.
-
- Note: The `username` and `password` parameters are not required.
- The `username` and `password` parameters are only required if the q server requires
- authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more
- information.
-
- Note: The `timeout` argument may not always be enforced when making successive queries.
- When making successive queries if one query times out the next query will wait until a
- response has been received from the previous query before starting the timer for its own
- timeout. This can be avoided by using a separate `QConnection` instance for each query.
-
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ reconnect to the server if the connection is lost. The query will not be resent if
+ the reconnection is successful. The default is -1 which will not attempt to
+ reconnect, 0 will continuously attempt to reconnect to the server using the backoff
+ `#!python reconnection_function`. Any positive integer will specify the maximum
+ number of tries to reconnect before throwing an error if a connection
+ cannot be made.
+ reconnection_delay: This parameter outlines the initial delay between reconnection
+ attempts, by default this is set to 0.5 seconds and is passed to the function
+ defined by the `#!python reconnection_function` parameter which takes this delay
+ as its only parameter.
+ reconnection_function: This parameter defines the function which is used to modify the
+ `#!python reconnection_delay` on successive attempts to reconnect to the server. By
+ default this is an exponential backoff where the `#!python reconnection_delay` is
+ multiplied by two on each invocation.
+
+ Note: The `#!python username` and `#!python password` parameters are not required.
+ The `#!python username` and `#!python password` parameters are only required if
+ the q server requires authorization. Refer to
+ [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information.
+
+ Note: The `#!python timeout` argument may not always be enforced when making
+ successive queries. When making successive queries if one query times out the next query
+ will wait until a response has been received from the previous query before starting the
+ timer for its own timeout. This can be avoided by using a separate
+ `#!python QConnection` instance for each query.
+
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Warning: AsyncQConnections will not resend queries that have not completed on reconnection.
- When using the `reconnection_attempts` key word argument any queries that were not
- complete before the connection was lost will have to be manually sent again after the
- automatic reconnection.
+ When using the `#!python reconnection_attempts` key word argument any queries that were
+ not complete before the connection was lost will have to be manually sent again after
+ the automatic reconnection.
Raises:
PyKXException: Using both tls and unix is not possible with a QConnection.
@@ -1349,6 +1446,8 @@ async def main():
'loop': event_loop,
'no_ctx': no_ctx,
'reconnection_attempts':reconnection_attempts,
+ 'reconnection_delay': reconnection_delay,
+ 'reconnection_function': reconnection_function,
})
object.__setattr__(self, '_initialized', False)
@@ -1367,6 +1466,8 @@ async def _async_init(self,
event_loop: Optional[asyncio.AbstractEventLoop] = None,
no_ctx: bool = False,
reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function,
):
object.__setattr__(self, '_call_stack', [])
self._init(host,
@@ -1382,6 +1483,8 @@ async def _async_init(self,
lock=lock,
no_ctx=no_ctx,
reconnection_attempts=reconnection_attempts,
+ reconnection_delay=reconnection_delay,
+ reconnection_function=reconnection_function,
)
object.__setattr__(self, '_loop', event_loop)
con_info = object.__getattribute__(self, '_connection_info')
@@ -1407,6 +1510,8 @@ async def _initobj(self): # nocov
event_loop=self._stored_args['loop'],
no_ctx=self._stored_args['no_ctx'],
reconnection_attempts=self._stored_args['reconnection_attempts'],
+ reconnection_delay=self._stored_args['reconnection_delay'],
+ reconnection_function=self._stored_args['reconnection_function'],
)
return self
@@ -1428,17 +1533,20 @@ def __call__(self,
Parameters:
query: A q expression to be evaluated.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K` object.
- Up to 8 arguments can be provided, as that is the maximum supported by q.
- wait: Whether the q server should execute the query before responding. If `True`,
- Python will wait for the q server to execute the query, and respond with the
- results. If `False`, the q server will respond immediately to the query with
- generic null (`::`), then execute them at some point in the future. Defaults to
- whatever the `wait` keyword argument was for the `ASyncQConnection` instance (i.e.
- this keyword argument overrides the instance-level default).
+ *args: Arguments to the q query. Each argument will be converted into a
+ `#!python pykx.K` object. Up to 8 arguments can be provided, as that is
+ the maximum supported by q.
+ wait: Whether the q server should execute the query before responding.
+ If `#!python True`, Python will wait for the q server to execute the query,
+ and respond with the results. If `#!python False`, the q server will respond
+ immediately to the query with generic null (`#!q ::`), then execute them at some
+ point in the future. Defaults to whatever the `#!python wait` keyword argument
+ was for the `AsyncQConnection` instance (i.e. this keyword argument overrides the
+ instance-level default).
reuse: Whether the AsyncQConnection instance should be reused for subsequent queries,
if using q queries that respond in a deferred/asynchronous manner this should be set
- to `False` so the query can be made in a dedicated `AsyncQConnection` instance.
+ to `#!python False` so the query can be made in a dedicated
+ `#!python AsyncQConnection` instance.
Returns:
A QFuture object that can be awaited on to get the result of the query.
@@ -1531,7 +1639,8 @@ def __call__(self,
self._cancel_all_futures()
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self._connection_info['reconnection_delay']
+ reconnection_function = self._connection_info['reconnection_function']
while True:
try:
self._create_connection_to_server()
@@ -1550,8 +1659,12 @@ def __call__(self,
f'Failed to reconnect, trying again in {reconnection_delay} seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
break
@@ -1579,7 +1692,8 @@ def _call(self,
self._cancel_all_futures()
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self._connection_info['reconnection_delay']
+ reconnection_function = self._connection_info['reconnection_function']
while True:
try:
self._create_connection_to_server()
@@ -1598,8 +1712,12 @@ def _call(self,
f'Failed to reconnect, trying again in {reconnection_delay} seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
break
@@ -1787,49 +1905,51 @@ def __init__(self,
port: The port to which a connection is to be established.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
+ timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket
will be non-blocking.
large_messages: Whether support for messages >2GB should be enabled.
tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
+ unix: Whether a Unix domain socket should be used instead of TCP. If set to
+ `#!python True`, the host parameter is ignored. Does not work on Windows.
wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
- event_loop: If running an event loop that supports the `create_task()` method then
- you can provide the event loop here and the returned future object will be an
+ will wait to receive). Can be overridden on a per-call basis. If `#!python True`,
+ Python will wait for the q server to execute the query, and respond with the
+ results. If `#!python False`, the q server will respond immediately to every query
+ with generic null (`#!q ::`), then execute them at some point in the future.
+ event_loop: If running an event loop that supports the `#!python create_task()` method
+ then you can provide the event loop here and the returned future object will be an
instance of the loops future type. This will allow the current event loop to manage
- awaiting `QFuture` objects as well as any other async tasks that may be running.
+ awaiting `#!python QFuture` objects as well as any other async tasks that may be
+ running.
no_ctx: This parameter determines whether or not the context interface will be disabled.
disabling the context interface will stop extra q queries being sent but will
disable the extra features around the context interface.
- as_server: If this parameter is set to True the QConnection will act as a `q` server,
+ as_server: If this parameter is set to True the QConnection will act as a q server,
that other processes can connect to, and will not create a connection. this
functionality is licensed only.
conn_gc_time: When running as a server this will determine the number of seconds between
going through the list of opened connections and closing any that the clients have
- closed. If not set the default of `0.0` will cause any old connections to never be
- closed unless `self.clean_open_connections()` is manually called.
-
- Note: The `username` and `password` parameters are not required.
- The `username` and `password` parameters are only required if the q server requires
- authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more
- information.
-
- Note: The `timeout` argument may not always be enforced when making successive queries.
- When making successive queries if one query times out the next query will wait until a
- response has been received from the previous query before starting the timer for its own
- timeout. This can be avoided by using a separate `QConnection` instance for each query.
-
- Note: The overhead of calling `clean_open_connections` is large.
- When running as a server you should ensure that `clean_open_connections` is called
- fairly infrequently as the overhead of clearing all the dead connections can be quite
- large. It is recommended to have a large delay on successive clears or manage it
+ closed. If not set the default of 0.0 will cause any old connections to never be
+ closed unless `#!python self.clean_open_connections()` is manually called.
+
+ Note: The `#!python username` and `#!python password` parameters are not required.
+ The `#!python username` and `#!python password` parameters are only required if the q
+ server requires authorization. Refer to
+ [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information.
+
+ Note: The `#!python timeout` argument may not always be enforced when making successive
+ queries. When making successive queries if one query times out the next query will wait
+ until a response has been received from the previous query before starting the timer for
+ its own timeout. This can be avoided by using a separate `#!python QConnection` instance
+ for each query.
+
+ Note: The overhead of calling `#!python clean_open_connections` is large.
+ When running as a server you should ensure that `#!python clean_open_connections` is
+ called fairly infrequently as the overhead of clearing all the dead connections can be
+ quite large. It is recommended to have a large delay on successive clears or manage it
manually.
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Raises:
PyKXException: Using both tls and unix is not possible with a QConnection.
@@ -1947,17 +2067,16 @@ def __call__(self,
Parameters:
query: A q expression to be evaluated.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K` object.
- Up to 8 arguments can be provided, as that is the maximum supported by q.
- wait: Whether the q server should execute the query before responding. If `True`,
- Python will wait for the q server to execute the query, and respond with the
- results. If `False`, the q server will respond immediately to the query with
- generic null (`::`), then execute them at some point in the future. Defaults to
- whatever the `wait` keyword argument was for the `ASyncQConnection` instance (i.e.
- this keyword argument overrides the instance-level default).
- reuse: Whether the AsyncQConnection instance should be reused for subsequent queries,
- if using q queries that respond in a deferred/asynchronous manner this should be set
- to `False` so the query can be made in a dedicated `AsyncQConnection` instance.
+ *args: Arguments to the q query. Each argument will be converted into a
+ `#!python pykx.K` object. Up to 8 arguments can be provided, as that is the maximum
+ supported by q.
+ wait: Whether the q server should execute the query before responding. If
+ `#!python True`, Python will wait for the q server to execute the query, and respond
+ with the results. If `#!python False`, the q server will respond immediately to the
+ query with generic null (`#!q ::`), then execute it at some point in the future.
+ Defaults to whatever the `#!python wait` keyword argument was for the
+ `#!python AsyncQConnection` instance (i.e. this keyword argument overrides the
+ instance-level default).
Returns:
A QFuture object that can be awaited on to get the result of the query.
@@ -1972,7 +2091,7 @@ def __call__(self,
Note: Queries are not sent until a response has been awaited or the send queue is polled.
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Examples:
@@ -2119,8 +2238,8 @@ def _send_sock_server(self, sock, response, level):
# can be sent elsewhere, we just need to wait a moment until more data can be
# sent to the sockets buffer
pass
- except BaseException: # nocov
- raise RuntimeError("Failed to send query on IPC socket")
+ except BaseException as e: # nocov
+ raise RuntimeError(f"Failed to send query on IPC socket: '{e}'")
except BaseException: # nocov
pass
@@ -2416,6 +2535,8 @@ def __init__(self,
lock: Optional[Union[threading_lock, multiprocessing_lock]] = None,
no_ctx: bool = False,
reconnection_attempts: int = -1,
+ reconnection_delay: float = 0.5,
+ reconnection_function: callable = reconnection_function,
):
"""Interface with a q process using the q IPC protocol.
@@ -2428,40 +2549,47 @@ def __init__(self,
port: The port to which a connection is to be established.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
+ timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket
will be non-blocking.
large_messages: Whether support for messages >2GB should be enabled.
tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
+ unix: Whether a Unix domain socket should be used instead of TCP. If set to
+ `#!python True`, the host parameter is ignored. Does not work on Windows.
wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
+ will wait to receive). Can be overridden on a per-call basis. If `#!python True`,
+ Python will wait for the q server to execute the query, and respond with the
+ results. If `#!python False`, the q server will respond immediately to every query
+ with generic null (`#!q ::`), then execute them at some point in the future.
no_ctx: This parameter determines whether or not the context interface will be disabled.
disabling the context interface will stop extra q queries being sent but will
disable the extra features around the context interface.
reconnection_attempts: This parameter specifies how many attempts will be made to
reconnect to the server if the connection is lost. The query will be resent if the
reconnection is successful. The default is -1 which will not attempt to reconnect, 0
- will continuosly attempt to reconnect to the server with no stop and an exponential
- backoff between successive attempts. Any positive integer will specify the maximum
- number of tries to reconnect before throwing an error if a connection can not be
- made.
-
- Note: The `username` and `password` parameters are not required.
- The `username` and `password` parameters are only required if the q server requires
- authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more
- information.
-
- Note: The `timeout` argument may not always be enforced when making successive queries.
- When making successive queries if one query times out the next query will wait until a
- response has been received from the previous query before starting the timer for its own
- timeout. This can be avoided by using a separate `SecureQConnection` instance for each
- query.
-
- Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used.
+ will continuously attempt to reconnect to the server using the backoff
+ `#!python reconnection_function`. Any positive integer will specify the maximum
+ number of tries to reconnect before throwing an error if a connection can not be made.
+ reconnection_delay: This parameter outlines the initial delay between reconnection
+ attempts. By default this is set to 0.5 seconds and is passed to the function
+ defined by the `#!python reconnection_function` parameter, which takes this delay
+ as its only parameter.
+ reconnection_function: This parameter defines the function which is used to modify the
+ `#!python reconnection_delay` on successive attempts to reconnect to the server. By
+ default this is an exponential backoff where the `#!python reconnection_delay` is
+ multiplied by two on each invocation.
+
+ Note: The `#!python username` and `#!python password` parameters are not required.
+ The `#!python username` and `#!python password` parameters are only required if
+ the q server requires authorization. Refer to
+ [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information.
+
+ Note: The `#!python timeout` argument may not always be enforced when making successive
+ queries. When making successive queries if one query times out the next query will wait
+ until a response has been received from the previous query before starting the timer for
+ its own timeout. This can be avoided by using a separate `#!python SecureQConnection`
+ instance for each query.
+
+ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used.
Raises:
PyKXException: Using both tls and unix is not possible with a QConnection.
@@ -2488,6 +2616,8 @@ def __init__(self,
lock=lock,
no_ctx=no_ctx,
reconnection_attempts=reconnection_attempts,
+ reconnection_delay=reconnection_delay,
+ reconnection_function=reconnection_function,
)
super().__init__()
@@ -2514,15 +2644,16 @@ def __call__(self,
Parameters:
query: A q expression to be evaluated.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K` object.
- Up to 8 arguments can be provided, as that is the maximum supported by q.
- wait: Whether the q server should execute the query before responding. If `True`,
- Python will wait for the q server to execute the query, and respond with the
- results. If `False`, the q server will respond immediately to the query with
- generic null (`::`), then execute them at some point in the future. Defaults to
- whatever the `wait` keyword argument was for the `SecureQConnection` instance (i.e.
- this keyword argument overrides the instance-level default).
-
+ *args: Arguments to the q query. Each argument will be converted into a
+ `#!python pykx.K` object. Up to 8 arguments can be provided, as that is the maximum
+ supported by q.
+ wait: Whether the q server should execute the query before responding. If
+ `#!python True`, Python will wait for the q server to execute the query, and
+ respond with the results. If `#!python False`, the q server will respond immediately
+ to the query with generic null (`#!q ::`), then execute it at some point in the
+ future. Defaults to whatever the `#!python wait` keyword argument was for the
+ `#!python SecureQConnection` instance (i.e. this keyword argument overrides the
+ instance-level default).
Raises:
RuntimeError: A closed IPC connection was used.
@@ -2609,10 +2740,8 @@ def _call(self,
if not (issubclass(tquery, K) or isinstance(query, (str, bytes))):
raise ValueError('Cannot send object of passed type over IPC: ' + str(tquery))
if not issubclass(tquery, Function):
- if isinstance(query, CharVector):
- query = bytes(query)
- else:
- query = normalize_to_bytes(query, 'Query')
+ if isinstance(query, str):
+ query = query.encode()
if len(args) > 8:
raise TypeError('Too many parameters - q queries cannot have more than 8 parameters')
prev_types = [type(x) for x in args]
@@ -2645,7 +2774,8 @@ def _call(self,
if self._connection_info['reconnection_attempts'] != -1:
print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr)
loops = self._connection_info['reconnection_attempts']
- reconnection_delay = 0.5
+ reconnection_delay = self._connection_info['reconnection_delay']
+ reconnection_function = self._connection_info['reconnection_function']
while True:
try:
self._create_connection_to_server()
@@ -2658,7 +2788,7 @@ def _call(self,
if loops == 0:
print(
'WARNING: Could not reconnect to server within '
- f'{self.q_connection._connection_info["reconnection_attempts"]} attempts.',
+ f'{self._connection_info["reconnection_attempts"]} attempts.',
file=sys.stderr
)
raise err
@@ -2666,8 +2796,12 @@ def _call(self,
f'Failed to reconnect, trying again in {reconnection_delay} seconds.',
file=sys.stderr
)
+ if not isinstance(reconnection_delay, (int, float)):
+ raise TypeError(
+ 'reconnection_delay must be either int/float'
+ )
sleep(reconnection_delay)
- reconnection_delay *= 2
+ reconnection_delay = reconnection_function(reconnection_delay)
continue
print('Connection successfully reestablished.', file=sys.stderr)
return self._call(query, *args, wait=wait, debug=debug)
diff --git a/src/pykx/lib/4-1-libs/l64/libq.so b/src/pykx/lib/4-1-libs/l64/libq.so
index bf837c7..a40309f 100755
Binary files a/src/pykx/lib/4-1-libs/l64/libq.so and b/src/pykx/lib/4-1-libs/l64/libq.so differ
diff --git a/src/pykx/lib/4-1-libs/l64arm/libq.so b/src/pykx/lib/4-1-libs/l64arm/libq.so
index bcc4b10..ff58f38 100755
Binary files a/src/pykx/lib/4-1-libs/l64arm/libq.so and b/src/pykx/lib/4-1-libs/l64arm/libq.so differ
diff --git a/src/pykx/lib/4-1-libs/m64/libq.dylib b/src/pykx/lib/4-1-libs/m64/libq.dylib
index 7864911..678e49c 100755
Binary files a/src/pykx/lib/4-1-libs/m64/libq.dylib and b/src/pykx/lib/4-1-libs/m64/libq.dylib differ
diff --git a/src/pykx/lib/4-1-libs/m64arm/libq.dylib b/src/pykx/lib/4-1-libs/m64arm/libq.dylib
index 953bfc2..52ce515 100755
Binary files a/src/pykx/lib/4-1-libs/m64arm/libq.dylib and b/src/pykx/lib/4-1-libs/m64arm/libq.dylib differ
diff --git a/src/pykx/lib/4-1-libs/q.k b/src/pykx/lib/4-1-libs/q.k
index 06ad989..34b92ec 100644
--- a/src/pykx/lib/4-1-libs/q.k
+++ b/src/pykx/lib/4-1-libs/q.k
@@ -92,7 +92,7 @@ fu:{[f;x]$[0h>@x;f x;f[u](u:?x)?x]} /uniques
fc:{$[1@x;`${$[(*x)in n,"_ ";"a",x;x]}x@&(x:$x)in an;ft[{s[i]:`$($s i:&((s:id'!x:+x) in`i,res,!`.q)),'"1";+({unm[x]}/s)!. x}]x]}
+id:{$[0h>@x;`${$[(*x)in n,"_ ";"a",x;x]}x@&(x:$x)in an;ft[{$[98h=@x;+.z.s[+x];[s[i]:`$($s i:&((s:id'!x) in`i,res,!`.q)),'"1";({unm[x]}/s)!. x]]}]x]}
j10:64/:b6?;x10:b6@0x40\: /base64 J from char10
j12:36/:nA?;x12:nA@0x24\: /base36 J from char12(cusip)
btoa:-32!;sha1:-33!;prf0:+`name`file`line`col`text`pos!*-37!
@@ -117,23 +117,26 @@ IN:{$[99h<@x;x in y;0]};qa:{$[qb x;0;IN[*x;a0];1;|/qa'1_x]};qb:{(2>#x)|(@x)&~11=
/ CAN EXIT HERE FOR SMALL Q
/ pt(tables) pf(date/month/year/int) pd(dirs) pv(values) pn(count) pt::0#pf::`
vt:(,`)!,()!();
-bv:{g:$[(::)~x;max;min];x:.Q.d;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;jp[x]'`$0:`/:x,`par.txt;,x];
- t:?,/!:'.Q.vt:{(:'x)(=,/. x)}'{({("DMJJ"`date`month`year`int?.Q.pf)$$last@x:`\:x}'x)!!:'x}'d;
- d:{`/:'x[(. y)[;0]],'(`$$(. y)[;1]),'!y}[P]@{i:y@&:x=y x:@[x;&x~\:();:;*0#`. pf];(i;x i)}[;g]'+:t#/:g''.Q.vt:t#/:.Q.vt;.Q.vt:P!.q.except[. .Q.pf]''.Q.vt;
- .Q.vp:t!{(+(,.Q.pf)!,0#. .Q.pf),'+(-2!'.+x)#'+|0#x:?[x;();0b;()]}'d;.Q.pt,:{.[x;();:;+.q.except[!+.Q.vp x;.Q.pf]!x];x}'.q.except[t;.Q.pt];}
+bvfp:{g:$[(::)~x;max;min];x:.Q.d;d:{$[y~pv;`/:'x,'d@&(d:!x)in`$$y;u@&{11h=@!x}'u:`/:'x,'`$$y]}[;y@:&y in pv]'P:$[`par.txt in!x;jp[x]'`$0:`/:x,`par.txt;,x];if[.Q.vt~(,`)!,()!();.Q.vt:P!(#P)#.:.Q.vt];
+ t:?,/!:'vt:{(:'x)(=,/. x)}'{({("DMJJ"`date`month`year`int?pf)$$last@`\:x}'x)!!:':x}'d;
+ if[#nt:(exc:.q.except)[t;!vp];d:{`/:'x[(. y)[;0]],'(`$$(. y)[;1]),'!y}[P]@{i:y@&:x=y x:@[x;&x~\:();:;*0#pv];(i;x i)}[;g]'+:nt#/:g''nt#/:vt;.Q.vp,:nt!{(+(,pf)!,0#pv),'+(-2!'.+x)#'+|0#x:?[x;();0b;()]}':d;.Q.vt:.Q.vt,\:nt!(#nt)#,exc[?pv;y]];.Q.vt:(.Q.vt,''P!exc[y]''vt)exc''vt:(nt,!vp)#/:vt;{.[x;();:;+.q.except[!+vp x;pf]!x]}'exc[!vp;pt];pt::.q.asc@!vp;vpv::?pv;}
+bvi:{$[(`vpv in !.Q);bvfp[x;?.q.except[pv;vpv]];bv x]}
+bv:{vt::(,`)!,()!();vp::(0#`)!();vpv::0#pv;bvfp[x;pv]}
-pt:pm:();MAP:{{$[0>@a:.+0!. x;.q.set[x]@.`$-1_$a;]}'a@&~(a:."\\a")in pt;pm::();if[#pt;pm::pt!{(`u#pd,'pv)!p2[(x;();0b;())]/':+(pd;pv)}':pt]}
-dd:{`/:x,`$$y};d0:{dd[*|pd;*|pv]};p1:{$[#pm;pm[x](y;z);z in vt[y;x];vp x;+(!+. x)!`/:dd[y;z],x]};p2:{0!(?).@[x;0;p1[;y;z]]}
+sp:{$[0>."\\s";x'y;x':y]}
+pt:pm:();MAP:{{$[0>@a:.+0!. x;.q.set[x]@.`$-1_$a;]}'a@&~(a:."\\a")in pt;pm::();if[#pt;pm::pt!sp[{(`u#pd,'pv)!sp[p2[(x;();0b;())]/;+(pd;pv)]}]pt]}
+dd:{`/:x,`$$y};d0:{dd[*|pd;*|pv]};p1:{$[#pm;+((,pf)!,z),+pm[x](y;z);z in vt[y;x];vp x;+(!+. x)!`/:dd[y;z],x]};p2:{0!(?).@[x;0;p1[;y;z]]}
-p:{$[~#D;p2[x;d]':y;(,/p2[x]'/':P[i](;)'y)@<,/y@:i:&0<#:'y:D{x@&x in y}\:y]}
-view:{pd::PD x:$[(::)~x;x;$[#x:&PV in x;x;'"invalid partition filter"]];u~:?u::..[pf;();:;pv::PV x];.[;();:;]'[pt;{+(x . y,`.d)!y}[x]':pt::!x:d0[]];pn::pt!(#pt)#()}
+p:{$[~#D;sp[p2[x;d]]y;(,/sp[p2[x]'/]P[i](;)'y)@<,/y@:i:&0<#:'y:D{x@&x in y}\:y]}
+view:{pd::PD x:$[(::)~x;x;$[#x:&PV in x;x;'"invalid partition filter"]];u~:?u::..[pf;();:;pv::PV x];.[;();:;]'[pt;sp[{+(x . y,`.d)!y}[x]]pt::!x:d0[]];pn::pt!(#pt)#()}
-jp:{$[$["w"~*$.z.o;u[$[(_u:$y)like"[a-z]:*";2;0]]in"\\/";("/"=*$y)|objp y];-1!y;`/:x,y]};rp:-500!
-L:{D::();f:{!:'?{-1!`$("/"/:3#"/"\:x),"/_"}'u&objp'u:1_'$x;x};f@,d::$[z;rp y;y];if[x~,`par.txt;if[~#x:,/D::{x@&~(x:!x)like"*$"}'P::f@jp[d]'`$0:`/:d,*x;'empty]];if[^*PV::x@:."\\p")|."\\_";cn'.:'pt];}
+L:{D::();L1[x;y;z;0;()]};li:{L1[$[D~();::;,`par.txt];d;0;1;x,()]}
/L:{P::,`:.;D::,x;pf::`date;pt::!P[0]@**D;T::P!P{z!{x!(y . ,[;`]z,)'x}[x;y]'z}[pt]'D}
-cn:{$[#n:pn x:.+x;n;pn[x]:(#p1 .)':+(x;pd;pv)]};pcnt:{+/cn x};dt:{cn[y]@&pv in x}
+cn:{$[#n:pn x:.+x;n;pn[x]:sp[#p1 .;+(x;pd;pv)]]};pcnt:{+/cn x};dt:{cn[y]@&pv in x}
ind:{,/i[j]{fp[pf;p]p1[x;pd y;p:pv y]z}[.+x]'(j:&~=':i)_y-n i:(n:+\0,cn x)bin y}
fp:{+((,*x)!,(#z)#$[-7h=@y;y;(*|x)$y]),+z}
foo:{[t;c;b;a;v;d]if[v;g:*|`\:b f:*!b;b:1_b];,/$[v|~#a;d fp[$[v;f,g;pf]]';::]p[(.+t;c;b;a)]d}
diff --git a/src/pykx/lib/4-1-libs/w64/q.dll b/src/pykx/lib/4-1-libs/w64/q.dll
index c1b7ff9..9b856b8 100644
Binary files a/src/pykx/lib/4-1-libs/w64/q.dll and b/src/pykx/lib/4-1-libs/w64/q.dll differ
diff --git a/src/pykx/lib/html.q b/src/pykx/lib/html.q
new file mode 100644
index 0000000..09dc093
--- /dev/null
+++ b/src/pykx/lib/html.q
@@ -0,0 +1,94 @@
+\d .pykx
+
+// @private
+// @desc
+// Utility for generating the JSON data to be used for the rendering
+// of a HTML version of a PyKX in-memory and splayed table
+util.html.memsplay:{[c;t]
+ n:count t;
+ cls:{x!x}$[c[1]maxrows;?[tab;enlist(=;`i;(last;`i));0b;cls];()];
+ headcols,tailcol
+ }
+
+// @private
+// @desc
+// Utility to allow the table to be artificially extended with an additional column
+// ... if the console width is smaller than the maximum number of columns
+util.html.extendcols:{[maxcols;origtabcols;tab]
+ $[maxcols row x columns printed statement following the HTML table
+util.html.rowcols:{
+ $[x[0]",{reverse "," sv 3 cut reverse string x}[n]," rows × ",
+ {reverse "," sv 3 cut reverse string x}[count cols y]," columns
";
+ z]
+ }
+
+// @private
+// @desc
+// Detect any invalid columns within a table
+util.html.detectbadcols:{
+ typ:.Q.qp x;
+ fn:$[typ;{flip ct!count[ct:cols x]#()};0#];
+ c:cols x;
+ dup:where 11023;'version];vi:sum/ 2/:'.:'"0"^(r[0],20#"0";-20$r[1],10#"0";-10$r 2),\:"b";"***III"$`K`k`Kl`major`minor`patch!enlist[v],enlist["D"$"2022.03.10"],vi,raw};
.[`.comkxic;();,;.comkxic.Kf "3.0.0"];
diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/lib/l64/libq.so
index 5a5f383..640f8d8 100755
Binary files a/src/pykx/lib/l64/libq.so and b/src/pykx/lib/l64/libq.so differ
diff --git a/src/pykx/lib/l64arm/libe.so b/src/pykx/lib/l64arm/libe.so
index ca33271..144511e 100755
Binary files a/src/pykx/lib/l64arm/libe.so and b/src/pykx/lib/l64arm/libe.so differ
diff --git a/src/pykx/lib/l64arm/libq.so b/src/pykx/lib/l64arm/libq.so
index c148bd6..ffb2503 100755
Binary files a/src/pykx/lib/l64arm/libq.so and b/src/pykx/lib/l64arm/libq.so differ
diff --git a/src/pykx/lib/m64/libq.dylib b/src/pykx/lib/m64/libq.dylib
index 74353fa..8c203f0 100755
Binary files a/src/pykx/lib/m64/libq.dylib and b/src/pykx/lib/m64/libq.dylib differ
diff --git a/src/pykx/lib/m64arm/libq.dylib b/src/pykx/lib/m64arm/libq.dylib
index 52604ce..e4c8d67 100755
Binary files a/src/pykx/lib/m64arm/libq.dylib and b/src/pykx/lib/m64arm/libq.dylib differ
diff --git a/src/pykx/lib/q.k b/src/pykx/lib/q.k
index 782f4c6..0ccf8e2 100644
--- a/src/pykx/lib/q.k
+++ b/src/pykx/lib/q.k
@@ -30,7 +30,7 @@ lower:{$[$[(~@x)&10h~@*x;&/10h=@:'x;0b];_x;~t&(77h>t)|99ht)|99h@z;:[;z];z]]}
-/select insert update delete exec / fkeys[&keys] should be eponymous, e.g. order.customer.nation
+/select insert update delete exec / fkeys[&keys] should be eponymous, e.g. order.customer.nation
/{keys|cols}`t `f's{xasc|xdesc}`t n!`t xcol(prename) xcols(prearrange) FT(xcol xasc xdesc)
view:{$`. .`\:x};tables:{."\\a ",$$[^x;`;x]};views:{."\\b ",$$[^x;`;x]}
cols:{$[.Q.qp x:.Q.v x;.Q.pf,!+x;98h=@x;!+x;11h=@!x;!x;!+0!x]} /cols:{!.Q.V x}
diff --git a/src/pykx/lib/w64/e.dll b/src/pykx/lib/w64/e.dll
index 39dd8c3..2426d15 100644
Binary files a/src/pykx/lib/w64/e.dll and b/src/pykx/lib/w64/e.dll differ
diff --git a/src/pykx/lib/w64/e.lib b/src/pykx/lib/w64/e.lib
index 6a5c327..5d5c431 100644
Binary files a/src/pykx/lib/w64/e.lib and b/src/pykx/lib/w64/e.lib differ
diff --git a/src/pykx/lib/w64/q.dll b/src/pykx/lib/w64/q.dll
index 442727f..2bb1b54 100644
Binary files a/src/pykx/lib/w64/q.dll and b/src/pykx/lib/w64/q.dll differ
diff --git a/src/pykx/lib/w64/q.lib b/src/pykx/lib/w64/q.lib
index efe485c..afb66ce 100644
Binary files a/src/pykx/lib/w64/q.lib and b/src/pykx/lib/w64/q.lib differ
diff --git a/src/pykx/license.py b/src/pykx/license.py
index 8d9dfa1..091bce4 100644
--- a/src/pykx/license.py
+++ b/src/pykx/license.py
@@ -2,6 +2,7 @@
import os
import shutil
from pathlib import Path
+from typing import Optional
from . import licensed
from .config import qlic
@@ -23,10 +24,14 @@ def _init(_q):
q = _q
-def check(license, *, format='FILE', license_type='kc.lic') -> bool:
+def check(license: str,
+ *,
+ format: Optional[str] = 'FILE',
+ license_type: Optional[str] = 'kc.lic'
+) -> bool:
"""
- Validate that the license key information that you have provided matches the license
- saved to disk which is used by PyKX
+ Validate that the license key information you provided matches the license
+ saved to disk which is read by PyKX.
Parameters:
license: If using "FILE" format this is the location of the file being used for comparison.
@@ -39,7 +44,44 @@ def check(license, *, format='FILE', license_type='kc.lic') -> bool:
Returns:
A boolean indicating if the license is correct or not and a printed message describing
- the issue
+ the issue
+
+ Examples:
+
+ Validate that a provided license matches an existing persisted license
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.check('/usr/location/kc.lic')
+ True
+ ```
+
+ Attempt to check a new license against an existing installed license
+
+ ```python
+ >>> import pykx as kx
+ >>> check = kx.license.check('/usr/location/kc.lic')
+ Supplied license information does not match.
+ Please consider reinstalling your license using pykx.util.install_license
+
+ Installed license representation:
+ b'iIXSiEWzCNTkkCWK5Gggy..'
+ User expected license representation:
+ b'IyEvdXNyL2Jpbi9lbngDf..'
+ >>> check
+ False
+ ```
+
+ Attempt to check a license in the case no license is currently installed
+
+ ```python
+ >>> import pykx as kx
+ >>> check = kx.license.check('setup.py', license_type='kc.lic')
+ Unable to find an installed license: kc.lic at location: /usr/local/anaconda3/envs/qenv/q.
+ Please consider installing your license again using pykx.util.install_license
+ >>> check
+ False
+ ```
"""
format = format.lower()
if format not in ('file', 'string'):
@@ -81,19 +123,34 @@ def check(license, *, format='FILE', license_type='kc.lic') -> bool:
def expires() -> int:
"""
- The number of days until a license is set to expire
+ The number of days until the license is set to expire.
Returns:
- The number of days until a users license is set to expire
+ The number of days until the license is set to expire
+
+ Examples:
+
+ The number of days until your license expires:
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.expires()
+ 265
+ ```
"""
if not licensed:
raise Exception('Unlicensed user, unable to determine license expiry')
return (q('"D"$', q.z.l[1]) - q.z.D).py()
-def install(license, *, format='FILE', license_type='kc.lic', force=False):
+def install(license: str,
+ *,
+ format: Optional[str] = 'FILE',
+ license_type: Optional[str] = 'kc.lic',
+ force: Optional[bool] = False
+) -> bool:
"""
- (Re)install a KX license key optionally overwriting the currently installed license
+ (Re)install a KX license key, optionally overwriting the currently installed license.
Parameters:
license: If using "FILE" this is the location of the file being used for comparison.
@@ -107,6 +164,25 @@ def install(license, *, format='FILE', license_type='kc.lic', force=False):
Returns:
A boolean indicating if the license has been correctly overwritten
+
+ Examples:
+
+ Install a license using a supplied file location
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.license.install('/path/to/license')
+ True
+ ```
+
+ Install a k4.lic base64 encoded string representation of the license
+
+ ```python
+ >>> import pykx as kx
+ >>> b64_string = 'IdyannfDangfa4FasdjD9fcda' # Example
+ >>> kx.license.install(b64_string, format = "STRING", license_type = "k4.lic")
+ True
+ ```
"""
format = format.lower()
if format not in ('file', 'string'):
diff --git a/src/pykx/nbextension.py b/src/pykx/nbextension.py
index beb6c3a..250a7e0 100644
--- a/src/pykx/nbextension.py
+++ b/src/pykx/nbextension.py
@@ -1,6 +1,8 @@
import pykx as kx
from IPython.display import display
+from pathlib import Path
+
def q(instructions, code): # noqa
ld = kx.SymbolAtom('.Q.pykxld')
host = 'localhost'
@@ -15,6 +17,11 @@ def q(instructions, code): # noqa
displayRet = False
debug = False
reconnection_attempts = -1
+ save = False
+ execute = True
+ path = ''
+ code_str= code
+ locked = False
if len(instructions)>0:
instructions = instructions.split(' ')
@@ -75,6 +82,22 @@ def q(instructions, code): # noqa
instructions.pop(0)
instructions.pop(0)
continue
+ elif instructions[0] == '--save':
+ save = True
+ path = instructions[1]
+ if ((path[-2:] != '.q') and (path[-3:] != '.q_')):
+ raise NameError("File must be of type '.q' or '.q_'")
+ if (path[-3:] == '.q_'):
+ locked = True
+ instructions.pop(0)
+ instructions.pop(0)
+ continue
+ elif instructions[0] == '--execute':
+ if instructions[1] not in ['True', 'False']:
+ raise NameError("Execute must be 'True' or 'False'")
+ execute = instructions[1] == 'True'
+ instructions.pop(0)
+ instructions.pop(0)
elif instructions[0] == '':
instructions.pop(0)
continue
@@ -83,68 +106,102 @@ def q(instructions, code): # noqa
f'Received unknown argument "{instructions[0]}" in %%q magic command'
)
- if port is not None:
- _q = kx.SyncQConnection(
- host,
- port,
- username=username,
- password=password,
- timeout=timeout,
- large_messages=large_messages,
- tls=tls,
- unix=unix,
- no_ctx=no_ctx,
- reconnection_attempts=reconnection_attempts
- )
+ if execute or save:
+ if port is not None:
+ _q = kx.SyncQConnection(
+ host,
+ port,
+ username=username,
+ password=password,
+ timeout=timeout,
+ large_messages=large_messages,
+ tls=tls,
+ unix=unix,
+ no_ctx=no_ctx,
+ reconnection_attempts=reconnection_attempts
+ )
+ try:
+ _q(ld, skip_debug=True)
+ except kx.QError as err:
+ if '.Q.pykxld' in str(err):
+ # .Q.pykxld is not defined on the server so we pass it as inline code
+ with open(kx.config.pykx_lib_dir/'q.k', 'r') as f:
+ lines = f.readlines()
+ for line in lines:
+ if 'pykxld:' in line:
+ ld = _q("k)"+line[7:-1], skip_debug=True)
+ break
+ else:
+ raise err
+ else:
+ _q = kx.q
+ code = [kx.CharVector(x) for x in code.split('\n')][:-1]
try:
- _q(ld, skip_debug=True)
- except kx.QError as err:
- if '.Q.pykxld' in str(err):
- # .Q.pykxld is not defined on the server so we pass it as inline code
- with open(kx.config.pykx_lib_dir/'q.k', 'r') as f:
- lines = f.readlines()
- for line in lines:
- if 'pykxld:' in line:
- ld = _q("k)"+line[7:-1], skip_debug=True)
- break
- else:
- raise err
- else:
- _q = kx.q
- code = [kx.CharVector(x) for x in code.split('\n')][:-1]
- ret = _q(
- '''{[ld;code;file]
- res:1_ {x,enlist `err`res`trc!$[any x`err;
- (1b;(::);(::));
- .Q.trp[{(0b;(@) . ("q";x);(::))};y;{(1b;x;.Q.sbt y)}]]} over
- enlist[enlist `err`res`trc!(0b;(::);(::))],enlist[file],/:value(ld;code);
- select from res where not (::)~/:res}
- ''',
- ld,
- code,
- b'jupyter_cell.q', skip_debug=True
- )
- if not kx.licensed:
- ret = ret.py()
- for i in range(len(ret['res'])):
- if ret['err'][i]:
- if debug or kx.config.pykx_qdebug:
- print(ret['trc'][i].decode())
- raise kx.QError(ret['res'][i].decode())
- else:
- display(ret['res'][i]) if displayRet else print(ret['res'][i])
+ if execute:
+ ret = _q(
+ '''{[ld;code;file]
+ res:1_ {x,enlist `err`res`trc!$[any x`err;
+ (1b;(::);(::));
+ .Q.trp[{(0b;(@) . ("q";x);(::))};y;{(1b;x;.Q.sbt y)}]]} over
+ enlist[enlist `err`res`trc!(0b;(::);(::))],
+ enlist[file],/:value(ld;code);
+ select from res where not (::)~/:res}
+ ''',
+ ld,
+ code,
+ b'jupyter_cell.q', skip_debug=True
+ )
+ if not kx.licensed:
+ ret = ret.py()
+ for i in range(len(ret['res'])):
+ if ret['err'][i]:
+ if debug or kx.config.pykx_qdebug:
+ print(ret['trc'][i].decode())
+ raise kx.QError(ret['res'][i].decode())
+ else:
+ display(ret['res'][i]) if displayRet else print(ret['res'][i])
+ else:
+ for i in range(len(ret)):
+ r = _q('@', ret, i)
+ if r['err']:
+ if debug or kx.config.pykx_qdebug:
+ print(r['trc'])
+ raise kx.QError(r['res'].py().decode())
+ else:
+ display(r['res']) if displayRet else print(r['res'])
+ if save:
+ write_to_q_file(_q, locked, path, code_str)
+ serv = " on q server" if issubclass(type(_q), kx.QConnection) else ""
+ ex = " without cell logic being run. To run the cell remove '--execute False'."
+ ex = '.' if execute else ex
+ print(f"Cell contents saved to '{path}'{serv}{ex}")
+ except Exception as e:
+ if save:
+ print(f"Cell contents not saved to '{path}' due to error during execution/saving.")
+ raise e
+ finally:
+ if issubclass(type(_q), kx.QConnection):
+ _q.close()
+
+
+def write_to_q_file(_q, locked, path, code):  # save the cell source `code` to `path` via `_q`
+ if locked:  # path ends in '.q_': write a plain '.q' file, run `\_` on it, delete it
+ output_file = Path(path[:-1])  # drop the trailing '_' to get the '.q' path
+ _q('0:', output_file, [kx.CharVector(code)])
+ _q('\\_ ' + path[:-1])  # escaped backslash; a bare '\_' is an invalid escape sequence
+ _q('hdel', output_file)
else:
- for i in range(len(ret)):
- r = _q('@', ret, i)
- if r['err']:
- if debug or kx.config.pykx_qdebug:
- print(r['trc'])
- raise kx.QError(r['res'].py().decode())
- else:
- display(r['res']) if displayRet else print(r['res'])
- if issubclass(type(_q), kx.QConnection):
- _q.close()
+ output_file = Path(path)
+ if issubclass(type(_q), kx.QConnection):  # IPC connection: write the file through q
+ _q('0:', output_file, [code[:-1].encode()])  # code[:-1] omits the final character
+ else:  # embedded q: write locally with pathlib, creating parent directories
+ output_file.parent.mkdir(exist_ok=True, parents=True)
+ output_file.write_text(code)
def load_ipython_extension(ipython):
ipython.register_magic_function(q, 'cell')
+
+ def q_complete(self, event):  # completer hook: always returns the q reserved words
+ return list(kx.q.reserved_words)
+ ipython.set_hook('complete_command', q_complete, re_key='.*')  # '.*': any input
diff --git a/src/pykx/pandas_api/__init__.py b/src/pykx/pandas_api/__init__.py
index e8f755d..045e3c3 100644
--- a/src/pykx/pandas_api/__init__.py
+++ b/src/pykx/pandas_api/__init__.py
@@ -73,6 +73,7 @@ def return_val(*args, **kwargs):
from .pandas_set_index import _init as _set_index_init, PandasSetIndex
from .pandas_reset_index import _init as _reset_index_init, PandasResetIndex
from .pandas_apply import _init as _apply_init, PandasApply
+from .pandas_map import _init as _map_init, PandasMap
from .pandas_sorting import _init as _sorting_init, PandasSorting
from .pandas_replace import _init as _replace_init, PandasReplace
@@ -86,6 +87,7 @@ def _init(_q):
_merge_init(q)
_set_index_init(q)
_apply_init(q)
+ _map_init(q)
_sorting_init(q)
_reset_index_init(q)
_replace_init(q)
@@ -93,7 +95,7 @@ def _init(_q):
class PandasAPI(PandasApply, PandasMeta, PandasIndexing, PandasReindexing,
PandasConversions, PandasMerge, PandasSetIndex, PandasGroupBy,
- PandasSorting, PandasReplace, PandasResetIndex):
+ PandasSorting, PandasReplace, PandasResetIndex, PandasMap):
"""PandasAPI mixin class"""
replace_self = False
prev_locs = {}
diff --git a/src/pykx/pandas_api/pandas_conversions.py b/src/pykx/pandas_api/pandas_conversions.py
index 4f0f5bf..1604876 100644
--- a/src/pykx/pandas_api/pandas_conversions.py
+++ b/src/pykx/pandas_api/pandas_conversions.py
@@ -334,18 +334,26 @@ def select_dtypes(self, include=None, exclude=None):
# Run if include is not None
if include is not None:
- table_out = q('''{[qtab;inc] tCols:cols qtab;
+ table_out = q('''{[qtab;inc]
+ tCols:cols $[99h~type qtab;value qtab;qtab];
inc:abs 5h$inc;
- bList:value (type each flip 0#qtab) in inc;
+ bList:value (type each flip 0#$[99h~type qtab;value qtab;qtab]) in inc;
+ if[not any bList;:(::)];
colList:tCols where bList;
- ?[qtab; (); 0b; colList!colList]}''',
+ res:?[qtab; (); 0b; colList!colList];
+ $[99h~type qtab;(key qtab)!res;res]
+ }''',
self, include_type_nums) # noqa
else:
- table_out = q('''{[qtab;exc] tCols:cols qtab;
+ table_out = q('''{[qtab;exc]
+ tCols:cols $[99h~type qtab;value qtab;qtab];
exc:abs 5h$exc;
- bList:value (type each flip 0#qtab) in exc;
+ bList:value (type each flip 0#$[99h~type qtab;value qtab;qtab]) in exc;
+ if[all bList;:(::)];
colList:tCols where not bList;
- ?[qtab; (); 0b; colList!colList] }''',
+ res:?[qtab; (); 0b; colList!colList];
+ $[99h~type qtab;(key qtab)!res;res]
+ }''',
self, exclude_type_nums) # noqa
return table_out
diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py
index 4961e72..2ae9c69 100644
--- a/src/pykx/pandas_api/pandas_indexing.py
+++ b/src/pykx/pandas_api/pandas_indexing.py
@@ -10,40 +10,38 @@ def _init(_q):
q = _q
-def _get(tab, key, default, cols_check=True):
+def _get(tab, key, default=None, cols_check=True):
idxs = None
_init_tab = None
- single_col = False
- if isinstance(key, SymbolAtom) or isinstance(key, str):
- single_col = True
+ single_col = isinstance(key, (SymbolAtom, str))
+
if 'Keyed' in str(type(tab)):
keys, idxs = key
_init_tab = tab
- tab = q('{value x}', tab)
+ tab = q('value', tab)
if 0 in idxs:
keys = keys[1:]
key = keys
- if cols_check:
- if q('{not all x in cols y}', key, tab):
- colstr = str(q('{((),x) except cols y}', key, tab).py())
- raise QError(f'Attempted to retrieve inaccessible columns: {colstr}')
- if isinstance(key, list) or isinstance(key, SymbolVector):
- if not all([x in tab._keys for x in key]):
+
+ if cols_check and q('{not all x in cols y}', key, tab) and default is None:
+ colstr = str(q('{((),x) except cols y}', key, tab).py())
+ raise QError(f'Attempted to retrieve inaccessible columns: {colstr}')
+
+ if isinstance(key, (list, SymbolVector)):
+ if not all(x in tab._keys for x in key):
return default
tab = q('{?[x; (); 0b; y!y]}', tab, SymbolVector(key))
if idxs is not None and 0 in idxs:
tab = q('{(key x)!(y)}', _init_tab, tab)
return tab
+
if isinstance(key, SymbolAtom):
key = key.py()
- if single_col:
- warnings.warn("\n\tSingle column retrieval using 'get' method will return a vector/list "
- "object in release 3.0+\n\t"
- "To access the vector/list directly use table['column_name']",
- FutureWarning)
- if key in q('{key flip 0#x}', tab).py():
- tab = q(f'{{([] {key}: x[y])}}', tab, key)
- return tab
+
+ if key in q('cols', tab).py():
+ return (q('{[t;k](0!t)k}', tab, key) if single_col
+ else q('{((),y)#0!x}', tab, key))
+
return default
@@ -220,12 +218,10 @@ def _loc(tab, loc): # noqa
keys = [keys]
if type(loc) is str or isinstance(loc, SymbolAtom):
if loc in keys:
+ raise KeyError(f"['{loc}'] is the index of a keyed column")
+ if loc not in q.cols(tab).py():
raise KeyError(f"['{loc}'] is not an index")
- return q(
- f'{{[x; y] (key x)!(flip (enlist `{loc})!(enlist y))}}',
- tab,
- q('{0!x}', tab)[loc]
- )
+ return q('{[x; y] key[x]!?[x;();0b;{x!x} enlist y]}', tab, loc)
if any([x in keys for x in loc]):
raise KeyError(f"['{loc}'] is not an index")
return q(
@@ -364,7 +360,7 @@ def pop(self, col_name: str):
@api_return
def get(self, key, default=None):
"""Get items from table based on key, if key is not found default is returned."""
- return _get(self, key, default, cols_check=False)
+ return _get(self, key, default)
@property
def at(self):
diff --git a/src/pykx/pandas_api/pandas_map.py b/src/pykx/pandas_api/pandas_map.py
new file mode 100644
index 0000000..d5bd603
--- /dev/null
+++ b/src/pykx/pandas_api/pandas_map.py
@@ -0,0 +1,45 @@
+from . import api_return
+
+
+def _init(_q):
+ global q
+ q = _q
+
+
+class PandasMap:
+
+ @api_return
+ def map(self, func, na_action=None, *args, **kwargs):
+ if not callable(func):
+ raise TypeError("Provided value 'func' is not callable")
+ if na_action is not None:
+ if na_action != 'ignore':
+ raise TypeError("na_action must be None or 'ignore'")
+ return q(
+ '{[f; tab; args; kwargs;nan] '
+ ' iskeyed:99h=type tab;'
+ ' tab:$[iskeyed;[kt:key tab;value tab];tab];'
+ ' func: $[.pykx.util.isw f;'
+ ' f[; pyarglist args; pykwargs kwargs];'
+ ' ['
+ ' if[0t == 112) {
+ PyGILState_Release(gstate);
return get_py_ptr(x);
}
@@ -197,6 +198,7 @@ static PyObject* k_to_py_list(K x) {
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
if (x->t == 112) {
+ PyGILState_Release(gstate);
return get_py_ptr(x);
}
@@ -284,93 +286,6 @@ void construct_args_kwargs(PyObject* params, PyObject** args, PyObject** kwargs,
}
-EXPORT K k_pyfunc(K k_guid_string, K k_args) {
-
- if (pykx_threading)
- return raise_k_error("pykx.q is not supported when using PYKX_THREADING");
- PyGILState_STATE gstate;
- gstate = PyGILState_Ensure();
- K k = (K)0; // the K object which will be returned
-
- PyObject* py_k_args[8]; // args to the Python callable as `pykx.K` objects
- // use `pykx.wrappers.factory` to convert each of the K objects to Python objects
- for (int i = 0; i < k_args->n - 1; ++i) {
- py_k_args[i] = PyObject_CallFunction(factory, "(Li)", (uintptr_t)kK(k_args)[i+1], 1);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
- }
-
- // get the uuid for the python function
- PyObject* guid_string = PyUnicode_FromStringAndSize((const char*)k_guid_string->G0, k_guid_string->n);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
- PyObject* guid = PyObject_CallFunctionObjArgs(UUID, guid_string, NULL);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
-
- // get the python function
- PyObject* converted_callables = PyDict_GetItemString(toq_module, "converted_callables");
- PyObject* pyfunc_tuple = PyDict_GetItemWithError(converted_callables, guid);
- if (!pyfunc_tuple) PyErr_SetObject(PyExc_KeyError, guid);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
- Py_XDECREF(guid);
- PyObject* pyfunc = PyTuple_GET_ITEM(pyfunc_tuple, 0);
- PyObject* params = PyTuple_GET_ITEM(pyfunc_tuple, 1);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
- Py_INCREF(pyfunc);
- Py_INCREF(params);
-
- // construct the args and kwargs for the pyfunc
- PyObject* args; // positional arguments for Python callables
- PyObject* kwargs; // keyword arguments for Python callables
- construct_args_kwargs(params, &args, &kwargs, k_args->n, py_k_args);
- Py_XDECREF(params);
- if ((k = k_py_error())) {
- Py_XDECREF(args);
- Py_XDECREF(kwargs);
- Py_XDECREF(pyfunc);
- PyGILState_Release(gstate);
- return k;
- }
-
- // call the python function
- PyObject* py_ret = PyObject_Call(pyfunc, args, kwargs);
- Py_XDECREF(args);
- Py_XDECREF(kwargs);
- Py_XDECREF(pyfunc);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
-
- // Convert the returned value to q
- PyObject* py_k_ret = PyObject_CallFunctionObjArgs(toq, py_ret, NULL);
- Py_XDECREF(py_ret);
- if ((k = k_py_error())) {
- PyGILState_Release(gstate);
- return k;
- }
- PyObject* py_addr = PyObject_GetAttrString(py_k_ret, "_addr");
- k = (K)PyLong_AsLongLong(py_addr);
- Py_XDECREF(py_addr);
-
- PyGILState_Release(gstate);
- return k;
-}
-
-
// k_eval_or_exec == 0 -> eval the code string
// k_eval_or_exec == 1 -> exec the code string
EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) {
@@ -518,7 +433,7 @@ EXPORT K k_modpow(K k_base, K k_exp, K k_mod_arg) {
}
-EXPORT K foreign_to_q(K f) {
+EXPORT K foreign_to_q(K f, K b) {
if (pykx_threading)
return raise_k_error("pykx.q is not supported when using PYKX_THREADING");
if (f->t != 112)
@@ -535,7 +450,10 @@ EXPORT K foreign_to_q(K f) {
PyTuple_SetItem(toq_args, 0, pyobj);
PyTuple_SetItem(toq_args, 1, Py_BuildValue(""));
- PyObject* qpy_val = PyObject_CallObject(toq, toq_args);
+ PyObject* _kwargs = PyDict_New();
+ PyDict_SetItemString(_kwargs, "strings_as_char", PyBool_FromLong((long)b->g));
+
+ PyObject* qpy_val = PyObject_Call(toq, toq_args, _kwargs);
if ((k = k_py_error())) {
PyGILState_Release(gstate);
return k;
@@ -544,6 +462,7 @@ EXPORT K foreign_to_q(K f) {
PyObject* k_addr = PyObject_GetAttrString(qpy_val, "_addr");
if ((k = k_py_error())) {
Py_XDECREF(toq_args);
+ Py_XDECREF(_kwargs);
Py_XDECREF(k_addr);
Py_XDECREF(qpy_val);
PyGILState_Release(gstate);
@@ -553,6 +472,7 @@ EXPORT K foreign_to_q(K f) {
K res = (K)(uintptr_t)_addr;
r1_ptr(res);
Py_XDECREF(toq_args);
+ Py_XDECREF(_kwargs);
Py_XDECREF(qpy_val);
Py_XDECREF(k_addr);
@@ -627,6 +547,7 @@ EXPORT K get_attr(K f, K attr) {
return k;
}
K res = create_foreign(pres);
+ Py_XDECREF(_attr);
PyGILState_Release(gstate);
return res;
}
@@ -653,6 +574,7 @@ EXPORT K get_global(K attr) {
return k;
}
K res = create_foreign(pres);
+ Py_XDECREF(_attr);
PyGILState_Release(gstate);
return res;
}
diff --git a/src/pykx/pykx.q b/src/pykx/pykx.q
index 4ba2ee6..b5136ea 100644
--- a/src/pykx/pykx.q
+++ b/src/pykx/pykx.q
@@ -24,7 +24,7 @@
// @private
// @overview
// For a given function retrieve the location from which the file was loaded
-//
+//
// @return {string} the location from which this file is being loaded
util.getLoadDir:{@[{"/"sv -1_"/"vs ssr[;"\\";"/"](-3#get .z.s)0};`;""]}
@@ -46,6 +46,8 @@ util.startup:.Q.opt .z.x
// @desc Load a file at an associated folder location, this is used
// to allow loading of files at folder locations containing spaces
util.loadfile:{[folder;file]
+ path:$[.z.o like "w*";"\\";"/"] sv ((),folder;(),file);
+ if[not " " in path;:system"l ",path];
cache:system"cd";
system"cd ",folder;
folder:system"cd";
@@ -54,6 +56,7 @@ util.loadfile:{[folder;file]
$[res[0];'res[1];res[1]]
}
+util.warnCache:{gx:getenv x;$[""~gx;"False";gx]}`PYKX_SUPPRESS_WARNINGS
// @private
// @desc Retrieval of PyKX initialization directory on first initialization
if[not "true"~lower getenv`PYKX_LOADED_UNDER_Q;
@@ -67,6 +70,15 @@ if[not "true"~lower getenv`PYKX_LOADED_UNDER_Q;
pykxDir:ssr[;"\\";"/"]last vs["PYKX_DIR: "]last pykxDir
];
];
+setenv[`PYKX_SUPPRESS_WARNINGS;util.warnCache]
+
+// @private
+// @desc Allow a user to force PyKX to use the location of libpython
+// found by the Python library find_libpython
+if[(lower getenv`PYKX_USE_FIND_LIBPYTHON) in ("true";enlist"1");
+ libpython_path:first system util.whichPython," -c\"from find_libpython import find_libpython;print(find_libpython())\"";
+ setenv[`PYKX_PYTHON_LIB_PATH;libpython_path]
+ ];
// @private
// @desc
@@ -82,11 +94,6 @@ k)c:{'[y;x]}/|:
// @desc Compose using enlist for generation of variadic functions
k)ce:{'[y;x]}/enlist,|:
-// @desc Print a message warning that "UNDER_PYTHON" is deprecated
-if[not ""~getenv`UNDER_PYTHON;
- -1"WARN: Environment variable 'UNDER_PYTHON' is deprecated, if set locally update to use 'PYKX_UNDER_PYTHON'";
- ]
-
// @desc Make use of `pykx.so` logic when running under Python
if["true"~getenv`PYKX_UNDER_PYTHON;
util.load:2:[hsym`$pykxDir,"/pykx";]
@@ -136,7 +143,7 @@ util.CFunctions:flip `qname`cname`args!flip (
(`util.pyForeign ;`k_to_py_foreign;3);
(`util.isf ;`k_check_python ;1);
(`util.pyrun ;`k_pyrun ;4);
- (`util.foreignToq;`foreign_to_q ;1);
+ (`util.foreignToq;`foreign_to_q ;2);
(`util.callFunc ;`call_func ;4);
(`pyimport ;`import ;1);
(`util.setGlobal ;`set_global ;2);
@@ -256,7 +263,7 @@ util.wf:{[f;x].pykx.util.pykx[f;x]}
// @private
// @desc
// Functionality used for checking if an supplied
-// argument is a Python foreign or wrapped object
+// argument is a Python foreign or wrapped object
util.isw:{
if[not 105h~type x;:0b];
$[.pykx.util.wf~$[104 105h~t:type each u:get x;
@@ -327,7 +334,7 @@ util.parseArgs:{
// ```
//
// !!! Warning
-//
+//
// This function will be set in the root `.q` namespace
//
// **Parameters:**
@@ -341,7 +348,7 @@ util.parseArgs:{
//
// type | description
// -------------|------------
-// `projection` | A projection which when used with a wrapped callable Python
+// `projection` | A projection which when used with a wrapped callable Python
//
// **Example:**
//
@@ -412,8 +419,8 @@ util.parseArgs:{
// ```
//
// !!! Warning
-//
-// This function will be set in the root `.q` namespace
+//
+// This function will be set in the root `.q` namespace
//
// **Parameters:**
//
@@ -513,7 +520,7 @@ topy:{x y}(`..python;;)
// // Pass a q object to Python with default conversions and return type
// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10
//
-//
+//
// // Pass a q object to Python treating the Python object as a Numpy Object
// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tonp til 10
//
@@ -691,7 +698,7 @@ toraw: {x y}(`..raw;;)
//
// ```q
// // Denote that a q object once passed to Python should be managed as a default object
-// // in this case a q list is converted to numpy
+// // in this case a q list is converted to numpy
// q).pykx.todefault til 10
// enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9]
//
@@ -703,7 +710,7 @@ toraw: {x y}(`..raw;;)
// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault ([]til 10;til 10)
//
// ```
-todefault:{$[0h=type x;toraw x;$[99h~type x;all 98h=type each(key x;value x);0b]|98h=type x;topd x;tonp x]}
+todefault:{$[0h=type x;topy x;$[99h~type x;all 98h=type each(key x;value x);0b]|98h=type x;topd x;tonp x]}
// @kind function
// @name .pykx.wrap
@@ -780,7 +787,7 @@ wrap:ce util.wf@
unwrap:{
c:last get last get first get last@;
$[util.isw x;t:type each u:get x;:x];
- if[(101 105h~t) and (::)~first u;:c u];
+ if[(101 105h~t) and (::)~first u;:c u];
if[(100 105h~t) and .pykx.toq~first u;:c u];
if[104 105h~t;:(last u)`.];
x`.}
@@ -883,7 +890,60 @@ setdefault:{
// q).pykx.toq b
// 2
// ```
-py2q:toq:{$[type[x]in 104 105 112h;util.foreignToq unwrap x;x]}
+py2q:toq:{$[type[x]in 104 105 112h;util.foreignToq[unwrap x;0b];x]}
+
+// @kind function
+// @name .pykx.toq0
+// @category api
+// @overview
+// _Convert an (un)wrapped `PyKX` foreign object into an analogous q type._
+//
+// ```q
+// .pykx.toq0[pythonObject;strAsChar]
+// ```
+//
+// **Parameters:**
+//
+// name | type | description |
+// ---------------|------------------------|-------------|
+// `pythonObject` | foreign/composition | A foreign Python object or composition containing a Python foreign to be converted to q
+// `strAsChar`    | Optional[boolean]      | A boolean indicating whether a Python `str` should be converted to a q string, rather than the default symbol, when returned to q
+//
+// **Return:**
+//
+// type | description
+// ------|------------
+// `any` | A q object converted from Python
+//
+// ```q
+// // Convert a wrapped PyKX foreign object to q
+// q)show a:.pykx.eval["1+1"]
+// {[f;x].pykx.util.pykx[f;x]}[foreign]enlist
+// q).pykx.toq0 a
+// 2
+//
+// // Convert an unwrapped PyKX foreign object to q
+// q)show b:a`.
+// foreign
+// q).pykx.toq0 b
+// 2
+//
+// // Convert a Python string to q symbol or string
+// q).pykx.toq0[.pykx.eval"\"test\""]
+// `test
+//
+// // Convert a Python string to a q string by setting strAsChar
+// q).pykx.toq0[.pykx.eval"\"test\"";1b]
+// "test"
+// ```
+toq0:ce {
+ if[2);util.isf x;wrap[x](>);'"Could not co
// name | type | description
// -------------|-----------|-------------
// `pyObject` | `foreign` | A Python object representing an underlying callable function
-//
+//
// **Returns:**
//
// type | description
@@ -1533,6 +1589,11 @@ qcallable:{$[util.isw x;wrap[unwrap[x]](<);util.isf x;wrap[x](<);'"Could not con
// .pykx.safeReimport[qFunction]
// ```
//
+// For more information on the reimporter module which this functionality calls see
+// https://code.kx.com/pykx/api/reimporting.html#pykx.reimporter.PyKXReimport
+//
+//
+//
// **Parameters:**
//
// name | type | description
@@ -1547,19 +1608,40 @@ qcallable:{$[util.isw x;wrap[unwrap[x]](<);util.isf x;wrap[x](<);'"Could not con
//
// **Example:**
//
+// Initializing a Python process which imports PyKX
+//
// ```q
// q)\l pykx.q
// q).pykx.safeReimport[{system"python -c 'import pykx as kx'";til 5}]
// 0 1 2 3 4
// ```
+//
+// Initializing a q child process which uses pykx.q
+//
+// ```q
+// q)\cat child.q
+// "\l pykx.q"
+// ".pykx.print \"Hello World\""
+//
+// q)\l pykx.q
+// q)system"q child.q" // Failing execution
+// q)'2024.08.29T12:29:39.967 util.whichPython
+// [5] /usr/local/anaconda3/envs/qenv/q/pykx.q:123:
+// (`os ; util.os);
+// (`whichPython ; util.whichPython)
+// ^
+// )
+// [2] /usr/projects/pykx/child.q:1: \l pykx.q
+// ^
+// q).pykx.safeReimport {system"q child.q"}
+// "Hello World"
+// ```
safeReimport:{[x]
pyexec["pykx_internal_reimporter = pykx.PyKXReimport()"];
envlist:(`PYKX_DEFAULT_CONVERSION;
`PYKX_UNDER_Q;
- `SKIP_UNDERQ;
`PYKX_SKIP_UNDERQ;
`PYKX_UNDER_PYTHON;
- `UNDER_PYTHON;
`PYKX_LOADED_UNDER_Q;
`PYKX_Q_LOADED_MARKER;
`PYKX_EXECUTABLE;
@@ -1574,6 +1656,20 @@ safeReimport:{[x]
$[r 0;';::] r 1
}
+// @kind function
+// @name .pykx.enableJupyter
+// @overview
+// Enable qfirst mode in a Jupyter Notebook.
+
+.pykx.enableJupyter:{.pykx.import[`pykx;`:util.jupyter_qfirst_enable][];}
+
+// @kind function
+// @name .pykx.disableJupyter
+// @overview
+// Disable qfirst mode in a Jupyter Notebook and return to Python first execution.
+
+.pykx.disableJupyter:{.pykx.import[`pykx;`:util.jupyter_qfirst_disable][];}
+
// @kind function
// @name .pykx.debugInfo
// @category api
@@ -1642,7 +1738,7 @@ debugInfo:{
// >>> kx.q['table'] = kx.q('([]2?1f;2?0Ng;2?`3)'
// >>> quit()
// q)table
-// x x1 x2
+// x x1 x2
// --------------------------------------------------
// 0.439081 49f2404d-5aec-f7c8-abba-e2885a580fb6 mil
// 0.5759051 656b5e69-d445-417e-bfe7-1994ddb87915 igf
@@ -1744,3 +1840,4 @@ loadExtension:{[ext]
// @desc Restore context used at initialization of script
system"d ",string .pykx.util.prevCtx;
+
diff --git a/src/pykx/pykx_init.q_ b/src/pykx/pykx_init.q_
index 5fd30ed..e121a7f 100644
Binary files a/src/pykx/pykx_init.q_ and b/src/pykx/pykx_init.q_ differ
diff --git a/src/pykx/pykxq.c b/src/pykx/pykxq.c
index 879218a..9f61fd4 100644
--- a/src/pykx/pykxq.c
+++ b/src/pykx/pykxq.c
@@ -115,7 +115,6 @@ EXPORT K k_init_python(K x, K y, K z) {
Py_InitializeEx(0);
if(PyEval_ThreadsInitialized()&&!PyGILState_Check())
PyEval_RestoreThread(PyGILState_GetThisThreadState());
- PyEval_InitThreads();
}
M = PyModule_GetDict(PyImport_AddModule("__main__"));
n = ktn(KS,0);
@@ -197,6 +196,7 @@ static P k_to_py_cast(K x, K typenum, K israw) {
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
if (x->t == 112) {
+ PyGILState_Release(gstate);
return get_py_ptr(x);
}
@@ -225,6 +225,7 @@ static P k_to_py_list(K x) {
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
if (x->t == 112) {
+ PyGILState_Release(gstate);
return get_py_ptr(x);
}
@@ -400,7 +401,7 @@ EXPORT K k_modpow(K k_base, K k_exp, K k_mod_arg) {
}
-EXPORT K foreign_to_q(K f) {
+EXPORT K foreign_to_q(K f, K b) {
if (f->t != 112)
return raise_k_error("Expected foreign object for call to .pykx.toq");
if (!check_py_foreign(f))
@@ -415,7 +416,10 @@ EXPORT K foreign_to_q(K f) {
PyTuple_SetItem(toq_args, 0, pyobj);
PyTuple_SetItem(toq_args, 1, Py_BuildValue(""));
- P qpy_val = PyObject_CallObject(toq, toq_args);
+ P _kwargs = PyDict_New();
+ PyDict_SetItemString(_kwargs, "strings_as_char", PyBool_FromLong((long)b->g));
+
+ P qpy_val = PyObject_Call(toq, toq_args, _kwargs);
if ((k = k_py_error())) {
PyGILState_Release(gstate);
return k;
@@ -424,6 +428,7 @@ EXPORT K foreign_to_q(K f) {
P k_addr = PyObject_GetAttrString(qpy_val, "_addr");
if ((k = k_py_error())) {
Py_XDECREF(toq_args);
+ Py_XDECREF(_kwargs);
Py_XDECREF(k_addr);
Py_XDECREF(qpy_val);
PyGILState_Release(gstate);
@@ -433,6 +438,7 @@ EXPORT K foreign_to_q(K f) {
K res = (K)(uintptr_t)_addr;
r1(res);
Py_XDECREF(toq_args);
+ Py_XDECREF(_kwargs);
Py_XDECREF(qpy_val);
Py_XDECREF(k_addr);
@@ -502,6 +508,7 @@ EXPORT K get_attr(K f, K attr) {
P p = get_py_ptr(f);
P _attr = Py_BuildValue("s", attr->s);
K res = create_foreign(PyObject_GetAttr(p, _attr));
+ Py_XDECREF(_attr);
if ((k = k_py_error())) {
PyGILState_Release(gstate);
return k;
@@ -524,6 +531,7 @@ EXPORT K get_global(K attr) {
}
P _attr = Py_BuildValue("s", attr->s);
K res = create_foreign(PyObject_GetAttr(p, _attr));
+ Py_XDECREF(_attr);
if ((k = k_py_error())) {
PyGILState_Release(gstate);
return k;
diff --git a/src/pykx/query.py b/src/pykx/query.py
index 661fa6c..f3a6e3c 100644
--- a/src/pykx/query.py
+++ b/src/pykx/query.py
@@ -1,16 +1,13 @@
-"""Query interfaces for PyKX."""
+"""_This page documents query interfaces for querying q tables using PyKX._"""
from abc import ABCMeta
from typing import Any, Dict, List, Optional, Union
-import warnings
-from uuid import uuid4
from . import Q
from . import wrappers as k
from .ipc import QFuture
from .exceptions import PyKXException, QError
-
__all__ = [
'Insert',
'QSQL',
@@ -23,34 +20,28 @@ def __dir__():
class QSQL:
- """Generates and submits functional q SQL queries.
-
- Instances of this class can be accessed as the `qsql` attribute of any [`pykx.Q`][pykx.Q]. For
- instance, `pykx.q.qsql`, or `pykx.QConnection(...).qsql`.
-
- The `QSQL` class provides Python users with a method of querying q simple, keyed, splayed and
- partitioned tables using a single set of functionality.
+ """The `#!python QSQL` class provides methods to query or modify q tables.
- This is achieved by wrapping the logic contained within the q functional select, exec, update,
- and delete functionality. For more information on this functionality please refer to [Chapter 9
- Section 12 of Q for Mortals](https://code.kx.com/q4m3/9_Queries_q-sql/#912-functional-forms).
+ The methods [select][pykx.QSQL.select], [exec][pykx.QSQL.exec], [update][pykx.QSQL.update]
+ and [delete][pykx.QSQL.delete] generate and execute functional queries on the given table.
+ To learn about functionally querying databases see [Chapter 9 Section 12 of Q for
+ Mortals](https://code.kx.com/q4m3/9_Queries_q-sql/#912-functional-forms).
- While it is also conceivable that the interface could compile a qSQL statement to achieve the
- same end goal there are a number of advantages to using the more complex functional form.
+ There are a number of advantages to using this query style as opposed to interpolating
+ strings to generate simple qSQL queries:
- 1. Users that are unfamiliar with q who use the interface are introduced to the more powerful
- version of querying with q, while still operating within a familiar setting.
- 2. Using the functional form provides the ability when running functional updates to update the
- q tables with data derived from Python:
+ 1. Users that are unfamiliar with q who use the interface are introduced to this more powerful
+ version of querying with q, while still operating within a familiar setting in Python.
+ 2. Using the functional form promotes data-oriented designs for modifying or querying the q
+ tables programmatically using data derived from Python:
```python
qtable = pykx.q('([]1 2 3;4 5 6)')
pykx.q.qsql.update(qtable, {'x': [10, 20, 30]})
```
- 3. It makes development and maintenance of the interface easier when dealing across the forms
- of supported table within q within which the functional forms of interacting with tables
- are more natural.
+ 3. Development and maintenance of this interface is easier with regard to the different
+ supported table formats.
"""
def __init__(self, q: Q):
@@ -63,24 +54,28 @@ def select(self,
by: Optional[Union[Dict[str, str], k.Dictionary]] = None,
inplace: bool = False,
) -> k.K:
- """Apply a q style select statement on tables defined within the process.
+ """
+ Execute a q functional select statement on tables defined within the process.
- This implementation follows the q functional select syntax with limitations on
- structures supported for the various clauses a result of this.
+ This implementation follows the q functional select syntax with limited support
+ on structures used in the parameters.
Parameters:
- table: The q table or name of the table (provided the table is named within the q
- memory space) on which the select statement is to be applied.
- columns: A dictionary mapping the name to be given to a column and the logic to be
- applied in aggregation to that column both as strings.
- where: Conditional filtering used to select subsets of the data on which by-clauses and
- appropriate aggregations are to be applied.
- by: A dictionary mapping the names to be assigned to the produced columns and the
- columns whose results are used to construct the groups of the by clause.
- inplace: Whether the result of an update is to be persisted. This operates for tables
- referenced by name in q memory or general table objects
+ table: The q table or name of the table to query. If a name is given, the table
+ must be named within the q memory space.
+ columns: A dictionary where the keys are names assigned for the query's output columns
+ and the values are the logic used to compute the column's result.
+ where: Filtering logic for reducing the data used in group-bys and
+ output column aggregations.
+ by: A dictionary where the keys are names assigned for the produced columns and the
+ values are aggregation rules used to construct the group-by parameter.
+ inplace: Indicates if the result of an update is to be persisted. This applies to
+ tables referenced by name in q memory or general table objects
https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+ Returns:
+ A PyKX Table or KeyedTable object resulting from the executed select query
+
Examples:
Define a q table in python, and give it a name in q memory
@@ -126,33 +121,36 @@ def exec(self,
by: Optional[Union[Dict[str, str], k.Dictionary]] = None
) -> k.K:
"""
- Apply a q style exec statement on tables defined within the process.
+ Execute a q functional exec statement on tables defined within the process.
- This implementation follows the q functional exec syntax with limitations on structures
- supported for the various clauses a result of this.
+ This implementation follows the q functional exec syntax with limited support on
+ structures used for the parameters.
Parameters:
- table: The q table or name of the table (provided the table is named within the q
- memory space) on which the exec statement is to be applied.
- columns: A dictionary mapping the name to be given to a column and the logic to be
- applied in aggregation to that column both as strings. A string defining a single
- column to be retrieved from the table as a list.
- where: Conditional filtering used to select subsets of the data on which by clauses and
- appropriate aggregations are to be applied.
- by: A dictionary mapping the names to be assigned to the produced columns and the
- the columns whose results are used to construct the groups of the by clause.
+ table: The q table or name of the table to query. If a name is given, the table
+ must be named within the q memory space.
+ columns: A dictionary where the keys are names assigned to the query's output columns
+ and the values are the logic used to compute the column's result.
+ where: Filtering logic for reducing the data used in group-by and
+ output column aggregations.
+ by: A dictionary where the keys are names assigned to the produced columns and the
+ values are aggregation rules used when q functionally applies group-by.
+
+ Returns:
+ A PyKX Vector or Dictionary object resulting from the executed exec query
Examples:
Define a q table in python and named in q memory
```python
- pykx.q['qtab'] = pd.DataFrame.from_dict({
- 'col1': [['a', 'b', 'c'][randint(0, 2)] for _ in range(100)],
- 'col2': [random() for _ in range(100)],
- 'col3': [randint(0, 1) == 1 for _ in range(100)],
- 'col4': [random() * 10 for _ in range(100)]
- })
+ qtab = pykx.Table(data={
+ 'col1': pykx.random.random(100, ['a', 'b', 'c']),
+ 'col2': pykx.random.random(100, 100),
+ 'col3': pykx.random.random(100, [0, 1]),
+ 'col4': pykx.random.random(100, 100.0)
+ })
+ pykx.q['qtab'] = qtab
```
Select last item of the table
@@ -203,33 +201,30 @@ def update(self,
columns: Optional[Union[Dict[str, str], k.Dictionary]] = None,
where: Optional[Union[List[str], str, k.SymbolAtom, k.SymbolVector]] = None,
by: Optional[Union[Dict[str, str], k.Dictionary]] = None,
- modify: bool = False,
inplace: bool = False,
) -> k.K:
"""
- Apply a q style update statement on tables defined within the process.
+ Execute a q style update statement on tables defined within the process.
- This implementation follows the q functional update syntax with limitations on
- structures supported for the various clauses a result of this.
+ This implementation follows the q functional update syntax with limited support on
+ structures used for the parameters.
Parameters:
- table: The q table or name of the table (provided the table is named within the q
- memory space) on which the update statement is to be applied.
- columns: A dictionary mapping the name of a column present in the table or one to be
- added to the contents which are to be added to the column, this content can be a
- string denoting q data or the equivalent Python data.
- where: Conditional filtering used to select subsets of the data on which by-clauses and
- appropriate aggregations are to be applied.
- by: A dictionary mapping the names to be assigned to the produced columns and the
- columns whose results are used to construct the groups of the by clause.
- modify: `Deprecated`, please use `inplace` instead. Whether the result of an update
- is to be saved. This operates for tables referenced by name in q memory or
- general table objects
- https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+ table: The q table or name of the table to update. If a name is given, the table
+ must be named within the q memory space.
+ columns: A dictionary where the keys are names assigned to the query's output columns
+ and the values are the logic used to compute the column's result.
+ where: Filtering logic for reducing the data used in group-bys and
+ output column aggregations.
+ by: A dictionary where the keys are names assigned to the result columns and the
+ values are aggregation rules used to compute the group-by result.
inplace: Whether the result of an update is to be persisted. This operates for tables
referenced by name in q memory or general table objects
https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+ Returns:
+ The updated PyKX Table or KeyedTable object resulting from the executed update query
+
Examples:
Define a q table in python and named in q memory
@@ -272,27 +267,26 @@ def update(self,
pykx.q.qsql.update(byqtab, {'weight': 'avg weight'}, by={'city': 'city'})
```
- Apply an update grouping based on a by phrase and persist the result using the modify keyword
+ Apply an update grouping based on a by phrase and persist the result using the inplace keyword
```python
pykx.q.qsql.update('byqtab', columns={'weight': 'avg weight'}, by={'city': 'city'}, inplace=True)
pykx.q['byqtab']
```
""" # noqa: E501
- return self._seud(table, 'update', columns, where, by, modify, inplace)
+ return self._seud(table, 'update', columns, where, by, inplace)
def delete(self,
table: Union[k.Table, str],
columns: Optional[Union[List[str], k.SymbolVector]] = None,
where: Optional[Union[List[str], str, k.SymbolAtom, k.SymbolVector]] = None,
- modify: bool = False,
inplace: bool = False,
) -> k.K:
"""
- Apply a q style delete statement on tables defined within the process.
+ Execute a q functional delete statement on tables defined within the process.
- This implementation follows the q functional delete syntax with limitations on
- structures supported for the various clauses a result of this.
+ This implementation follows the q functional delete syntax with limited support on
+ structures used for the parameters.
Parameters:
table: The q table or name of the table (provided the table is named within the q
@@ -300,14 +294,13 @@ def delete(self,
columns: Denotes the columns to be deleted from a table.
where: Conditional filtering used to select subsets of the data which are to be
deleted from the table.
- modify: `Deprecated`, please use `inplace` instead. Whether the result of a delete
- is to be saved. This holds when `table` is the name of a table in q memory,
- as outlined at:
- https://code.kx.com/q/basics/qsql/#result-and-side-effects.
inplace: Whether the result of an update is to be persisted. This operates for tables
referenced by name in q memory or general table objects
https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+ Returns:
+ The updated PyKX Table or KeyedTable object resulting from the executed delete query
+
Examples:
Define a q table in python and named in q memory
@@ -339,26 +332,19 @@ def delete(self,
```
Delete a column from the dataset named in q memory and persist the result using the
- modify keyword
+ inplace keyword
```python
- pykx.q.qsql.delete('qtab', 'age', modify=True)
+ pykx.q.qsql.delete('qtab', 'age', inplace=True)
pykx.q['qtab']
```
""" # noqa: E501
if columns is not None and where is not None:
raise TypeError("'where' and 'columns' clauses cannot be used simultaneously in a "
"delete statement")
- return self._seud(table, 'delete', columns, where, None, modify, inplace)
-
- def _seud(self, table, query_type, columns=None, where=None, by=None, modify=False, inplace=False) -> k.K: # noqa: C901, E501
- if modify and inplace:
- raise RuntimeError("Attempting to use both 'modify' and 'inplace' keywords, please use only 'inplace'") # noqa: E501
-
- if modify:
- warnings.warn("The 'modify' keyword is now deprecated please use 'inplace'")
- inplace = modify
+ return self._seud(table, 'delete', columns, where, None, inplace)
+ def _seud(self, table, query_type, columns=None, where=None, by=None, inplace=False) -> k.K: # noqa: C901, E501
if not isinstance(table, str):
table = k.K(table)
@@ -368,52 +354,43 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, modify=Fal
select_clause = self._generate_clause(columns, 'columns', query_type)
by_clause = self._generate_clause(by, 'by', query_type)
where_clause = self._generate_clause(where, 'where', query_type)
- original_table = table
+ get = ''
+ query_char = '!' if query_type in ('delete', 'update') else '?'
if isinstance(table, k.K):
if not isinstance(table, (k.Table, k.KeyedTable)):
raise TypeError("'table' object provided was not a K tabular object or an "
"object which could be converted to an appropriate "
"representation")
- randguid = str(uuid4())
- self._q(f'''
- {{@[{{get x}};`.pykx.i.updateCache;{{.pykx.i.updateCache:(`guid$())!()}}];
- .pykx.i.updateCache["G"$"{randguid}"]:x}}
- ''', table)
- original_table = table
- table_code = f'.pykx.i.updateCache["G"$"{randguid}"]'
- if not inplace:
- query_char = '!' if query_type in ('delete', 'update') else '?'
- else:
- query_char = table_code + (':!' if query_type in ('delete', 'update') else ':?')
- elif not isinstance(table, str):
+ elif isinstance(table, str):
+ if (not inplace and query_type in ('delete', 'update')):
+ get = 'get'
+ else:
raise TypeError("'table' must be a an object which is convertible to a K object "
"or a string denoting an item in q memory")
- else:
- if (not inplace and query_type in ('delete', 'update')):
- table_code = f'get`$"{table}"'
- else:
- table_code = f'`$"{table}"'
- query_char = '!' if query_type in ('delete', 'update') else '?'
- try:
- res = self._q(
- f'{{{query_char}[{table_code};value x;value y;value z]}}',
- where_clause,
- by_clause,
- select_clause,
- wait=True,
- )
- if inplace and isinstance(original_table, k.K):
- res = self._q(table_code)
- if isinstance(res, QFuture):
- raise QError("'inplace' not supported with asynchronous query")
- if type(original_table) != type(res):
- raise QError('Returned data format does not match input type, '
- 'cannot perform inplace operation')
- original_table.__dict__.update(res.__dict__)
- return res
- finally:
- if isinstance(original_table, k.K):
- self._q._call(f'.pykx.i.updateCache _:"G"$"{randguid}"', wait=True)
+ wv, bv, sv = 'value', 'value', 'value'
+ if isinstance(where_clause, k.QueryPhrase):
+ wv = ''
+ where_clause = where_clause._phrase
+ if isinstance(by_clause, (dict, k.ParseTree)):
+ bv = ''
+ if isinstance(select_clause, (dict, k.ParseTree)):
+ sv = ''
+ res = self._q(
+ f'{{[tab;x;y;z]{query_char}[{get} tab;{wv} x;{bv} y;{sv} z]}}',
+ table,
+ where_clause,
+ by_clause,
+ select_clause,
+ wait=True,
+ )
+ if inplace and isinstance(table, k.K):
+ if isinstance(res, QFuture):
+ raise QError("'inplace' not supported with asynchronous query")
+ if type(table) != type(res):
+ raise QError('Returned data format does not match input type, '
+ 'cannot perform inplace operation')
+ table.__dict__.update(res.__dict__)
+ return res
def _generate_clause(self, clause_value, clause_name, query_type):
if clause_value is None:
@@ -432,6 +409,32 @@ def _generate_clause(self, clause_value, clause_name, query_type):
def _generate_clause_columns_by(self, clause_value, clause_name, query_type):
if isinstance(clause_value, dict):
return self._generate_clause_columns_by_dict(clause_value)
+ elif isinstance(clause_value, k.QueryPhrase):
+ if clause_value._are_trees[0]:
+ return [b'{x!.[y;(0 0);eval]}', clause_value._names, clause_value._phrase]
+ elif query_type == 'delete' and clause_name == 'columns':
+ return [b'{x}', clause_value._names]
+ else:
+ return [b'{x!y}', clause_value._names, clause_value._phrase]
+ elif isinstance(clause_value, k.Column):
+ if query_type == 'exec':
+ if clause_value._is_tree:
+ return [b'.[;enlist 0;eval]', clause_value._value]
+ else:
+ return k.ParseTree(clause_value._value)
+ elif query_type == 'delete' and clause_name == 'columns':
+ return [b'enlist', clause_value._name]
+ else:
+ if clause_value._is_tree:
+ return [b'{enlist[x]!enlist .[y;enlist 0;eval]}',
+ clause_value._name, clause_value._value]
+ else:
+ return {clause_value._name: clause_value._value}
+ elif isinstance(clause_value, k.Variable):
+ if query_type == 'exec':
+ return k.ParseTree(clause_value._name)
+ else:
+ return {clause_value._name: clause_value._name}
elif clause_name == 'columns' and query_type == 'delete':
if isinstance(clause_value, str):
if clause_value == '':
@@ -441,8 +444,21 @@ def _generate_clause_columns_by(self, clause_value, clause_name, query_type):
clause_value = [k.CharVector(x) for x in clause_value]
return [b'{parse each x}', clause_value]
elif (query_type in ['select', 'exec']) and (clause_name in ['columns', 'by']):
- if isinstance(clause_value, list):
- return [b'{v!v:{$[0>type x;x;(0h>v 0)&1~count v:distinct type each x;raze x;x]}x}', clause_value] # noqa: E501
+ if isinstance(clause_value, k.Column):
+ return clause_value
+ elif isinstance(clause_value, k.QueryPhrase):
+ return [b'{x!y}', clause_value._names, clause_value._phrase]
+ elif isinstance(clause_value, list):
+ kys=[]
+ vls=[]
+ for x in clause_value:
+ if isinstance(x, k.Column):
+ kys.append(x._name)
+ vls.append(x._value)
+ else:
+ kys.append(x)
+ vls.append(x)
+ return [b'{x!y}', kys, vls]
elif isinstance(clause_value, str) and query_type == 'select':
return [b'{x!x}enlist@', clause_value]
return [b'{x}', k.K(clause_value)]
@@ -456,36 +472,47 @@ def _generate_clause_columns_by_dict(self, clause_value):
if isinstance(val, str):
if val == '':
raise ValueError(f'q query specifying column for key {key!r} cannot be empty')
- clause_dict[key] = [True, k.CharVector(val)]
+ clause_dict[key] = [1, k.CharVector(val)]
+ elif isinstance(val, k.Column):
+ if val._is_tree:
+ clause_dict[key] = [2, val._value]
+ else:
+ clause_dict[key] = [0, val._value]
else:
- clause_dict[key] = [False, val]
- return [b'{key[x]!{$[x 0;parse;{$[0>type x;x;(0h>v 0)&1~count v:distinct type each x;raze x;x]}]x 1}each value x}', clause_dict] # noqa: E501
+ clause_dict[key] = [0, val]
+ return [b'''{
+ key[x]!{$[0=x 0;(::);1=x 0;parse;2=x 0;.[;enlist 0;eval];(::)] x 1}each value x
+ }''', clause_dict]
def _generate_clause_where(self, clause_value) -> k.List:
- if isinstance(clause_value, k.List):
- return [b'{x}', clause_value]
+ if isinstance(clause_value, (k.QueryPhrase, k.ParseTree, k.Column, k.List)):
+ return k.QueryPhrase(clause_value)
if isinstance(clause_value, k.BooleanVector):
- return [b'{enlist x}', clause_value]
+ return k.QueryPhrase([clause_value])
if isinstance(clause_value, str):
clause_value = [k.CharVector(clause_value)]
- else:
+ elif all([isinstance(x, str) for x in clause_value]):
clause_value = [k.CharVector(x) for x in clause_value]
+ else:
+ wp = k.QueryPhrase(clause_value[0])
+ for wc in clause_value[1:]:
+ wp.extend(k.QueryPhrase(wc))
+ return wp
return [b'{parse each x}', clause_value]
class SQL:
"""Wrapper around the [KX Insights Core ANSI SQL](https://code.kx.com/insights/core/sql.html) interface.
- Lots of examples within this interface use a table named trades, an example of this table is
+ Examples within this interface use a table named **trades**, an example of this table is
```Python
- >>> kx.q['trades'] = kx.toq(
- pd.DataFrame.from_dict({
- 'sym': [['AAPL', 'GOOG', 'MSFT'][randint(0, 2)] for _ in range(100)],
- 'date': [[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)][randint(0, 2)] for _ in range(100)],
- 'price': [random() * 1000 for _ in range(100)]
+ >>> trades = kx.Table(data={
+ 'sym': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ 'date': kx.random.random(100, kx.q('2022.01.01') + [0,1,2]),
+ 'price': kx.random.random(100, 1000.0)
})
- )
+ >>> kx.q['trades'] = trades
```
""" # noqa: E501
@@ -493,24 +520,24 @@ def __init__(self, q: Q):
self._q = q
def __call__(self, query: str, *args: Any) -> k.Table:
- """Compile and run a SQL statement.
+ """Compile and run a SQL statement using string interpolation.
Parameters:
- query: The SQL query, using KX Insights Core SQL, documented at
- https://code.kx.com/insights/core/sql.html
- *args: The arguments for the query, which will be interpolated into the query. Each
- argument will be converted into a [`pykx.K`][] object.
+ query: The query to execute formatted in
+ [KX Insights SQL style](https://code.kx.com/insights/core/sql.html)
+ *args: The arguments for the query, which will be interpolated into the string. Each
+ argument will be converted into a [pykx.K][pykx.K] object.
Returns:
- The result of the evaluation of `query` with `args` interpolated.
+ The result of the evaluation of `#!python query` with `#!python args` interpolated.
- Note: Avoid interpolating the table into the query when running over IPC.
- It's common to interpolate a `pykx.Table` object into the query as `'$1'`. This works
- well when running embedded within the process, but when the `Q` instance is an
- [IPC connection][pykx.QConnection] this will result in the entire table being sent over
- the connection, which will negatively impact performance. Instead, when running over
- IPC, write the name of the table (as defined in the connected q server) directly into
- the query.
+        Note: Avoid interpolating the table into the query when using this with IPC.
+ Use the full name of the table in the string.
+ When using this class on the embedded q process it is common to interpolate a
+ `#!python pykx.Table` object into a query using `#!python '$1'`. When the `#!python Q`
+ object used in the initialization of this class is an [IPC connection][pykx.QConnection]
+ the entire table will be sent in the message over the connection. If the table is large
+ this will significantly impact performance.
Examples:
@@ -529,7 +556,7 @@ def __call__(self, query: str, *args: Any) -> k.Table:
'))
```
- Query a [`pykx.Table`][] instance by interpolating it in as the first argument:
+ Query a [`pykx.Table`][] instance by injecting it as the first argument using `$n` syntax:
```python
>>> q.sql('select * from $1', trades) # where `trades` is a `pykx.Table` object
@@ -543,7 +570,8 @@ def __call__(self, query: str, *args: Any) -> k.Table:
..
'))
```
- Query a table using interpolated conditions:
+
+ Query a table using multiple injected arguments:
```python
>>> q.sql('select * from trades where date = $1 and price < $2', date(2022, 1, 2), 500.0)
@@ -561,35 +589,32 @@ def __call__(self, query: str, *args: Any) -> k.Table:
return self._q('.s.sp', k.CharVector(query), args)
def prepare(self, query: str, *args: Any) -> k.List:
- """Prepare a parametrized query to be executed later, the parameter types are deduced from
- the types of the arguments used to prepare the statement.
+ """Prepare a parametrized query to be executed later.
Parameters:
- query: The SQL query, using KX Insights Core SQL, documented at
- https://code.kx.com/insights/core/sql.html
- *args: The arguments for the query, these arguments are not used in the query. They are
- used to determine the types of the parameters that will later be used as parameters
- when executing the query.
+ query: The query to parameterize in
+ [KX Insights SQL format](https://code.kx.com/insights/core/sql.html).
+ *args: The arguments for `#!python query`. The arguments are not used in the query. They
+ are used to determine the expected types of the parameters of the parameterization.
Returns:
- The parametrized query, which can later be used with `q.query.execute()`
+            The parametrized query, which can later be used with `#!python q.sql.execute()`
Examples:
- Note: When preparing a query with K types you don't have to fully construct one.
- For example you can pass `kx.LongAtom(1)` as a value to the prepare function as well as
- just [`pykx.LongAtom`][]. This only works for Atom and Vector types. There is also a
- helper function for tables that you can use called `pykx.Table.prototype`.
+ Note: Preparing a query does not require fully constructed K Atom and Vector types.
+ Both the value `#!python kx.LongAtom(1)` and the wrapper [pykx.LongAtom][pykx.LongAtom]
+ are valid. To determine table type use `#!python pykx.Table.prototype`.
Prepare a query for later execution that will expect a table with 3 columns a, b, and c with
- ktypes [`pykx.SymbolVector`][], [`pykx.FloatVector`][], and [`pykx.LongVector`][]
- respectively.
+ ktypes [pykx.SymbolVector][pykx.SymbolVector], [pykx.FloatVector][pykx.FloatVector], and
+ [pykx.LongVector][pykx.LongVector] respectively.
```Python
>>> p = q.sql.prepare('select * from $1', kx.q('([] a:``; b: 0n 0n; c: 0N 0N)'))
```
- You can also use the `pykx.Table.prototype` helper function to build a table to pass into a
- prepared SQL query.
+ You can also use the `#!python pykx.Table.prototype` helper function to build a table to
+ pass into a prepared SQL query.
```Python
>>> p = q.sql.prepare('select * from $1', kx.Table.prototype({
@@ -609,7 +634,7 @@ def prepare(self, query: str, *args: Any) -> k.List:
)
```
- You can also directly pass in the [`pykx.K`][] types you wish to use instead.
+ You can also directly pass in the [pykx.K][pykx.K] types you wish to use instead.
```Python
>>> p = q.sql.prepare('select * from trades where date = $1 and price < $2',
@@ -624,30 +649,30 @@ def prepare(self, query: str, *args: Any) -> k.List:
return self._q('.s.sq', k.CharVector(query), _args)
def execute(self, query: k.List, *args: Any) -> k.K:
- """Execute a prepared query the parameter types must match the types of the arguments
- used in the prepare statement.
+ """Execute a prepared query. Parameter types must match the types of the arguments
+ used when executing the `#!python sql.prepare` function.
Parameters:
- query: A prepared SQL statement returned by a call to `q.sql.prepare`.
+ query: A prepared SQL statement returned by a call to `#!python sql.prepare`.
*args: The arguments for the query, which will be interpolated into the query. Each
- argument will be converted into a [`pykx.K`][] object.
+ argument will be converted into a [pykx.K][pykx.K] object.
Returns:
- The result of the evaluation of `query` with `args` interpolated.
+ The result of the evaluation of `#!python query` with `#!python args` interpolated.
- Note: Avoid interpolating the table into the query when running over IPC.
- It's common to interpolate a [`pykx.Table`][] object into the query as `'$1'`. This
- works well when running embedded within the process, but when the `Q` instance is an
- [IPC connection][pykx.QConnection] this will result in the entire table being sent over
- the connection, which will negatively impact performance. Instead, when running over
- IPC, write the name of the table (as defined in the connected q server) directly into
- the query.
+        Note: Avoid interpolating the table into the query when using this with IPC.
+ Use the full name of the table in the string.
+ When using this class on the embedded q process it is common to interpolate a
+ `#!python pykx.Table` object into a query using `#!python '$1'`. When the `#!python Q`
+ object used in the initialization of this class is an [IPC connection][pykx.QConnection]
+ the entire table will be sent in the message over the connection. If the table is large
+ this will significantly impact performance.
Examples:
- Execute a prepared query passing in a [`pykx.Table`][] with 3 columns a, b, and c with
- ktypes [`pykx.SymbolVector`][], [`pykx.FloatVector`][], and [`pykx.LongVector`][]
- respectively.
+ Execute a prepared query passing in a [pykx.Table][pykx.Table] with 3 columns a, b, and c
+ with ktypes [pykx.SymbolVector][pykx.SymbolVector], [pykx.FloatVector][pykx.FloatVector],
+ and [pykx.LongVector][pykx.LongVector] respectively.
```Python
>>> p = q.sql.prepare('select * from $1', kx.q('([] a:``; b: 0n 0n; c: 0N 0N)'))
@@ -684,10 +709,10 @@ def execute(self, query: k.List, *args: Any) -> k.K:
return self._q('.s.sx', query, args)
def get_input_types(self, prepared_query: k.List) -> List[str]:
- """Get the [`pykx.K`][] types that are expected to be used with a prepared query.
+ """Get the [pykx.K][pykx.K] types that are expected to be used with a prepared query.
Parameters:
- prepared_query: A prepared SQL statement returned by a call to `q.sql.prepare`.
+ prepared_query: A prepared SQL statement returned by a call to `#!python q.sql.prepare`.
Returns:
A Python list object containing the string representations of the expected K types for
@@ -734,7 +759,7 @@ def get_input_types(self, prepared_query: k.List) -> List[str]:
class TableAppend:
- """Helper class for the q insert function"""
+ """Helper class for the q insert and upsert functions"""
def __init__(self, _q):
self._q = _q
@@ -802,7 +827,7 @@ def append(
class Insert(TableAppend):
- """Helper class for the q insert and upsert functions"""
+ """Helper class for the q insert function"""
def __init__(self, _q: Q):
super().__init__(_q)
@@ -814,40 +839,40 @@ def __call__(
match_schema: bool = False,
test_insert: bool = False
) -> Union[None, k.Table]:
- """Helper function around `q`'s `insert` function which inserts a row or multiple rows into
+ """Helper function around q's `#!q insert` function which inserts a row or multiple rows into
a q table object.
Parameters:
- table: The name of the table to be inserted onto.
- row: A list of objects to be inserted as a row, or a list of lists containing objects
+ table: The name of the table for the insert operation.
+ row: A list of objects to be inserted as a row, or a list of lists of objects
to insert multiple rows at once.
- match_schema: Whether the row/rows to be inserted must match the tables current schema.
+ match_schema: Whether the row/rows to be inserted must match the table's current schema.
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
- source tables contents.
+ source table's contents.
Returns:
- A `k.LongVector` denoting the index of the rows that were inserted, unless the
- `test_insert` keyword argument is used in which case it returns the
- last 5 rows of the table with the new rows inserted onto the end, this does not modify
- the actual table object.
+ When `#!python test_insert` is false return a `#!python k.LongVector` denoting the
+ index of the rows that were inserted. When `#!python test_insert` is true return the
+ last 5 rows of the table with the new rows inserted onto the end leaving
+ `#!python table` unmodified.
Raises:
- PyKXException: If the `match_schema` parameter is used this function may raise an error
- if the row to be inserted does not match the tables schema. The error message will
- contain information about which columns did not match.
+ PyKXException: If the `#!python match_schema` parameter is used this function may raise
+ an error if the row to be inserted does not match the table's schema. The error
+ message will contain information about which columns did not match.
Examples:
- Insert a single row onto a table named `tab` ensuring that the row matches the tables
- schema. Will raise an error if the row does not match
+ Insert a single row onto a table named `#!python tab` ensuring that the row matches the
+ table's schema. This will raise an error if the row does not match.
```Python
>>> q.insert('tab', [1, 2.0, datetime.datetime(2020, 2, 24)], match_schema=True)
```
- Insert multiple rows onto a table named `tab` ensuring that each of the rows being added
- match the tables schema.
+ Insert multiple rows onto a table named `#!python tab` ensuring that each of the rows being
+ added match the table's schema.
```Python
>>> q.insert(
@@ -897,40 +922,41 @@ def __call__(
match_schema: bool = False,
test_insert: bool = False
) -> Union[None, k.Table]:
- """Helper function around `q`'s `upsert` function which inserts a row or multiple rows into
+ """Helper function around q's `#!q upsert` function which inserts a row or multiple rows into
a q table object.
Parameters:
- table: A `k.Table` object or the name of the table to be inserted onto.
- row: A list of objects to be inserted as a row, if the table is within embedded q you
+ table: A `#!python k.Table` object or the name of the table.
+ row: A list of objects to be appended as a row, if the table is within embedded q you
may also pass in a table object to be upserted.
- match_schema: Whether the row/rows to be inserted must match the tables current schema.
+ match_schema: Whether the row/rows to be appended must match the table's current schema.
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
- source tables contents.
+ source table's contents.
Returns:
- The modified table if a `k.Table` is passed in, otherwise `None` is returned.
- If the `test_insert` keyword argument is used it returns the last 5 rows of the table
- with the new rows inserted onto the end, this does not modify the actual table object.
+ When `#!python test_insert` is false and `#!python table` is a `#!python k.Table` return
+ the modified table. When `#!python test_insert` is true return the last 5 rows of
+ the table with new rows appended to the end. In all other cases `#!python None`
+ is returned.
Raises:
- PyKXException: If the `match_schema` parameter is used this function may raise an error
- if the row to be inserted does not match the tables schema. The error message will
- contain information about which columns did not match.
+ PyKXException: If the `#!python match_schema` parameter is used this function may raise
+ an error if the row to be inserted does not match the table's schema. The error
+ message will contain information about which columns did not match.
Examples:
- Upsert a single row onto a table named `tab` ensuring that the row matches the tables
- schema. Will raise an error if the row does not match
+ Upsert a single row onto a table named `#!python tab` ensuring that the row matches the
+ table's schema. This will raise an error if the row does not match.
```Python
>>> q.upsert('tab', [1, 2.0, datetime.datetime(2020, 2, 24)], match_schema=True)
>>> table = q.upsert(table, [1, 2.0, datetime.datetime(2020, 2, 24)], match_schema=True)
```
- Upsert multiple rows onto a table named `tab` ensuring that each of the rows being added
- match the tables schema. Upserting multiple rows only works within embedded q.
+ Upsert multiple rows onto a table named `#!python tab` ensuring that each of the rows being
+ added match the table's schema.
```Python
>>> q.upsert(
@@ -945,8 +971,8 @@ def __call__(
)
```
- Run a test insert to modify a local copy of the table to test what the table would look
- like after inserting the new rows.
+ Run a test upsert to modify a local copy of the table to test what the table would look
+ like after appending the new rows.
```Python
>>> kx.q['tab'] = kx.Table([[1, 1.0, 'a'], [2, 2.0, 'b'], [3, 3.0, 'c']], columns=['a', 'b', 'c'])
diff --git a/src/pykx/random.py b/src/pykx/random.py
index 836cda5..2a1d6e7 100644
--- a/src/pykx/random.py
+++ b/src/pykx/random.py
@@ -16,13 +16,27 @@ def _init(_q):
def seed(seed: int) -> None:
- """Set random seed for PyKX random data generation
+ """Set random seed for PyKX random data generation.
Parameters:
seed: Integer value defining the seed value to be set
Returns:
On successful invocation this function returns None
+
+ Examples:
+
+ Set the random seed for generated data to 42 validating random generation is deterministic
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.random.seed(42)
+ >>> kx.random.random(10, 10)
+ pykx.LongVector(pykx.q('4 7 2 2 9 4 2 0 8 0'))
+        >>> kx.random.seed(42)
+ >>> kx.random.random(10, 10)
+ pykx.LongVector(pykx.q('4 7 2 2 9 4 2 0 8 0'))
+ ```
"""
q('{system"S ",string x}', seed)
@@ -31,22 +45,78 @@ def random(dimensions: Union[int, List[int]],
data: Any,
seed: Optional[int] = None
) -> Any:
- """Return random data of specified dimensionality
+ """Generate random data in the shape of the specified dimensions.
Parameters:
- dimensions: The dimensions of the data returned. Will produce a 1D array if single integer
- passed. Returns random data in shape of a list passed. Passing a negative value will perfom
- a kdb Deal on the data.
+        dimensions: The dimensions of the data returned. A 1D array is produced if the input for
+ this parameter is a single integer. A list input generates random data in the shape of
+ the list. Passing a negative value performs a kdb Deal on the data.
- data: The data from which a random sample is chosen. If an int or a float is passed,
- the random values are chosen in the range [0,data]. If a list is passed,
- the values are chosen from that list.
+ data: The data from which a random sample is chosen. Input an [int][int] or [float][float]
+ to generate random values from the range [0,data]. Input a list to pick random values
+ from that list.
- seed: Denotes whether or not a seed should be used in the generation of data. Defaulted to
- None, any value passed will be used as a seed to generate the data.
+ seed: Optional parameter to force randomisation to use a specific seed. Defaults to None.
Returns:
- Randomised data in the shape specified by the 'dimensions' variable
+ Randomised data in the shape specified by the 'dimensions' variable.
+
+ Examples:
+
+ Generate a random vector of floats between 0 and 10.5 of length 20
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.random.random(20, 10.5)
+ pykx.FloatVector(pykx.q('5.233059 0.5785577 2.668026 4.834967 0.5733764..'))
+ ```
+
+ Generate a 1D generic list containing random values from a supplied list
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.random.random(20, ['a', 10, 1.5])
+ pykx.List(pykx.q('
+ 10
+ 10
+ 1.5
+ `a
+ ..
+ '))
+ ```
+
+ Generate a 2D generic list containing random long atoms between 0 and 100
+
+ ```python
+ >>> import pykx as kx
+ >>> arr = kx.random.random([5, 5], 100)
+ >>> arr
+ pykx.List(pykx.q('
+ 67 46 30 29 61
+ 82 80 0 73 97
+ 92 75 38 28 94
+ 64 75 92 35 95
+ 81 45 44 59 49
+ '))
+ >>> arr[0]
+ pykx.LongVector(pykx.q('67 46 30 29 61'))
+ ```
+
+ Generate a random vector of GUIDs using GUID null to generate full range with a defined seed
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.random.random(100, kx.GUIDAtom.null, seed=42)
+ pykx.GUIDVector(pykx.q('84cf32c6-c711-79b4-2f31-6e85923decff 223..'))
+ ```
+
+    Using a negative value, perform a "deal" returning non-repeating values from a list of strings
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.random.random(-3, ['the', 'quick', 'brown', 'fox'])
+ pykx.SymbolVector(pykx.q('`the`fox`brown'))
+ ```
"""
if seed is not None:
diff --git a/src/pykx/register.py b/src/pykx/register.py
index b268f99..7b65840 100644
--- a/src/pykx/register.py
+++ b/src/pykx/register.py
@@ -1,9 +1,12 @@
"""Functionality for the registration of conversion functions between PyKX and Python"""
from .toq import _converter_from_python_type
+from .wrappers import Column
+from typing import Any, Callable
__all__ = [
'py_toq',
+ 'column_function',
]
@@ -16,8 +19,8 @@ def __dir__():
return __all__
-def py_toq(py_type,
- conversion_function,
+def py_toq(py_type: Any,
+ conversion_function: Callable,
*,
overwrite: bool = False
) -> None:
@@ -84,8 +87,73 @@ def py_toq(py_type,
if not overwrite and py_type in _converter_from_python_type:
raise Exception("Attempting to overwrite already defined type :" + str(py_type))
- def wrap_conversion(data, ktype=None, cast=False, handle_nulls=False):
+ def wrap_conversion(data, ktype=None, cast=False, handle_nulls=False, strings_as_char=False):
return conversion_function(data)
_converter_from_python_type.update({py_type: wrap_conversion})
return None
+
+
+def column_function(name: str,
+ conversion_function: Callable,
+ overwrite: bool = False
+) -> None:
+ """
+ Register a function to be accessible as a callable function off the kx.Column
+ objects
+
+ !!! Note
+ The return of this function should be a `QueryPhrase` object
+
+ !!! Warning
+        Application of this functionality is at a user's discretion, issues
+ arising from overwritten default conversion types are unsupported
+
+ Parameters:
+ name: The name to be given to the method which can be used on a column
+ conversion_function: The function/callable which will be applied when calling
+ a query
+
+ Returns:
+ A `None` object on successful invocation
+
+ Examples:
+
+ Register min-max scaler function for application on column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data = {
+ ... 'sym': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'true': kx.random.random(100, 100.0),
+ ... 'pred': kx.random.random(100, 100.0)
+ ... })
+ >>> def min_max_scaler(self):
+ ... return self.call('{(x-minData)%max[x]-minData:min x}')
+ >>> kx.register.column_function('minmax', min_max_scaler)
+ >>> tab.select(kx.Column('true') & kx.Column('true').minmax().rename('scaled_true'))
+ ```
+
+ Register mean-absolute error function to be applied between 'true' and 'pred' columns
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data = {
+ ... 'sym': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'true': kx.random.random(100, 100.0),
+ ... 'pred': kx.random.random(100, 100.0)
+ ... })
+ >>> def mean_abs_error(self, other):
+ ... return self.call('{avg abs x-y}', other)
+ >>> kx.register.column_function('mean_abs_error', mean_abs_error)
+ >>> tab.exec(kx.Column('pred').mean_abs_error(kx.Column('true')))
+ >>> tab.select(kx.Column('pred').mean_abs_error(kx.Column('true')), by=kx.Column('sym'))
+ ```
+ """
+ if not overwrite:
+ try:
+ getattr(Column, name)
+ raise Exception(f"Attribute {name} already defined, please use 'overwrite' keyword")
+ except AttributeError:
+ pass
+ setattr(Column, name, conversion_function)
diff --git a/src/pykx/reimporter.py b/src/pykx/reimporter.py
index 629e952..e08ffe3 100644
--- a/src/pykx/reimporter.py
+++ b/src/pykx/reimporter.py
@@ -9,8 +9,7 @@
"""
import os
-
-original_qhome = str(os.getenv('QHOME'))
+from .config import pykx_executable, qhome
class PyKXReimport:
@@ -33,9 +32,6 @@ class PyKXReimport:
def __init__(self):
self.envlist = ('PYKX_DEFAULT_CONVERSION',
'PYKX_UNDER_Q',
- 'SKIP_UNDERQ',
- 'PYKX_SKIP_UNDERQ',
- 'UNDER_PYTHON',
'PYKX_UNDER_PYTHON',
'PYKX_Q_LOADED_MARKER',
'PYKX_LOADED_UNDER_Q',
@@ -58,7 +54,8 @@ def reset(self):
os.unsetenv(x)
if y is not None:
del os.environ[x]
- os.environ['QHOME'] = original_qhome
+ os.environ['QHOME'] = str(qhome)
+ os.environ['PYKX_EXECUTABLE'] = pykx_executable
def restore(self):
"""Restore all the required environment variables.
diff --git a/src/pykx/remote.py b/src/pykx/remote.py
index 050b065..72718a2 100644
--- a/src/pykx/remote.py
+++ b/src/pykx/remote.py
@@ -1,19 +1,9 @@
"""
-Functionality for the generation and management of remote Python function
-execution.
-
-!!! Warning
-
- This functionality is provided in it's present form as a BETA
- Feature and is subject to change. To enable this functionality
- for testing please following configuration instructions
- [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'`
+_This page documents the API for generation and management of remote Python function execution._
"""
import inspect
from typing import Union
-from . import beta_features
-from .config import _check_beta
from .ipc import SyncQConnection
@@ -29,8 +19,6 @@
'function'
]
-beta_features.append('Remote Functions')
-
def _init(_q):
global q
@@ -41,97 +29,71 @@ def __dir__():
return __all__
-class session():
- """
- A session refers to a connection to a remote kdb+/q process against which
- users are defining/registering Python Functions which will return results
- to a Python session.
- """
- def __init__(self):
- _check_beta('Remote Functions')
- if not import_success:
- raise ImportError("Failed to load Python package: 'dill',"
- " please install dependency using 'pip install pykx[beta]'")
- self.valid = False
- self._libraries = []
- self._session = None
-
- def add_library(self, *args):
- """
- Add a list of Python libraries which will be imported prior to definition
- of a remote Python function, this allows users for example to import numpy
- and use it as a defined library within a remote function.
-
- Parameters:
- *args: A list of strings denoting the packages which are to be imported
- for use by a remote function.
-
- Returns:
- Returns a `None` type object on successful invocation.
-
- Example:
-
- ```python
- >>> from pykx.remote import session
- >>> remote_session = session()
- >>> remote_session.add_library('numpy', 'pandas')
- ```
+class session:
+ def __init__(self,
+ host: Union[str, bytes] = 'localhost',
+ port: int = None,
+ libraries: dict = None,
+ *,
+ username: Union[str, bytes] = '',
+ password: Union[str, bytes] = '',
+ timeout: float = 0.0,
+ large_messages: bool = True,
+ tls: bool = False,
+ reconnection_attempts: int = -1
+ ) -> None:
"""
- if self._session is None:
- raise Exception("Unable to add packages in the absence of a session")
- for i in args:
- if not isinstance(i, str):
- raise Exception(f'Supplied library argument {i} is not a str like object, '
- f'supplied object is of type: {type(i)}')
- self._libraries.append(i)
-
- def create(self,
- host: Union[str, bytes] = 'localhost',
- port: int = None,
- *,
- username: Union[str, bytes] = '',
- password: Union[str, bytes] = '',
- timeout: float = 0.0,
- large_messages: bool = True,
- tls: bool = False):
- """
- Populate a session for use when generating a function for remote execution. This
- session will be backed by a SyncQConnection instance, note that only one session
- can be associated with a given instance of a `session` class.
+ Initialise a session object, opening a connection to the specified remote q process. Users
+ can specify the Python libraries to load into the remote process. Once the connection is
+ open, PyKX is loaded if not already present, then the requested libraries are imported.
Parameters:
- host: The host name to which a connection is to be established.
- port: The port to which a connection is to be established.
+ host: The host name running the remote process.
+ port: The port of the remote process.
+ libraries: A dictionary mapping the alias used for each imported Python library
+ to the name of that library, for example `{'np': 'numpy'}`.
username: Username for q connection authorization.
password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
- will be non-blocking.
- large_messages: Whether support for messages >2GB should be enabled.
- tls: Whether TLS should be used.
-
- Returns:
- Returns a `None` type object on successful connection creation
+ timeout: Number of seconds to set the timeout for blocking socket operations. Input 0
+ to set the socket to non-blocking.
+ large_messages: Boolean flag to enable/disable messages >2GB in size.
+ tls: Boolean flag to enable/disable TLS.
+ reconnection_attempts: The number of attempts to reconnect to the q server when there is
+ a disconnect. Input a negative value to disable reconnect attempts. A value of 0
+ indicates no limit on reconnect attempts, with each attempt applying an exponential
+ backoff on the time between successive attempts. Input a positive number to
+ specify the maximum number of reconnect attempts. Hitting the maximum without a
+ successful reconnect will throw an error.
- Example:
+ Examples:
- - Connect to a q session on localhost at port 5050
+ - Generate a session connecting to a process running locally
```python
- >>> from pykx.remote import session
- >>> remote_session = session()
- >>> remote_session.create(port = 5050)
+ >>> import pykx as kx
+ >>> remote_session = kx.remote.session(port=5050)
```
- - Connect to a user-password protected q session at a defined port
+ - Generate a session connecting to a remote q process, providing required Python libraries,
+ a username and password
```python
- >>> from pykx.remote import session
- >>> remote_session = session()
- >>> remote_session.create_session(port=5001, username='username', password='password')
+ >>> import pykx as kx
+ >>> remote_session = kx.remote.session(
+ ... port = 5050,
+ ... username = 'user',
+ ... password = 'pass',
+ ... libraries = {'kx': 'pykx', 'np': 'numpy'})
```
"""
- if self._session is not None:
- raise Exception("Active session in progress")
+ if not import_success:
+ raise ImportError("Failed to load Python package: 'dill',"
+ " please install dependency using 'pip install pykx[remote]'")
+ if not (isinstance(libraries, dict) or libraries is None):
+ raise TypeError("libraries must be supplied as a dictionary or None")
+
+ self.valid = False
+ self._libraries = libraries
self._session = SyncQConnection(host, port,
username=username,
password=password,
@@ -139,101 +101,136 @@ def create(self,
large_messages=large_messages,
tls=tls,
no_ctx=True)
+ pykx_loaded = self._session('`pykx in key `')
+ if not pykx_loaded:
+ print("PyKX not loaded on remote server, attempting to load PyKX")
+ self._session('@[system"l ",;"pykx.q";{\'"Failed to load PyKX with error: ",x}]')
+ self.valid = True
+ if self._libraries is not None:
+ self.libraries(self._libraries)
+
+ def libraries(self, libs: dict = None) -> None:
+ """
+ Import the supplied Python libraries on the remote process under the given aliases.
+
+ Parameters:
+ libs: A dictionary mapping the alias used for each imported Python library
+ to the name of that library, for example `{'np': 'numpy'}`.
+
+ Returns:
+ `#!python None` if successful.
+
+ Example:
- def clear(self):
+ ```python
+ >>> import pykx as kx
+ >>> remote_session = kx.remote.session(port=5050)
+ >>> remote_session.libraries({'np': 'numpy', 'pd': 'pandas', 'kx': 'pykx'})
+ ```
+ """
+ if not isinstance(libs, dict):
+ raise TypeError("libs must be provided as a dictionary")
+ for key, value in libs.items():
+ self._session('''
+ {[alias;library]
+ alias:string alias;
+ library:string library;
+ @[.pykx.pyexec;
+ "import ",library," as ",alias;
+ {'"Failed to load library '",x,
+ "' with alias '",y,"' with error: ",z}[library;alias]
+ ]}
+ ''', key, value)
+
+ def close(self) -> None:
"""
- Reset/clear the session and libraries associated with a defined session information
+ Close the connection.
Example:
```python
>>> from pykx.remote import session
- >>> remote_session = session()
- >>> remote_session.create(port = 5050)
- >>> remote_session.add_library('numpy')
- >>> {'session': session._session, 'libraries': session._libraries}
- {'session': pykx.QConnection(port=5001), 'libraries': ['numpy']}
- >>> remote_session.clear()
- >>> {'session': session._session, 'libraries': session._libraries}
- {'session': None, 'libraries': []}
+ >>> remote_session = session(port=5050)
+ >>> remote_session.close()
```
"""
- self._session = None
- self._libraries = []
+ if self._session is not None:
+ self._session.close()
-def function(remote_session, *args):
+def function(remote_session: session, *args) -> None:
"""
This decorator allows users to tag functions which will be executed
- on a remote server defined by a `kx.remote.session` instance.
+ on a remote server defined by a `#!python kx.remote.session` instance.
Parameters:
- remote_session: Valid `kx.remote.session` object used to interact with external q process
- *args: When invoked the decorated function will be passed supplied arguments
+ remote_session: Valid `#!python kx.remote.session` object used to interact with external
+ q process
+ *args: Arguments that will be passed to the decorated function when it is invoked
Returns:
- When invoked the decorated function will return the result as a PyKX object to the
- calling process
+ A PyKX converted type of the result returned from the execution of the decorated function
+ on the remote process
Examples:
- Call a basic decorated function on a remote process
```python
- >>> from pykx.remote import session, function
- >>> remote_session = session()
- >>> session.create(port = 5050)
- >>> @function(session)
+ >>> import pykx as kx
+ >>> session = kx.remote.session(port=5050)
+ >>> @kx.remote.function(session)
... def func(x):
... return x+1
>>> func(1)
pykx.LongAtom(pykx.q('2'))
```
- - Apply a function making use of a named library
+ - Initialize a remote session object with a named library then decorate a function which uses
+ that session to call functions from that library
```python
- >>> from pykx.remote import session, function
- >>> remote_session = session()
- >>> session.create(port = 5050)
- >>> session.add_library('numpy')
- >>> @function(session)
+ >>> import pykx as kx
+ >>> session = kx.remote.session(port=5050, libraries={'numpy': 'numpy'})
+ >>> @kx.remote.function(session)
... def func(start, stop, count):
... return numpy.linspace(start, stop, count)
>>> func(0, 10, 5)
pykx.FloatVector(pykx.q('0 2.5 5 7.5 10'))
```
+
+ - Initialize a remote session object. Once created have that session import a new library.
+
+ ```python
+ >>> import pykx as kx
+ >>> session = kx.remote.session(port=5050)
+ >>> session.libraries({'kx': 'pykx'})
+ >>> @kx.remote.function(session)
+ ... def func(start, stop):
+ ... return start + kx.q.til(stop)
+ >>> func(10, 5)
+ pykx.LongVector(pykx.q('10 11 12 13 14'))
+ ```
"""
def inner_decorator(func):
def pykx_func(*args, _function=func):
- _check_beta('Remote Functions')
if not isinstance(remote_session, session):
raise Exception("Supplied remote_session instance must "
"be a kx.remote.session object")
- if remote_session._session is None:
- raise Exception("User session must be generated using "
- "the 'create_session' function")
- if not remote_session.valid:
- pykx_loaded = remote_session._session('`pykx in key `')
- if not pykx_loaded:
- print("PyKX not loaded on remote server, attempting to load PyKX")
- remote_session._session("@[system\"l \",;\"pykx.q\";"
- "{'\"Failed to load PyKX with error: \",x}]")
- remote_session.valid = True
- if remote_session._libraries is not None:
- for i in remote_session._libraries:
- remote_session._session('{x:string x;'
- ' @[.pykx.pyexec;'
- '"import ",x;{\'"Failed to load package: ",'
- 'x," with: ",y}[x]]}',
- i)
try:
src = dill.source.getsource(_function)
except BaseException:
src = inspect.getsource(_function)
- return remote_session._session('{.pykx.pyexec "\n" sv 1_"\n" vs x; .pykx.get[y;<] . z}',
+ return remote_session._session('''
+ {[code;func_name;args;lenargs]
+ .pykx.pyexec trim "\n" sv 1_"\n" vs code;
+ func:.pykx.get[func_name;<];
+ $[lenargs;func . args;func[]]
+ }
+ ''',
bytes(src, 'UTF-8'),
_function.__name__,
- list(args))
+ list(args),
+ len(args))
return pykx_func
return inner_decorator
diff --git a/src/pykx/schema.py b/src/pykx/schema.py
index 9de1d7d..d13963e 100644
--- a/src/pykx/schema.py
+++ b/src/pykx/schema.py
@@ -1,10 +1,6 @@
"""
-Functionality to support the creation and manipulation of schemas.
-
-Generated schemas can be used in combination with both
- [`insert`](https://code.kx.com/pykx/api/pykx-q-data/wrappers.html#pykx.wrappers.Table.insert) and
- [`upsert`](https://code.kx.com/pykx/api/pykx-q-data/wrappers.html#pykx.wrappers.Table.upsert)
- functionality to create populated table and keyed table objects.
+_This page documents the API for generating table schemas that are compatible with both
+ [upsert](https://code.kx.com/q/ref/upsert/) and [insert](https://code.kx.com/q/ref/insert/)._
"""
from typing import Dict, List, Optional, Union
@@ -53,24 +49,21 @@ def builder(schema: Dict,
*,
key: Optional[Union[str, List[str]]] = None
) -> k.K:
- """Generate an empty schema for a keyed or unkeyed table.
+ """Generate an empty schema for a keyed or unkeyed q table.
Parameters:
schema: The definition of the schema to be created mapping a 'str'
- to a `pykx.*` type object which is one of the types defined in
- `pykx.schema._ktype_to_conversion`.
- key: A `str`-like object or list of `str` objects denoting the columns
- within the table defined by `schema` to be treated as primary keys,
- see [here](https://code.kx.com/q4m3/8_Tables/#841-keyed-table) for
- more information about q keyed tables.
+ to a `#!python pykx.*` type object. Each `#!python pykx.*` value must be one of the
+ types defined in `#!python pykx.schema._ktype_to_conversion`.
+ key: The column name(s) in `#!python schema` to be treated as primary keys.
Returns:
- A `pykx.Table` or `pykx.KeyedTable` matching the provided schema with
+ A `#!python pykx.Table` or `#!python pykx.KeyedTable` matching the provided schema with
zero rows.
Examples:
- Create a simple `pykx.Table` with four columns of different types
+ Create a simple `#!python pykx.Table` with four columns of different types
```python
>>> import pykx as kx
@@ -96,7 +89,7 @@ def builder(schema: Dict,
'))
```
- Create a `pykx.KeyedTable` with a single primary key.
+ Create a `#!python pykx.KeyedTable` with a single primary key.
```python
>>> import pykx as kx
@@ -121,7 +114,7 @@ def builder(schema: Dict,
'))
```
- Create a `pykx.KeyedTable` with multiple primary keys.
+ Create a `#!python pykx.KeyedTable` with multiple primary keys.
```python
>>> import pykx as kx
diff --git a/src/pykx/serialize.py b/src/pykx/serialize.py
index 0e62674..2ffd134 100644
--- a/src/pykx/serialize.py
+++ b/src/pykx/serialize.py
@@ -14,7 +14,7 @@ def __init__(self, obj: Any, mode: int = 6, wait: int = 0):
Parameters:
obj: The object to serialize.
mode: The [capability level](https://code.kx.com/q/basics/ipc/#handshake)
- to use for serialization, defaluts to the maximum value of 6.
+ to use for serialization, defaults to the maximum value of 6.
wait: The message type to use, defaults to 0.
Note: The available message types to use are 0, 1, and 2.
@@ -34,7 +34,7 @@ def __init__(self, obj: Any, mode: int = 6, wait: int = 0):
Serializing a `K` object and copying the serialized data.
- ```
+ ```python
>>> k_obj = kx.q('til 10')
>>> ser = kx.serialize(k_obj)
# The 0-copy memoryview of the data can be accessed through the `data` property
@@ -48,7 +48,7 @@ def __init__(self, obj: Any, mode: int = 6, wait: int = 0):
You can also directly index into the serialized object.
- ```
+ ```python
>>> k_obj = kx.q('til 10')
>>> ser = kx.serialize(k_obj)
>>> ser[0]
@@ -77,7 +77,7 @@ def copy(self):
Serializing a `K` object and then copying the serialized data to a new variable.
- ```
+ ```python
>>> k_obj = kx.q('til 10')
>>> ser = kx.serialize(k_obj)
>>> k_obj_copy = ser.copy()
@@ -92,7 +92,7 @@ def deserialize(data: Union[bytes, serialize, memoryview]):
"""Helper method to deserialize `K` objects from bytes.
Parameters:
- obj: The object to deserialize.
+ data: The object to deserialize.
Examples:
@@ -107,7 +107,7 @@ def deserialize(data: Union[bytes, serialize, memoryview]):
You can also directly deserialize a bytes object.
- ```
+ ```python
>>> k_obj = kx.q('til 10')
>>> ser = kx.serialize(k_obj).copy()
>>> ser
diff --git a/src/pykx/streamlit.py b/src/pykx/streamlit.py
index 0636bc5..d43908f 100644
--- a/src/pykx/streamlit.py
+++ b/src/pykx/streamlit.py
@@ -1,12 +1,9 @@
-import warnings
+from warnings import warn
-from . import beta_features
-from .config import _check_beta, pykx_threading, system
+from .config import pykx_threading, suppress_warnings, system
from .exceptions import QError
from .ipc import SyncQConnection
-beta_features.append('Streamlit Integration')
-
# This class is required to ensure that in the absence
# of the streamlit dependency PyKX can be imported
@@ -37,40 +34,37 @@ def _check_streamlit():
class PyKXConnection(BaseConnection[SyncQConnection]):
"""
- A connection to q/kdb+ processes from streamlit. Initialize using:
+ A connection to a q server from Streamlit. Initialise using:
```python
st.connection("", type = pykx.streamlit.PyKXConnection, *args)
```
- PyKX Connection supports the application of queries using Syncronous IPC
- connections to q/kdb+ processes or Python processes running PyKX as a
- server.
-
- This is supported through the ``query()`` method, this method allows
- users to run `sql`, `qsql` or `q` queries against these processes returning
- PyKX data.
-
!!! Warning
- Streamlit integration is not presently supported for Windows as for
- full utilization it requires use of `PYKX_THREADING` functionality
+ Streamlit integration is not supported for Windows.
+ Full utilization requires `#!bash PYKX_THREADING` which is not supported on Windows.
Parameters:
- host: The host name to which a connection is to be established.
- port: The port to which a connection is to be established.
- username: Username for q connection authorization.
- password: Password for q connection authorization.
- timeout: Timeout for blocking socket operations in seconds. If set to `0`, the socket
- will be non-blocking.
- large_messages: Whether support for messages >2GB should be enabled.
- tls: Whether TLS should be used.
- unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the
- host parameter is ignored. Does not work on Windows.
- wait: Whether the q server should send a response to the query (which this connection
- will wait to receive). Can be overridden on a per-call basis. If `True`, Python will
- wait for the q server to execute the query, and respond with the results. If
- `False`, the q server will respond immediately to every query with generic null
- (`::`), then execute them at some point in the future.
+ host: Server host name.
+ port: Server port number.
+ username: Username for q connection.
+ password: Password for q connection.
+ timeout: The number of seconds before a blocking operation times out. A value of 0 creates
+ a non-blocking connection.
+ large_messages: Boolean flag to enable/disable support for messages >2GB.
+ tls: Boolean flag to enable/disable TLS.
+ unix: Boolean flag to enable Unix domain socket connection. Host parameter is ignored if
+ `#!python True`. Does not work on Windows.
+ wait: Boolean to enable/disable waiting for the q server to complete executing the query
+ and return the result. `#!python False` emulates async queries, causing the q server
+ to respond immediately with the generic null `#!q ::` and perform calculations at
+ another time.
+ reconnection_attempts: The maximum number of attempts to reconnect when a connection is
+ lost. A negative number prevents any attempts to reconnect. A value of 0 will cause
+ continuous reconnect attempts until a connection is established. Positive values are
+ the number of times to attempt. Successive reconnect attempts are run at exponentially
+ increasing backoff times. Reaching the maximum number of attempts without a
+ successful reconnection will throw an error.
Note: The `username` and `password` parameters are not required.
The `username` and `password` parameters are only required if the q server requires
@@ -85,42 +79,39 @@ class PyKXConnection(BaseConnection[SyncQConnection]):
Examples:
- Connect to a q process at `localhost` on port `5050` as a streamlit connection,
- querying using q
+ Open a streamlit connection to a locally running q process on port 5050.
```python
>>> import streamlit as st
>>> import pykx as kx
>>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection,
... host = 'localhost', port = 5050)
- >>> df = conn.query('select from tab').pd()
- >>> st.dataframe(df)
+ >>>
```
"""
_connection = None
_connection_kwargs = {}
def _connect(self, **kwargs) -> None:
- _check_beta('Streamlit Integration')
_check_streamlit()
if system == 'Windows':
raise QError('Streamlit integration currently unsupported for Windows')
- if not pykx_threading:
- warnings.warn("Streamlit caching requires execution on secondary threads, "
- "to utilize this fully please consider setting PYKX_THREADING "
- "= 'True'")
+ if (not pykx_threading) and (not suppress_warnings):
+ warn("Streamlit caching requires execution on secondary threads, "
+ "to utilize this fully please consider setting PYKX_THREADING "
+ "= 'True'. To suppress this warning please consider setting "
+ "PYKX_SUPPRESS_WARNINGS = 'True'")
self._connection = SyncQConnection(no_ctx=True, **kwargs)
self._connection_kwargs = kwargs
def reset(self, **kwargs) -> None:
"""
- Reset an existing Streamlit Connection object, this can be used to manually
- reconnect to a datasource which was disconnected. This will use the connection
- details provided at initialisation of the original class.
+ Close and reopen an existing Streamlit connection.
Example:
- Reset a connection if deemed to no longer be valid
+ Open a connection to a locally running process on port 5050 and check if it is a healthy
+ connection. If it is not, reset the connection.
```python
>>> import streamlit as st
@@ -132,7 +123,6 @@ def reset(self, **kwargs) -> None:
>>>
```
"""
- _check_beta('Streamlit Integration')
_check_streamlit()
if not isinstance(self._connection, SyncQConnection):
raise QError('Unable to reset uninitialized connection')
@@ -141,12 +131,10 @@ def reset(self, **kwargs) -> None:
def is_healthy(self) -> bool:
"""
- Check if an existing streamlit connection is 'healthy' and
- available for query.
+ Check if an existing streamlit connection is 'healthy' and available for query.
Returns:
- A boolean indicating if the connection being used is in a
- 'healthy' state
+ A boolean indicating if the connection being used is in a 'healthy' state
Example:
@@ -159,31 +147,30 @@ def is_healthy(self) -> bool:
True
```
"""
- _check_beta('Streamlit Integration')
_check_streamlit()
if not isinstance(self._connection, SyncQConnection):
raise QError('Unable to validate uninitialized connection')
if self._connection.closed:
- warnings.warn('Connection closed')
+ warn('Connection closed')
return False
try:
self.query('::')
return True
except BaseException as err:
- warnings.warn('Unhealthy connection detected with error: ' + str(err))
+ warn('Unhealthy connection detected with error: ' + str(err))
return False
def query(self, query: str, *args, format='q', **kwargs):
"""
- Evaluate a query on the connected q process over IPC.
+ Query the connected q process over IPC.
Parameters:
- query: A q expression to be evaluated.
- *args: Arguments to the q query. Each argument will be converted into a `pykx.K`
- object. Up to 8 arguments can be provided, as that is the maximum
- supported by q.
- format: What execution format is to be used, should the function use the `qsql`
- interface, execute a `sql` query or run `q` code.
+ query: A q expression to be evaluated. This must be valid q, qSQL or SQL in the KX
+ Insights style.
+ *args: Arguments to the query. Each argument will be converted into a `#!python pykx.K`
+ object. Up to 8 arguments can be provided (maximum supported by q functions).
+ format: Description of query format for internal pre-processing before the query is sent
+ to the server. This must be one of 'q', 'qsql' or 'sql'.
Raises:
RuntimeError: A closed IPC connection was used.
@@ -195,8 +182,7 @@ def query(self, query: str, *args, format='q', **kwargs):
Examples:
- Connect to a q process at `localhost` on port `5050` as a streamlit connection,
- querying using q
+ Open a connection to a locally running q process on port 5050 and query using 'q' format.
```python
>>> import streamlit as st
@@ -207,8 +193,8 @@ def query(self, query: str, *args, format='q', **kwargs):
>>> st.dataframe(df)
```
- Connect to a q process at `localhost` on port `5050` as a streamlit connection,
- querying using qsql
+ Open a connection to a locally running q process on port 5050 and query using 'qsql'
+ format.
```python
>>> import streamlit as st
@@ -219,8 +205,8 @@ def query(self, query: str, *args, format='q', **kwargs):
>>> st.dataframe(df)
```
- Connect to a q process at `localhost` on port `5050` as a streamlit connection,
- querying using sql
+ Connect to a locally running q process on port 5050 and query using 'sql'
+ format.
```python
>>> import streamlit as st
@@ -231,7 +217,6 @@ def query(self, query: str, *args, format='q', **kwargs):
>>> st.dataframe(df)
```
"""
- _check_beta('Streamlit Integration')
_check_streamlit()
def _query(query: str, format, args, kwargs):
diff --git a/src/pykx/system.py b/src/pykx/system.py
index 5bfcbfd..3c10e22 100644
--- a/src/pykx/system.py
+++ b/src/pykx/system.py
@@ -3,7 +3,7 @@
from pathlib import Path
from warnings import warn
-from . import Q, wrappers as k
+from . import help, Q, wrappers as k
from .exceptions import PyKXWarning, QError
@@ -27,6 +27,10 @@ def __init__(self, q: Q):
def __call__(self, x):
return self._q('{system x}', k.CharVector(x))
+ @property
+ def __doc__(self):
+ return help.qhelp('system')
+
def tables(self, namespace=None):
"""Lists the tables associated with a namespace/dictionary
diff --git a/src/pykx/tick.py b/src/pykx/tick.py
new file mode 100644
index 0000000..bb088ca
--- /dev/null
+++ b/src/pykx/tick.py
@@ -0,0 +1,1679 @@
+"""
+Functionality for the generation and management of streaming infrastructures using PyKX.
+Fully described [here](../user-guide/advanced/streaming/index.md), this allows users to ingest,
+persist and query vast amounts of real-time and historical data in a unified data-format.
+"""
+
+from .exceptions import QError
+from .util import start_q_subprocess
+from .ipc import SyncQConnection
+from . import wrappers as k
+
+import inspect
+import os
+import time
+from typing import Callable, Union
+
+import dill
+
+__all__ = [
+ 'TICK',
+ 'RTP',
+ 'HDB',
+ 'GATEWAY',
+ 'BASIC'
+]
+
+
+def _init(_q):
+ global q
+ q = _q
+
+
+def __dir__():
+ return __all__
+
+
+class STREAMING:
+    """
+    The `STREAMING` class is the base class for the TICK, RTP, HDB and GATEWAY
+    classes. Each of these child classes inherits, and may modify, the logic of this parent.
+    Functions such as [`libraries`](#pykx.tick.STREAMING.libraries) and
+    [`register_api`](#pykx.tick.STREAMING.register_api) have the same definition
+    for, and are available to, all process types.
+
+    Unless a child class provides its own definition (as is the case for `start` in all
+    class types) a user should assume that the logic of a function, for example
+    `register_api`, is consistent across process types.
+    """
+    def __init__(self,
+                 port: int = 5010,
+                 *,
+                 process_logs: Union[str, bool] = False,
+                 libraries: dict = None,
+                 apis: dict = None,
+                 init_args=None
+                 ) -> None:
+        """Start a q process on `port`, connect to it, then load libraries and register APIs."""
+        self._port = port
+        self._libraries = libraries
+        self._apis = apis
+        self._init_args = init_args
+        self._init_config = None
+        self._connection = None
+        # Default the started flag to False unless it is already set to a truthy
+        # value. `start` sets this flag to True once `.tick.init` has been called
+        # on the sub-process.
+        if not getattr(self, '_initialized', False):
+            self._initialized = False
+        self.server = start_q_subprocess(self._port, load_file='pykx.q', init_args=init_args)
+        if self.server is None:
+            # First launch returned no process handle: retry once
+            try:
+                self.server = start_q_subprocess(
+                    self._port, load_file='pykx.q', init_args=init_args)
+            except BaseException as err:
+                raise QError(f'Unable to initialize q process with error {err}') from err
+        try:
+            self._connection = connection = SyncQConnection(port=port)
+            self._process_logs = process_logs
+            if isinstance(process_logs, str):
+                connection('{system"1 ",string[x];system"2 ",string x}', process_logs)
+            if libraries is not None:
+                self.libraries(libraries)
+            if isinstance(apis, dict):
+                for key, value in apis.items():
+                    self.register_api(key, value)
+        except BaseException as err:
+            self.stop()
+            raise err
+
+    def __call__(self, *args) -> k.K:
+        """
+        Execute a synchronous call against the connected process
+
+        Parameters:
+            *args: Pass supplied arguments to the `pykx.SyncQConnection`
+                object
+
+        Returns:
+            The result of the executed call on the connection object
+
+        Example:
+
+        ```python
+        >>> import pykx as kx
+        >>> tick = kx.tick.TICK(port=5030)
+        >>> tick('1+1').py()
+        2
+        ```
+        """
+        return self._connection(*args)
+
+    def start(self,
+              config: dict = None,
+              print_init: bool = True,
+              custom_start: str = '') -> None:
+        """
+        Start/initialise processing of messages on the associated sub-process.
+        This allows users to split the process initialisation from processing
+        of data to allow additional configuration/setup to be completed before
+        messages begin to be processed.
+
+        Parameters:
+            config: A dictionary passed to the sub-process which is used by
+                the function `.tick.init` when the process is started, the
+                supported parameters for this function will be different
+                depending on process type.
+            print_init: A boolean indicating if messages should be printed
+                when data processing is being started and has started successfully.
+            custom_start: Text inserted after the process name in the printed messages.
+
+        Returns:
+            On successful start this functionality will return None,
+                otherwise will raise an error
+
+        Example:
+
+        ```python
+        >>> import pykx as kx
+        >>> trade = kx.schema.builder({
+        ...     'time': kx.TimespanAtom  , 'sym': kx.SymbolAtom,
+        ...     'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+        ...     'px': kx.FloatAtom})
+        >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+        Initialising Tickerplant process on port: 5030
+        Tickerplant initialised successfully on port: 5030
+        >>> tick.start()
+        Starting Tickerplant data processing on port: 5030
+        Tickerplant process successfully started on port: 5030
+        ```
+        """
+        if print_init:
+            print(f'Starting {self._name} {custom_start} on port: {self._port}')
+        if config is None:
+            config = {}
+        if not isinstance(config, dict):
+            raise TypeError('Supplied configuration must be a "dict" object')
+        self._init_config = config
+        self._connection('.pykx.setdefault["k"]')
+        self._connection('.tick.init', config)
+        if print_init:
+            print(f'{self._name} {custom_start} successfully started on port: {self._port}\n')
+        self._initialized = True
+
+    def stop(self):
+        """
+        Stop processing on the sub-process and kill the process.
+        This allows the port on which the process is deployed to be reclaimed
+        and the process to be restarted if appropriate.
+
+        Example:
+
+        ```python
+        >>> import pykx as kx
+        >>> tick = kx.tick.TICK(port=5030)
+        Initialising Tickerplant process on port: 5030
+        Tickerplant initialised successfully on port: 5030
+        >>> tick.stop()
+        Tickerplant process on port 5030 being stopped
+        Tickerplant successfully shutdown on port 5030
+        ```
+        """
+        print(f'{self._name} process on port {self._port} being stopped')
+        self.server.stdin.close()
+        self.server.kill()
+        time.sleep(1)
+        print(f'{self._name} successfully shutdown on port {self._port}\n')
+
+    def libraries(self, libs: dict = None) -> None:
+        """
+        Specify and load the Python libraries which should be available on a
+        process, the libraries should be supplied as a dictionary mapping
+        the alias used for calling the library to the library name.
+
+        Parameters:
+            libs: A dictionary mapping the alias by which a Python library will be
+                referred to the name of library
+
+        Example:
+
+        - In the following example we denote that the process should have access
+            to the Python libraries `numpy` and `pykx` which when called by a user
+            will be referred to as `np` and `kx` respectively
+
+        ```python
+        >>> import pykx as kx
+        >>> tick = kx.tick.TICK(port=5030)
+        Initialising Tickerplant process on port: 5030
+        Tickerplant initialised successfully on port: 5030
+        >>> tick.libraries({'np': 'numpy', 'kx': 'pykx'})
+        ```
+        """
+        if libs is None:
+            raise ValueError('No libraries provided')
+        if not isinstance(libs, dict):
+            raise TypeError('Provided libraries not of type dict')
+        for key, value in libs.items():
+            self._connection('{.pykx.pyexec"import ",string[y]," as ",string x}',
+                             key,
+                             value)
+
+    def register_api(self, api_name: str, function: Callable) -> None:
+        """
+        Define a registered API to be callable by name on a process,
+        this API can be a Python function or a PyKX
+        lambda/projection.
+
+        Parameters:
+            api_name: The name by which the provided function will be called
+                on the process
+            function: The function which is to be defined as a callable API on
+                the process, in the case of a Python function this must be a
+                single independent function which is callable using libraries
+                available on the process
+
+        Example:
+
+        ```python
+        >>> import pykx as kx
+        >>> def custom_func(num_vals, added_value):
+        ...     return added_value + kx.q.til(num_vals)
+        >>> hdb = kx.tick.HDB(port=5031)
+        >>> hdb.libraries({'kx': 'pykx'})
+        >>> hdb.register_api('custom_api', custom_func)
+        >>> hdb('custom_api', 5, 10)
+        pykx.LongVector(pykx.q('10 11 12 13 14'))
+        ```
+        """
+        print(f"Registering callable function '{api_name}' on port {self._port}")
+        if isinstance(function, k.Function):
+            self._connection('set', api_name, function)
+        else:
+            try:
+                src = dill.source.getsource(function)
+            except BaseException:
+                src = inspect.getsource(function)
+            self._connection('{.pykx.pyexec x;z set .pykx.get[y;<]}',
+                             bytes(src, 'UTF-8'),
+                             function.__name__,
+                             api_name)
+        print(f"Successfully registered callable function '{api_name}' on port {self._port}")
+
+    def set_timer(self, timer: int = 1000) -> None:
+        """
+        Set a timer on the connected process, this allows users to configure
+        the intervals at which data is published for example.
+
+        Parameters:
+            timer: The interval at which the timer is triggered in milliseconds.
+
+        Returns:
+            On successful execution this will return None
+        """
+        self._connection('{system"t ",string[x]}', timer)
+
+
+class TICK(STREAMING):
+ """
+ Initialise a tickerplant subprocess establishing a communication connection.
+ This can either be a process which publishes data to subscribing processes only
+ (chained) or a process which logs incoming messages for replay and triggers
+ end-of-day events on subscribing processes.
+
+ Parameters:
+ port: The port on which the tickerplant process will be established
+ process_logs: Should the logs of the generated tickerplant process be published
+ to standard-out of the Python process (True), suppressed (False) or
+ published to a supplied file-name
+ tables: A dictionary mapping the names of tables and their schemas which are
+ used to denote the tables which the tickerplant will process
+ hard_reset: Reset logfiles for the current date when starting tickerplant
+ log_directory: The location of the directory to which logfiles will be published
+ chained: If the tickerplant is 'chained' or not, if chained the process will
+ not log messages or run end-of-day processing
+ init_args: A list of arguments passed to the initialized q process at startup
+ denoting the command line options to be used for the initialized q process
+ see [here](https://code.kx.com/q/basics/cmdline/) for a full breakdown.
+
+ Returns:
+ On successful initialisation will initialise the tickerplant process and set
+ appropriate configuration
+
+ Examples:
+
+ Initialise a tickerplant on port 5030, defining a trade table.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+ ```
+
+ Initialise a chained tickerplant on port 5031 receiving messages from an upstream
+ tickerplant on port 5030. Publish stdout/stderr from the process to a file
+ 'test.log'.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade}, process_logs='test.log')
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+ >>> tick.start()
+ Starting Tickerplant data processing on port: 5030
+ Tickerplant process successfully started on port: 5030
+ >>>
+ >>> tick_chained = kx.tick.TICK(port=5031, chained=True)
+ Initialising Tickerplant process on port: 5031
+ Tickerplant initialised successfully on port: 5031
+ >>> tick_chained.start({'tickerplant': 'localhost:5030'})
+ Starting Tickerplant data processing on port: 5031
+ Tickerplant process successfully started on port: 5031
+ ```
+ """
+ def __init__(self,
+ port: int = 5010,
+ *,
+ process_logs: Union[bool, str] = True,
+ tables: dict = None,
+ log_directory: str = None,
+ hard_reset: bool = False,
+ chained: bool = False,
+ init_args: list = None) -> None:
+ self._chained = chained
+ self._tables=tables
+ self._name = 'Tickerplant'
+
+ print(f'Initialising {self._name} process on port: {port}')
+ super().__init__(port, process_logs=process_logs, init_args=init_args)
+ self._log_directory = os.getcwd() if log_directory is None else log_directory
+ try:
+ self._connection('{.tick.hardReset:x}', hard_reset)
+ self._connection('{.tick.logdir:$[x~(::);();string[x]]}', log_directory)
+ if chained:
+ self._connection('.pykx.loadExtension["chained_tick"]')
+ else:
+ self._connection('.pykx.loadExtension["plant"]')
+ if isinstance(tables, dict):
+ self.set_tables(tables)
+ except BaseException as err:
+ print(f'{self._name} failed to initialise on port: {port}\n')
+ if self._connection is not None:
+ self.server.stop()
+ raise err
+ print(f'{self._name} initialised successfully on port: {port}\n')
+
+ def start(self, config: dict = None) -> None:
+ """
+ Start/initialise processing of messages on the associated tickerplant sub-process.
+ This allows users to split the process initialisation from processing
+ of data to allow additional configuration/setup to be completed before
+ messages begin to be processed.
+
+ Parameters:
+ config: A dictionary passed to the sub-process which is used by
+ the function `.tick.init` when the process is started, the
+ supported parameters for this function will be different
+ depending on process type.
+
+ Returns:
+ On successful start this functionality will return None,
+ otherwise will raise an error
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ ```
+ """
+ print(f'Starting {self._name} data processing on port: {self._port}')
+ if not self._chained:
+ if self._connection('.tick.tabs').py() == []:
+ raise QError('Unable to initialise TICKERPLANT without tables '
+ 'set using "set_tables"')
+ super().start(config, print_init=False, custom_start='data processing')
+ print(f'{self._name} process successfully started on port: {self._port}\n')
+
+ def restart(self) -> None:
+ """
+ Restart and re-initialise the Tickerplant, this will
+ start the processes with all tables defined on the expected port
+
+ Example:
+
+ Restart a Tickerplant validating that the expected tables are
+ appropriately defined
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+ >>> tick.start()
+ Starting Tickerplant data processing on port: 5030
+ Tickerplant process successfully started on port: 5030
+ >>> tick.restart()
+ Restarting Tickerplant on port 5030
+
+ Tickerplant process on port 5030 being stopped
+ Tickerplant successfully shutdown on port 5030
+
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+
+ Tickerplant on port 5030 successfully restarted
+ >>> tick('trade')
+ pykx.Table(pykx.q('
+ time sym exchange sz px
+ -----------------------
+ '))
+ ```
+ """
+ print(f'Restarting {self._name} on port {self._port}\n')
+ self.stop()
+ self.__init__(port=self._port,
+ process_logs=self._process_logs,
+ tables=self._tables,
+ log_directory=self._log_directory,
+ chained=self._chained)
+ if self._init_config is not None:
+ self.init(config=self._init_config)
+ print(f'{self._name} on port {self._port} successfully restarted\n')
+
+ def set_tables(self, tables: dict) -> None:
+ """
+ Define the tables to be used for consuming and serving messages on
+ the tickerplant process.
+
+ Parameters:
+ tables: A dictionary mapping the name of a table to be defined on
+ the process to the table schema
+
+ Returns:
+ On the tickerplant persist the table schema as the supplied name
+
+ Example:
+
+ Set a table 'trade' with a supplied schema on a tickerplant process
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030)
+ >>> tick.set_tables({'trade': trade})
+ >>> tick('trade')
+ pykx.Table(pykx.q('
+ time sym exchange sz px
+ -----------------------
+ '))
+ ```
+ """
+ for key, value in tables.items():
+ if not isinstance(key, str):
+ raise QError('Provided table name must be an "str"')
+ if not isinstance(value, k.Table):
+ raise QError('Provided table schema must be an "kx.Table"')
+ if not q('~', ['time', 'sym'], value.columns[:2]):
+ raise QError("'time' and 'sym' must be first two columns "
+ f"in Table: {key}")
+ self._connection('.tick.set_tables', key, value)
+
+ def set_snap(self, snap_function: Callable) -> None:
+ """
+ Define a 'snap' function used by KX Dashboards UI to manage the data
+ presented to a Dashboard process when subscribing to data from a
+ Tickerplant process.
+
+ Parameters:
+ snap_function: A Python function or callable PyKX Lambda which takes
+ a single argument and returns the expected tabular dataset for
+ display
+
+ Returns:
+ On successful execution will set the streaming function `.u.snap` and
+ return None
+
+ Example:
+
+ Implement a ring-buffer to provide the most recent 1,000 datapoints
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030)
+ >>> def buffer_ring(x):
+ ... if 1000 < len(kx.q['trade']):
+ ... return trade
+ ... else:
+ ... kx.q['trade'][-1000:]
+ >>> tick.set_
+ ```
+ """
+ if not isinstance(snap_function, Callable):
+ raise QError('Provided snap_function is not a callable function')
+ self.register_api('.u.snap', snap_function)
+
+
+# The below is named real-time processing to allow for a distinction between an RDB and RTE
+# to not be required at initialisation ... naming is hard
+class RTP(STREAMING):
+ """
+ Initialise a Real-Time Processor (RTP), establishing a communication connection to this
+ process. An RTP at its most fundamental level comprises the following actions and is
+ known as a 'vanilla' RTP:
+
+ 1. Receives messages from an upstream tickerplant process via subscription.
+ 2. Inserts data into an in-memory table which will be written to disk at a defined
+ time interval.
+ 3. Triggers end-of-day processing which writes the data to disk and tells connected
+ historical databases to reload if needed.
+
+ In a more complex case an RTP will run analytics on data prior to and post data insert
+ as noted in step 2 above. These analytics can either be Python or q/PyKX functions.
+ Additionally users can define 'apis' on the server which can be called explicitly
+ by users.
+
+ Parameters:
+ port: The port on which the RTP process will be established
+ process_logs: Should the logs of the generated RTP process be published
+ to standard-out of the Python process (True), suppressed (False) or
+ published to a supplied file-name
+ libraries: A dictionary mapping the alias by which a Python library will be
+ referred to the name of library
+ subscriptions: A list of tables (str) from which to receive updates, if None
+ the RTP will receive updates from all tables
+ apis: A dictionary mapping the names to be used by users when calling a
+ defined API to the callable Python functions or PyKX lambdas/projections
+ which will be called.
+ vanilla: In the case that the RTP is defined as 'vanilla' data received
+ from an upstream tickerplant will be inserted into an in-memory table.
+ If vanilla is False then a 'pre_processor' and 'post_processor' function
+ can be defined using the below parameters to modify data prior to and post
+ insert.
+ pre_processor: A function taking the name of a table and message as parameters,
+ this function should/can modify the message prior to insertion into an
+ in-memory table. If this function returns `None` the processing of that
+ message will be terminated and the data will not be inserted to the table.
+ post_processor: A function taking the name of a table and message as parameters,
+ this function can publish data to other processes, update global variables etc.
+ In most examples post_processor functions are used to publish data to a
+ tickerplant or persist derived analytics for use by other users.
+ init_args: A list of arguments passed to the initialized q process at startup
+ denoting the command line options to be used for the initialized q process
+ see [here](https://code.kx.com/q/basics/cmdline/) for a full breakdown.
+
+ Returns:
+ On successful initialisation will initialise the RTP process and set
+ appropriate configuration
+
+ Examples:
+
+ Initialise a vanilla Real-Time Processor on port 5032 subscribing to all messages
+ from a tickerplant on port 5030.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade}, process_logs='test.log')
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+ >>> tick.start()
+ Starting Tickerplant data processing on port: 5030
+ Tickerplant process successfully started on port: 5030
+ >>>
+ >>> rdb = kx.tick.RTP(port=5032)
+ Initialising Real-time processor on port: 5032
+ Real-time processor initialised successfully on port: 5032
+ >>> rdb.start({'tickerplant': 'localhost:5030'})
+ Starting Real-time processing on port: 5032
+ Real-time processing successfully started on port: 5032
+ ```
+
+ Initialise a vanilla Real-Time Processor on port 5032 logging process logs to 'test.log',
+ subscribing to a table `trade` only and defining a query API named `custom_query`.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>>
+ >>> def query_api(table):
+ ... return kx.q.qsql.select(table)
+ >>> rdb = kx.tick.RTP(
+ ... port=5032,
+ ... process_logs='test.log',
+ ... libraries = {'kx': 'pykx'},
+ ... apis={'custom_query': query_api}
+ ... )
+ Initialising Real-time processor on port: 5032
+ Registering callable function 'custom_query' on port 5032
+ Successfully registed callable function 'custom_query' on port 5032
+ Real-time processor initialised successfully on port: 5032
+ >>> rdb.start({'tickerplant': 'localhost:5030'})
+ Starting Real-time processing on port: 5032
+ Real-time processing successfully started on port: 5032
+ ```
+
+ Initialise a complex Real-Time Processor which includes data pre-processing
+ prior to insertion of data into the Real-Time Database and which contains a
+ post-processing step to derive analytics after data has been inserted into the
+ in-memory table.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>> def pre_process(table, message):
+ ... if table in ['trade', 'quote']:
+ ... return message
+ ... else:
+ ... return None
+ >>> def post_process(table, message):
+ ... tradeagg = kx.q.qsql.select('trade',
+ ... columns={'trdvol': 'sum px*sz',
+ ... 'maxpx': 'max px',
+ ... 'minpx': 'min px'},
+ ... by='sym')
+ ... quoteagg = kx.q.qsql.select('quote',
+ ... columns={'maxbpx': 'max bid',
+ ... 'minapx': 'min ask',
+ ... 'baspread': 'max[bid]-min[ask]'},
+ ... by='sym')
+ ... tab = tradeagg.merge(quoteagg, how='left', q_join=True).reset_index()
+ ... tab['time'] = kx.TimespanAtom('now')
+ ... kx.q['aggregate'] = kx.q.xcols(['time', 'sym'], tab)
+ ... return None
+ >>> rte = kx.tick.RTP(port=5031,
+ ... libraries = {'kx': 'pykx'},
+ ... subscriptions = ['trade', 'quote'],
+ ... pre_processor = pre_process,
+ ... post_processor = post_process,
+ ... vanilla=False)
+ >>> rte.start({'tickerplant': 'localhost:5030'})
+ ```
+ """
+ def __init__(self,
+ port: int = 5011,
+ *,
+ process_logs: Union[bool, str] = True,
+ libraries: dict = None,
+ subscriptions: str = None,
+ apis: dict = None,
+ vanilla: bool = True,
+ pre_processor: Callable = None,
+ post_processor: Callable = None,
+ init_args: list = None) -> None:
+ self._subscriptions=subscriptions
+ self._pre_processor=pre_processor
+ self._post_processor=post_processor
+ self._vanilla = vanilla
+ self._name = 'Real-time'
+
+ print(f'Initialising {self._name} processor on port: {port}')
+ try:
+ super().__init__(port,
+ process_logs=process_logs,
+ libraries=libraries,
+ apis=apis,
+ init_args=init_args)
+ self._connection('{.tick.vanilla:x}', vanilla)
+ self._connection('.pykx.loadExtension["rdb"]')
+ if pre_processor is not None:
+ self.pre_processor(pre_processor)
+ if post_processor is not None:
+ self.post_processor(post_processor)
+ if subscriptions is not None:
+ self.subscriptions(subscriptions)
+ except BaseException as err:
+ print(f'{self._name} processor failed to initialise on port: {port}\n')
+ if self._connection is not None:
+ self.server.stop()
+ raise err
+ print(f'{self._name} processor initialised successfully on port: {port}\n')
+
+ def start(self, config: dict = None) -> None:
+ """
+ Start/initialise processing of messages on the Real-Time Processor.
+ This splits the process initialisation from processing of data to allow
+ additional configuration/setup to be completed before messages begin to
+ be processed.
+
+ Parameters:
+ config: A dictionary passed to the sub-process which is used by
+ the function `.tick.init` when the process is started. The following
+ are the supported config options for RTP processes
+
+ 1. `tickerplant`: a string denoting the host+port of the
+ tickerplant from which messages are received. By default
+ port 5010 will be used
+ 2. `hdb`: a string denoting the host+port of the HDB
+ which will be re-loaded at end-of-day
+ 3. `database`: a string denoting the directory where your current
+ day's data will be persisted. This should be the same directory
+ as the `database` keyword for your HDB process should it be used.
+ By default the location "db" will be used in the directory PyKX was
+ imported.
+
+ Returns:
+ On successful start this functionality will return None,
+ otherwise will raise an error
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>> rdb = kx.tick.RTP(port=5032,
+ ... subscriptions = ['trade', 'quote']
+ ... )
+ >>> rdb.start({
+ ... 'tickerplant': 'localhost:5030',
+ ... 'hdb': 'localhost:5031',
+ ... 'database': 'db'})
+ ```
+ """
+ super().start(config, custom_start='processing')
+
+ def restart(self) -> None:
+ """
+ Restart and re-initialise the Real-Time Processor, this will
+ start the processes with all subscriptions, processing functions
+ etc as defined in the initial configuration of the processes.
+
+ Example:
+
+ Restart an RTP process validating that defined API's in the restarted
+ process are appropriately defined
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ Initialising Tickerplant process on port: 5030
+ Tickerplant initialised successfully on port: 5030
+ >>> tick.start()
+ Starting Tickerplant data processing on port: 5030
+ Tickerplant process successfully started on port: 5030
+ >>>
+ >>> def query_api(table):
+ ... return kx.q.qsql.select(table)
+ >>> rdb = kx.tick.RTP(
+ ... port=5032,
+ ... process_logs='test.log',
+ ... libraries = {'kx': 'pykx'},
+ ... api={'custom_query': query_api}
+ ... )
+ Initialising Real-time processor on port: 5032
+ Registering callable function 'custom_query' on port 5032
+ Successfully registed callable function 'custom_query' on port 5032
+ Real-time processor initialised successfully on port: 5032
+ >>> rdb.start({'tickerplant': 'localhost:5030'})
+ Starting Real-time processing on port: 5032
+ Real-time processing successfully started on port: 5032
+ >>> rdb.restart()
+ Restarting Real-time processor on port 5032
+
+ Real-time processor process on port 5032 being stopped
+ Real-time processor successfully shutdown on port 5032
+
+ Initialising Real-time processor on port: 5032
+ Registering callable function 'custom_query' on port 5032
+ Successfully registed callable function 'custom_query' on port 5032
+ Real-time processor initialised successfully on port: 5032
+
+ Starting Real-time processing on port: 5032
+ Real-time processing successfully started on port: 5032
+
+ Real-time processor on port 5032 successfully restarted
+ >>> rdb('tab:([]5?1f;5?1f)')
+ >>> rdb('custom_query', 'tab')
+ pykx.Table(pykx.q('
+ x x1
+ -------------------
+ 0.3017723 0.3927524
+ 0.785033 0.5170911
+ 0.5347096 0.5159796
+ 0.7111716 0.4066642
+ 0.411597 0.1780839
+ '))
+ ```
+ """
+ print(f'Restarting {self._name} processor on port {self._port}\n')
+ self.stop()
+ self.__init__(port=self._port,
+ process_logs=self._process_logs,
+ libraries=self._libraries,
+ subscriptions=self._subscriptions,
+ apis=self._apis,
+ vanilla=self._vanilla,
+ pre_processor=self._pre_processor,
+ post_processor=self._post_processor)
+ if self._init_config is not None:
+ self.init(config=self._init_config)
+ print(f'{self._name} processor on port {self._port} successfully restarted\n')
+
+ def pre_processor(self, function: Callable) -> None:
+ """
+ Define a pre-processing function on the RTP process which is
+ called prior to inserting data into the Real-Time Database.
+
+ This function must take two parameters:
+
+ 1. table: The name of the table to which data will be inserted
+ 2. message: The data which is to be inserted into the table
+
+ If this function returns `None` or `kx.q('::')` then data processing
+ will not continue for that message and it will not be inserted into
+ the database.
+
+ The pre-processing function should return
+
+ Parameters:
+ function: A callable function or PyKX Lambda taking 2 arguments
+ the name of the table as a `str` and the message to be processed
+
+ Returns:
+ On successful execution of this method the data pre-processing function
+ defined on the RTP server will be updated
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>> def preprocess(table, message):
+ ... if table in ['trade', 'quote']:
+ ... return message
+ ... else:
+ ... return None
+ >>> rte = kx.tick.RTP(port=5034,
+ ... libraries = {'kx': 'pykx'},
+ ... subscriptions = ['trade', 'quote'],
+ ... vanilla=False)
+ >>> rte.pre_processor(preprocess)
+ ```
+ """
+ if self._vanilla:
+ raise QError('Pre-processing of incoming message not '
+ 'supported in vanilla real-time processor')
+ if isinstance(function, k.Function):
+ self._connection('set', '.tick.RTPPreProc', function)
+ return None
+ try:
+ src = dill.source.getsource(function)
+ except BaseException:
+ src = inspect.getsource(function)
+ self._connection('{.pykx.pyexec x;z set .pykx.get[y;<]}',
+ bytes(src, 'UTF-8'),
+ function.__name__,
+ '.tick.RTPPreProc')
+
+ def post_processor(self, function: Callable) -> None:
+ """
+ Define a post-processing function on the RTP process which is
+ called after inserting data into the Real-Time Database.
+
+ This function must take two parameters:
+
+ 1. table: The name of the table to which data will be inserted
+ 2. message: The data which is to be inserted into the table
+
+ This function can have side-effects and does not expect a return
+
+ Parameters:
+ function: A callable function or PyKX Lambda taking 2 arguments
+ the name of the table as a `str` and the message to be processed
+
+ Returns:
+ On successful execution of this method the data pre-processing function
+ defined on the RTP server will be updated
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>> def postprocess(table, message):
+ ... tradeagg = kx.q.qsql.select('trade',
+ ... columns={
+ ... 'trdvol': 'sum px*sz',
+ ... 'maxpx': 'max px',
+ ... 'minpx': 'min px'},
+ ... by='sym')
+ ... quoteagg = kx.q.qsql.select('quote',
+ ... columns={
+ ... 'maxbpx': 'max bid',
+ ... 'minapx': 'min ask',
+ ... 'baspread': 'max[bid]-min[ask]'},
+ ... by='sym')
+ ... kx.q['aggregate'] = kx.q.xcols(['time', 'sym'], tab)
+ ... return None
+ >>> rte = kx.tick.RTP(port=5034,
+ ... libraries = {'kx': 'pykx'},
+ ... subscriptions = ['trade', 'quote'],
+ ... vanilla=False)
+ >>> rte.post_processor(postprocess)
+ ```
+ """
+ if self._vanilla:
+ raise QError('Post-processing of incoming message not '
+ 'supported in vanilla real-time processor')
+ if isinstance(function, k.Function):
+ self._connection('set', '.tick.RTPPostProc', function)
+ return None
+ try:
+ src = dill.source.getsource(function)
+ except BaseException:
+ src = inspect.getsource(function)
+ self._connection('{.pykx.pyexec x;z set .pykx.get[y;<]}',
+ bytes(src, 'UTF-8'),
+ function.__name__,
+ '.tick.RTPPostProc')
+
+ def subscriptions(self, sub_list: list) -> None:
+ """
+ Set the tables from which the RTP process receives updates by assigning
+ the supplied value to `.tick.subscriptions` on the process.
+
+ Parameters:
+ sub_list: The names of the tables from which updates should be received
+
+ Returns:
+ On successful execution this will return None
+ """
+ self._connection('{.tick.subscriptions:x}', sub_list)
+
+
+class HDB(STREAMING):
+ """
+ Initialise a Historical Database (HDB) subprocess establishing a communication connection.
+ This process may contain a loaded database and APIs used for analytic transformations on
+ historical data
+
+ Parameters:
+ port: The port on which the HDB process will be established
+ process_logs: Should the logs of the generated HDB process be published
+ to standard-out of the Python process (True), suppressed (False) or
+ published to a supplied file-name
+ libraries: A dictionary mapping the alias by which a Python library will be
+ referred to the name of library
+ apis: A dictionary mapping the names to be used by users when calling a
+ defined API to the callable Python functions or PyKX lambdas/projections
+ which will be called.
+ init_args: A list of arguments passed to the initialized q process at startup
+ denoting the command line options to be used for the initialized q process
+ see [here](https://code.kx.com/q/basics/cmdline/) for a full breakdown.
+
+ Returns:
+ On successful initialisation will initialise the HDB process and set
+ appropriate configuration
+
+ Examples:
+
+ Initialise a HDB on port 5035
+
+ ```python
+ >>> import pykx as kx
+ >>> hdb = kx.tick.HDB(port=5035)
+ Initialising HDB process on port: 5035
+ HDB initialised successfully on port: 5035
+ ```
+
+ Initialise a HDB on port 5035, defining a custom api on the process
+ and stating that the library `pykx` must be available.
+
+ ```python
+ >>> import pykx as kx
+ >>> def custom_api(values):
+ ... return kx.q(values)
+ >>> hdb = kx.tick.HDB(
+ ... port=5035,
+ ... libraries={'kx': 'pykx'},
+ ... apis={'hdb_query': custom_api}
+ ... )
+ Initialising HDB process on port: 5035
+ Registering callable function 'hdb_query' on port 5035
+ Successfully registed callable function 'hdb_query' on port 5035
+ HDB initialised successfully on port: 5035
+ >>> hdb('hdb_query', '1+1')
+ pykx.LongAtom(pykx.q('2'))
+ ```
+ """
+ def __init__(self,
+ port: int = 5012,
+ *,
+ process_logs: Union[str, bool] = True,
+ libraries: dict = None,
+ apis: dict = None,
+ init_args: list = None):
+ self._name = 'HDB'
+ self._libraries = libraries
+ self._apis = apis
+ print(f'Initialising {self._name} process on port: {port}')
+ try:
+ super().__init__(port,
+ process_logs=process_logs,
+ apis=apis,
+ libraries=libraries,
+ init_args=init_args)
+ self._connection('.pykx.loadExtension["hdb"]')
+ except BaseException as err:
+ print(f'{self._name} failed to initialise on port: {port}\n')
+ if self._connection is not None:
+ self.server.stop()
+ raise err
+ print(f'{self._name} initialised successfully on port: {port}\n')
+
+ def start(self, database: str = None, config: dict = None) -> None:
+ """
+ Start the Historical Database (HDB) process for analytic/query availability.
+ This command allows for the loading of the Database to be used by the process.
+
+ Parameters:
+ database: The path to the database which is to be loaded on the process.
+ config: A dictionary passed to the sub-process which can be used by
+ the function `.tick.init` when the process is started.
+
+ Returns:
+ On successful start this functionality will return None and load
+ the specified database, otherwise will raise an error.
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> hdb = kx.tick.HDB(port=5031)
+ >>> hdb.start(database='/tmp/db')
+ ```
+ """
+ print(f'Starting {self._name} process to allow historical query')
+ if config is None:
+ config = {}
+ self._database=database
+ if database is None:
+ raise QError(f"{self._name} initialisation requires defined 'database'")
+ config['database'] = database
+ super().start(config, print_init=False, custom_start='load')
+ print(f'{self._name} process successfully started\n')
+
+ def restart(self) -> None:
+ """
+ Restart and re-initialise the HDB Process, this will
+ start the processes with validation and api functions
+ etc as defined in the initial configuration of the processes.
+
+ Returns:
+ None, progress messages are printed to standard-out.
+
+ Example:
+
+ Restart a HDB process validating that defined API's in the restarted
+ process are appropriately defined
+
+ ```python
+ >>> import pykx as kx
+ >>> def hdb_api(value):
+ ... return kx.q(value)
+ >>> hdb = kx.tick.HDB(
+ ... port=5035,
+ ... libraries={'kx': 'pykx'},
+ ... apis={'custom_api': hdb_api})
+ Initialising HDB process on port: 5035
+ Registering callable function 'custom_api' on port 5035
+ Successfully registed callable function 'custom_api' on port 5035
+ HDB process initialised successfully on port: 5035
+ >>> hdb('custom_api', '1+1')
+ pykx.LongAtom(pykx.q('2'))
+ >>> hdb.restart()
+ Restarting HDB on port 5035
+
+ HDB process on port 5035 being stopped
+ HDB successfully shutdown on port 5035
+
+ Initialising HDB process on port: 5035
+ Registering callable function 'custom_api' on port 5035
+ Successfully registed callable function 'custom_api' on port 5035
+ HDB process initialised successfully on port: 5035
+
+ HDB process on port 5035 successfully restarted
+ >>> hdb('custom_api', '1+1')
+ pykx.LongAtom(pykx.q('2'))
+ ```
+ """
+ print(f'Restarting {self._name} on port {self._port}\n')
+ self.stop()
+ # Re-run the constructor with the settings stored on this instance
+ self.__init__(port=self._port,
+ process_logs=self._process_logs,
+ libraries=self._libraries,
+ apis=self._apis)
+ # NOTE(review): `init` is defined outside this method - confirm it accepts
+ # the database path followed by the stored configuration
+ if self._init_config is not None:
+ self.init(self._database, self._init_config)
+ print(f'{self._name} on port {self._port} successfully restarted\n')
+
+
+class GATEWAY(STREAMING):
+ """
+ Initialise a Gateway subprocess establishing a communication connection.
+ A gateway provides a central location for external users to query named
+ API's within a streaming infrastructure which retrieves data from multiple
+ processes within the infrastructure.
+
+ A gateway within this implementation provides helper functions for the
+ application of basic user validation and functionality to allow custom
+ API's to call named process connections.
+
+ Parameters:
+ port: The port on which the gateway process will be established
+ process_logs: Should the logs of the generated gateway process be published
+ to standard-out of the Python process (True), suppressed (False) or
+ published to a supplied file-name
+ libraries: A dictionary mapping the alias by which a Python library will be
+ referred to the name of library
+ apis: A dictionary mapping the names to be used by users when calling a
+ defined API to the callable Python functions or PyKX lambdas/projections
+ which will be called.
+ connections: A dictionary passed to the sub-process which maps a key
+ denoting the 'name' to be assigned to a process to its connection
+ string, formatted as `<host>:<port>:<username>:<password>` where
+ `username` and `password` are optional.
+ connection_validator: A function taking username and password which returns
+ `True` or `False` depending on whether the connecting user should be
+ allowed to connect or not.
+ init_args: A list of arguments passed to the initialized q process at startup
+ denoting the command line options to be used for the initialized q process
+ see [here](https://code.kx.com/q/basics/cmdline/) for a full breakdown.
+
+ Returns:
+ On successful initialisation will initialise the Gateway process and set
+ appropriate configuration
+
+ Examples:
+
+ Initialise a Gateway defining a callable API against a HDB and RDB process.
+ This will allow free-form function calls on both processes.
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> tick.start()
+ >>> hdb = kx.tick.HDB(port=5031)
+ >>> hdb.start(database='/tmp/db')
+ >>> rdb = kx.tick.RTP(port=5032)
+ >>> rdb.start({'tickerplant': 'localhost:5030'})
+ >>> def gateway_func(x):
+ ... # The 'module' gateway is a populated class
+ ... # on the PyKX Gateway processes
+ ... rdb_data = gateway.call_port('rdb', b'{x+1}', x)
+ ... hdb_data = gateway.call_port('hdb', b'{x+2}', x)
+ ... return([rdb_data, hdb_data])
+ >>> gw = kx.tick.GATEWAY(
+ ... port=5033,
+ ... connections={'rdb': 'localhost:5032', 'hdb': 'localhost:5031'},
+ ... apis={'custom_api': gateway_func}
+ ... )
+ >>> gw.start()
+ >>> with kx.SyncQConnection(port=5033) as q:
+ ... print(q('custom_api', 2))
+ ```
+ """
+ def __init__(self,
+ port: int = 5010,
+ *,
+ process_logs: Union[str, bool] = False,
+ libraries: dict = None,
+ apis: dict = None,
+ connections: dict = None,
+ connection_validator: Callable = None,
+ init_args: list = None) -> None:
+ self._name = 'Gateway'
+ self._connections=connections
+ self._connection_validator=connection_validator
+
+ print(f'Initialising {self._name} process on port: {port}')
+ super().__init__(port,
+ process_logs=process_logs,
+ libraries=libraries,
+ apis=apis,
+ init_args=init_args)
+ try:
+ self._connection('.pykx.loadExtension["gateway"]')
+ if connection_validator is not None:
+ self.connection_validation(connection_validator)
+ except BaseException as err:
+ print(f'{self._name} failed to initialise on port: {port}\n')
+ if self._connection is not None:
+ self.server.stop()
+ raise err
+ if connections is not None:
+ self._connection('{.gw.ports:x}', connections)
+ print(f'{self._name} process initialised successfully on port: {port}\n')
+
+ def start(self, config: dict = None) -> None:
+ """
+ Start the gateway processes connections to external processes.
+ This supplied configuration will be used to create 'named'
+ inter-process connections with remote processes which can
+ be called by users in their gateway functions.
+
+ Parameters:
+ config: Optional dictionary forwarded as-is to the parent class
+ `start`, no gateway-specific keys are read from it in this method.
+
+ Returns:
+ On successful start this functionality will return None,
+ otherwise will raise an error
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> def gateway_api(value):
+ ... gw.call('tp', b'{x+1}', value)
+ >>> gw = kx.tick.GATEWAY(
+ ... port=5031,
+ ... connections={'tp': 'localhost:5030'},
+ ... apis={'custom_api': gateway_api})
+ >>> gw.start()
+ ```
+ """
+ # The parent `start` is asked to use the 'access' start-up routine
+ super().start(config, custom_start='access')
+
+ def add_connection(self, connections: dict = None):
+ """
+ Add additional callable named connections to a gateway process
+ this functionality is additive to the connections (if established)
+ when configuring a `GATEWAY` process. If the same name is used for
+ two connections the last added connection will be used in function
+ execution.
+
+ Parameters:
+ connections: A dictionary which maps a key denoting the 'name' to
+ be assigned to a process with the connection string containing the
+ host/port information as follows:
+ `:::` where `username` and
+ `password` are optional.
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ >>> def gateway_api(value):
+ ... gw.call('tp', b'{x+1}', value)
+ >>> gw = kx.tick.GATEWAY(
+ ... port=5031,
+ ... apis={'custom_api': gateway_api})
+ >>> gw.add_connection({'tp': 'localhost:5030'})
+ ```
+ """
+ if (connections is None) or not isinstance(connections, dict):
+ raise TypeError('connections must be supplied as a dict object')
+ self._connection('.tick.addConnection', connections)
+
+ def restart(self) -> None:
+ """
+ Restart and re-initialise the Gateway Process, this will
+ start the processes with validation and api functions
+ etc as defined in the initial configuration of the processes.
+
+ Example:
+
+ Restart a Gateway process validating that defined API's in the restarted
+ process are appropriately defined
+
+ ```python
+ >>> import pykx as kx
+ >>> def gateway_api(value):
+ ... return kx.q(value)
+ >>> gateway = kx.tick.GATEWAY(
+ ... port=5035,
+ ... libraries={'kx': 'pykx'},
+ ... apis={'custom_api': gateway_api})
+ Initialising Gateway process on port: 5035
+ Registering callable function 'custom_function' on port 5035
+ Successfully registed callable function 'custom_function' on port 5035
+ Gateway process initialised successfully on port: 5035
+ >>> gateway.start()
+ >>> gateway('gateway_api', '1+1')
+ pykx.LongAtom(pykx.q('2'))
+ >>> gateway.restart()
+ Restarting Gateway on port 5035
+
+ Gateway process on port 5035 being stopped
+ Gateway successfully shutdown on port 5035
+
+ Initialising Gateway process on port: 5035
+ Registering callable function 'custom_function' on port 5035
+ Successfully registed callable function 'custom_function' on port 5035
+ Gateway process initialised successfully on port: 5035
+
+ Gateway process on port 5035 successfully restarted
+ >>> gateway('gateway_api', '1+1')
+ pykx.LongAtom(pykx.q('2'))
+ ```
+ """
+ print(f'Restarting {self._name} on port {self._port}\n')
+ self.stop()
+ self.__init__(port=self._port,
+ process_logs=self._process_logs,
+ libraries=self._libraries,
+ apis=self._apis,
+ connection_validator=self._connection_validator)
+ if self._init_config is not None:
+ self.init(self._init_config)
+ print(f'{self._name} on port {self._port} successfully restarted\n')
+
+ def connection_validation(self, function: Callable) -> None:
+ """
+ Define a function to be used on the Gateway process which validates
+ users connecting to the process. This function should take two
+ inputs, username and password and validate a user connecting is
+ allowed to do so.
+
+ This function should return `True` if a user is permitted to establish
+ a connection and `False` if they are not.
+
+ Parameters:
+ function: A function taking two parameters (username and password) which
+ validates that a user connecting to the process is permitted or not
+ to establish a callable connection.
+
+ Example:
+
+ Define a function on the gateway process to only accept users with the name
+ 'new_user'.
+
+ ```python
+ >>> import pykx as kx
+ >>> def custom_validation(username, password):
+ ... if username != 'new_user':
+ ... return False
+ ... else:
+ ... return True
+ >>> gateway = kx.tick.GATEWAY(port=5034, connection_validator=custom_validation)
+ >>> with kx.SyncQConnection(port=5034, username='user') as q:
+ ... q('1+1')
+ QError: access
+ >>> with kx.SyncQConnection(port=5034, username='new_user') as q:
+ ... q('1+1')
+ pykx.LongAtom(pykx.q('2'))
+ ```
+ """
+ if isinstance(function, k.Function):
+ self._connection('set', '.z.pw', function)
+ return None
+ try:
+ src = dill.source.getsource(function)
+ except BaseException:
+ src = inspect.getsource(function)
+ self._connection('{.pykx.pyexec x;z set .pykx.get[y;<]}',
+ bytes(src, 'UTF-8'),
+ function.__name__,
+ '.z.pw')
+
+
+# Default IPC port for each process type, used as the default value of the
+# `ports` argument of `BASIC`
+_default_ports = {'tickerplant': 5010,
+ 'rdb': 5011,
+ 'hdb': 5012}
+
+
+class BASIC:
+ """
+ Initialise a configuration for a basic PyKX streaming workflow.
+
+ This configuration will be used to (by default) start the following processes:
+
+ 1. A Tickerplant process on port 5010 to which messages can be published
+ for logging and consumption by down-stream subscribers.
+ 2. A Real-Time Database process (RDB) on port 5011 which subscribes to the
+ tickerplant and maintains an in-memory representation of all the data
+ consumed that day.
+ 3. If a database is denoted at initialisation initialise a Historical Database (HDB)
+ process which loads the database and makes available historical data to a user.
+
+ With this basic infrastructure users can then add functionality to increase overall
+ complexity of their system.
+
+ Parameters:
+ tables: A dictionary mapping the names of tables and their schemas which are
+ used to denote the tables which the tickerplant will process
+ log_directory: The location of the directory to which logfiles will be published
+ database: The path to the database which is to be loaded on the HDB process and
+ the working directory of the RDB process
+ hard_reset: Reset logfiles for the current date when starting tickerplant
+ ports: A dictionary mapping the process type to the IPC communication port on which it
+ should be made available. Dictionary "Values" must be supplied as integers denoting
+ the desired port while "Keys" should be a str object of value "tickerplant", "rdb"
+ and "hdb".
+
+ Returns:
+ On successful initialisation will initialise the Tickerplant, RDB and HDB processes,
+ setting appropriate configuration
+
+ Examples:
+
+ Configure a Tickerplant and RDB process using default parameters
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(tables={'trade': trade})
+ ```
+
+ Configure a Tickerplant, RDB and HDB process architecture loading a database
+ at the location `'/tmp/db'` and persisting the tickerplant logs to
+ the folder `logs`
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... database='/tmp/db',
+ ... log_directory='logs')
+ ```
+
+ Configure a Tickerplant, RDB and HDB process setting these processes on the
+ ports 5030, 5031 and 5032 respectively
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... ports={'tickerplant': 5030, 'rdb': 5031, 'hdb': 5032})
+ ```
+ """
+ def __init__(
+ self,
+ tables,
+ *,
+ log_directory='.',
+ hard_reset=False,
+ database=None,
+ ports=_default_ports):
+ self._ports = ports
+ self._tables = tables
+ self._log_directory = log_directory,
+ self._database = database
+ self._hard_reset = hard_reset
+ self.tick = None
+ self.rdb = None
+ self.hdb = None
+ pass
+
+ def start(self) -> None:
+ """
+ Start a basic streaming architecture configured using `kx.tick.BASIC`
+
+ The tickerplant is started first, followed by the HDB (only when a
+ database was configured) and finally the RDB. If any process fails to
+ start, the processes already created are stopped and the error is re-raised.
+
+ With this basic infrastructure users can then add functionality to increase overall
+ complexity of their system.
+
+ Returns:
+ On successful initialisation will start the Tickerplant, RDB and HDB processes,
+ setting appropriate configuration
+
+ Examples:
+
+ Configure and start a Tickerplant and RDB process using default parameters
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(tables={'trade': trade})
+ >>> basic.start()
+ ```
+
+ Configure and start a Tickerplant, RDB and HDB process architecture loading a database
+ at the location `'/tmp/db'` and persisting the tickerplant logs to
+ the folder `logs`
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... database='/tmp/db',
+ ... log_directory='logs')
+ >>> basic.start()
+ ```
+
+ Configure and start a Tickerplant, RDB and HDB process setting these processes on the
+ ports 5030, 5031 and 5032 respectively
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... ports={'tickerplant': 5030, 'rdb': 5031, 'hdb': 5032})
+ >>> basic.start()
+ ```
+ """
+ # Initialise tickerplant
+ try:
+ tick = TICK(
+ port=self._ports['tickerplant'],
+ tables=self._tables,
+ hard_reset=self._hard_reset,
+ log_directory=self._log_directory)
+ self.tick = tick
+ self.tick.start()
+ except BaseException as err:
+ # Only stop the tickerplant if it was created before the failure
+ if self.tick is not None:
+ self.tick.stop()
+ raise err
+
+ # Initialise HDB
+ if self._database is not None:
+ try:
+ hdb = HDB(port=self._ports['hdb'])
+ self.hdb = hdb
+ self.hdb.start(database=self._database)
+ except BaseException as err:
+ # Stop the processes created so far before re-raising
+ self.tick.stop()
+ if self.hdb is not None:
+ self.hdb.stop()
+ raise err
+
+ # Initialise RDB
+ try:
+ rdb = RTP(port=self._ports['rdb'])
+ self.rdb = rdb
+ # The HDB address is always included, even when no HDB was started
+ rdb_config = {
+ 'tickerplant': f'localhost:{self._ports["tickerplant"]}',
+ 'hdb': f'localhost:{self._ports["hdb"]}'}
+ if self._database is not None:
+ rdb_config['database'] = self._database
+ self.rdb.start(rdb_config)
+ except BaseException as err:
+ # Stop the processes created so far before re-raising
+ self.tick.stop()
+ if self.hdb is not None:
+ self.hdb.stop()
+ if self.rdb is not None:
+ self.rdb.stop()
+ raise err
+
+ def stop(self):
+ """
+ Stop processing and kill all processes within the streaming workflow.
+ This allows the port on which the process is deployed to be reclaimed
+ and the process to be restarted if appropriate.
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... database='/tmp/db',
+ ... log_directory='logs')
+ >>> basic.start()
+ >>> basic.stop()
+ ```
+ """
+ self.tick.stop()
+ if self.hdb is not None:
+ self.hdb.stop()
+ self.rdb.stop()
+
+ def restart(self):
+ """
+ Restart and re-initialise a Basic streaming infrastructure, this will
+ start the processes with the configuration initially supplied.
+
+ Example:
+
+ ```python
+ >>> import pykx as kx
+ >>> trade = kx.schema.builder({
+ ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom,
+ ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ ... 'px': kx.FloatAtom})
+ >>> basic = kx.tick.BASIC(
+ ... tables={'trade': trade},
+ ... database='/tmp/db',
+ ... log_directory='logs')
+ >>> basic.start()
+ >>> basic.restart()
+ ```
+ """
+ self.tick.restart()
+ if self.hdb is not None:
+ self.hdb.restart()
+ self.rdb.restart()
diff --git a/src/pykx/toq.pyx b/src/pykx/toq.pyx
index 8db005a..cb0baa8 100644
--- a/src/pykx/toq.pyx
+++ b/src/pykx/toq.pyx
@@ -105,7 +105,8 @@ from ._pyarrow import pyarrow as pa
from .cast import *
from . import config
from .config import find_core_lib, k_allocator, licensed, pandas_2, system
-from .constants import INF_INT16, INF_INT32, INF_INT64, NULL_INT16, NULL_INT32, NULL_INT64
+from .constants import NULL_INT16, NULL_INT32, NULL_INT64
+from .constants import INF_INT16, INF_INT32, INF_INT64, INF_NEG_INT16, INF_NEG_INT32, INF_NEG_INT64
from .exceptions import LicenseException, PyArrowUnavailable, PyKXException, QError
from .util import df_from_arrays, slice_to_range
@@ -264,9 +265,9 @@ def _resolve_k_type(ktype: KType) -> Optional[k.K]:
raise TypeError(f'ktype {ktype!r} unrecognized')
-def _default_converter(x, ktype: Optional[KType] = None, *, cast: bool = False, handle_nulls: bool = False):
+def _default_converter(x, ktype: Optional[KType] = None, *, cast: bool = False, handle_nulls: bool = False, strings_as_char: bool = False):
if os.environ.get('PYKX_UNDER_Q', '').lower() == "true":
- return from_pyobject(x, ktype, cast, handle_nulls)
+ return from_pyobject(x, ktype, cast, handle_nulls, strings_as_char=strings_as_char)
raise _conversion_TypeError(x, type(x), ktype)
@@ -275,6 +276,7 @@ def from_none(x: None,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Identity:
"""Converts `None` into a `pykx.Identity` object.
@@ -309,6 +311,8 @@ def from_none(x: None,
kx = core.ke(math.nan)
elif ktype == k.FloatAtom:
kx = core.kf(math.nan)
+ elif ktype == k.DatetimeAtom:
+ kx = core.kz(math.nan)
elif ktype == k.CharAtom:
kx = core.kc(b' ')
elif ktype == k.SymbolAtom:
@@ -340,6 +344,97 @@ def from_none(x: None,
kx.j = 0
return factory(kx, False)
+def create_inf(ktype: KType):
+ """Create an infinite value of KType.
+
+ Short, int and long atoms are built from the `INF_INT16`, `INF_INT32` and
+ `INF_INT64` constants, real, float and datetime atoms from `math.inf`,
+ and the remaining temporal atoms from `INF_INT32` or `INF_INT64`.
+
+ Parameters:
+ ktype: Desired `pykx.K` subclass (or type number) for the returned value.
+
+ Returns:
+ An infinite of type KType
+
+ Raises:
+ NotImplementedError: If `ktype` matches none of the atom types handled below.
+ """
+ cdef core.K kx
+
+ # NOTE(review): every branch compares `ktype` against an atom class, confirm
+ # whether type numbers are resolved to classes before reaching this function
+ if ktype == k.ShortAtom:
+ kx = core.kh(INF_INT16)
+ elif ktype == k.IntAtom:
+ kx = core.ki(INF_INT32)
+ elif ktype == k.LongAtom:
+ kx = core.kj(INF_INT64)
+ elif ktype == k.RealAtom:
+ kx = core.ke(math.inf)
+ elif ktype == k.FloatAtom:
+ kx = core.kf(math.inf)
+ elif ktype == k.DatetimeAtom:
+ kx = core.kz(math.inf)
+ elif ktype == k.TimestampAtom:
+ kx = core.ktj(-12, INF_INT64)
+ # Month, date, minute, second and time atoms are created with `core.ki`
+ # and then have their type code overwritten through `kx.t`
+ elif ktype == k.MonthAtom:
+ kx = core.ki(INF_INT32)
+ kx.t = -13
+ elif ktype == k.DateAtom:
+ kx = core.ki(INF_INT32)
+ kx.t = -14
+ elif ktype == k.TimespanAtom:
+ kx = core.ktj(-16, INF_INT64)
+ elif ktype == k.MinuteAtom:
+ kx = core.ki(INF_INT32)
+ kx.t = -17
+ elif ktype == k.SecondAtom:
+ kx = core.ki(INF_INT32)
+ kx.t = -18
+ elif ktype == k.TimeAtom:
+ kx = core.ki(INF_INT32)
+ kx.t = -19
+ else:
+ raise NotImplementedError("Retrieval of infinite values not supported for this type")
+ return factory(kx, False)
+
+def create_neg_inf(ktype: KType):
+ """Create a negative infinite value of KType.
+
+ Short, int and long atoms are built from the `INF_NEG_INT16`, `INF_NEG_INT32`
+ and `INF_NEG_INT64` constants, real, float and datetime atoms from `-math.inf`,
+ and the remaining temporal atoms from `INF_NEG_INT32` or `INF_NEG_INT64`.
+
+ Parameters:
+ ktype: Desired `pykx.K` subclass (or type number) for the returned value.
+
+ Returns:
+ A negative infinite of type KType
+
+ Raises:
+ NotImplementedError: If `ktype` matches none of the atom types handled below.
+ """
+ cdef core.K kx
+
+ # NOTE(review): every branch compares `ktype` against an atom class, confirm
+ # whether type numbers are resolved to classes before reaching this function
+ if ktype == k.ShortAtom:
+ kx = core.kh(INF_NEG_INT16)
+ elif ktype == k.IntAtom:
+ kx = core.ki(INF_NEG_INT32)
+ elif ktype == k.LongAtom:
+ kx = core.kj(INF_NEG_INT64)
+ elif ktype == k.RealAtom:
+ kx = core.ke(-math.inf)
+ elif ktype == k.FloatAtom:
+ kx = core.kf(-math.inf)
+ elif ktype == k.DatetimeAtom:
+ kx = core.kz(-math.inf)
+ elif ktype == k.TimestampAtom:
+ kx = core.ktj(-12, INF_NEG_INT64)
+ # Month, date, minute, second and time atoms are created with `core.ki`
+ # and then have their type code overwritten through `kx.t`
+ elif ktype == k.MonthAtom:
+ kx = core.ki(INF_NEG_INT32)
+ kx.t = -13
+ elif ktype == k.DateAtom:
+ kx = core.ki(INF_NEG_INT32)
+ kx.t = -14
+ elif ktype == k.TimespanAtom:
+ kx = core.ktj(-16, INF_NEG_INT64)
+ elif ktype == k.MinuteAtom:
+ kx = core.ki(INF_NEG_INT32)
+ kx.t = -17
+ elif ktype == k.SecondAtom:
+ kx = core.ki(INF_NEG_INT32)
+ kx.t = -18
+ elif ktype == k.TimeAtom:
+ kx = core.ki(INF_NEG_INT32)
+ kx.t = -19
+ else:
+ raise NotImplementedError("Retrieval of infinite values not supported for this type")
+ return factory(kx, False)
_ktype_to_type_number_str = {
k.List: "0h",
@@ -387,6 +482,7 @@ def from_pykx_k(x: k.K,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.K:
"""Converts a `pykx.K` object into a `pykx.K` object.
@@ -515,6 +611,7 @@ def from_int(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.IntegralNumericAtom:
"""Converts an `int` into an instance of a subclass of `pykx.IntegralNumericAtom`.
@@ -598,6 +695,7 @@ def from_float(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.NonIntegralNumericAtom:
"""Converts a `float` into an instance of a subclass of `pykx.NonIntegralNumericAtom`.
@@ -644,6 +742,7 @@ def from_str(x: str,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> Union[k.CharAtom, k.CharVector, k.SymbolAtom]:
"""Converts a `str` into an instance of a string-like subclass of `pykx.K`.
@@ -671,8 +770,12 @@ def from_str(x: str,
"""
cdef core.K kx
cdef bytes as_bytes = x.encode('utf-8')
+
if ktype is None or issubclass(ktype, k.SymbolAtom):
- kx = core.ks(as_bytes)
+ if strings_as_char:
+ kx = core.kpn(as_bytes, len(as_bytes))
+ else:
+ kx = core.ks(as_bytes)
elif ktype is k.CharAtom:
if len(as_bytes) != 1:
raise ValueError(
@@ -694,6 +797,7 @@ def from_bytes(x: bytes,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> Union[k.SymbolAtom, k.SymbolVector, k.CharAtom]:
"""Converts a `bytes` object into an instance of a string-like subclass of `pykx.K`.
@@ -745,6 +849,7 @@ def from_uuid_UUID(x: UUID,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.GUIDAtom:
"""Converts a `uuid.UUID` into a `pykx.GUIDAtom`.
@@ -785,6 +890,7 @@ def from_list(x: list,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Vector:
"""Converts a `list` into an instance of a subclass of `pykx.Vector`.
@@ -844,13 +950,13 @@ def from_list(x: list,
if ktype is k.TimestampVector and config.keep_local_times:
x = [y.replace(tzinfo=None) for y in x]
- return from_numpy_ndarray(np.array(x, dtype=np_type), ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_numpy_ndarray(np.array(x, dtype=np_type), ktype, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
except TypeError as ex:
raise _conversion_TypeError(x, 'Python list', ktype) from ex
cdef core.K kx = core.ktn(0, len(x))
for i, item in enumerate(x):
# No good way to specify the ktype for nested types
- kk = toq(item, cast=cast, handle_nulls=handle_nulls)
+ kk = toq(item, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
(kx.G0)[i] = core.r1(_k(kk))
res = factory(kx, False)
if licensed:
@@ -868,6 +974,7 @@ def from_tuple(x: tuple,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Vector:
"""Converts a `tuple` into an instance of a subclass of `pykx.Vector`.
@@ -921,7 +1028,7 @@ def from_tuple(x: tuple,
"""
if ktype is not None and not issubclass(ktype, k.Vector):
raise _conversion_TypeError(x, 'Python tuple', ktype)
- return from_list(list(x), ktype=ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_list(list(x), ktype=ktype, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
def from_dict(x: dict,
@@ -929,6 +1036,7 @@ def from_dict(x: dict,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Dictionary:
"""Converts a `dict` into a `pykx.Dictionary`.
@@ -966,7 +1074,7 @@ def from_dict(x: dict,
cast=cast, handle_nulls=handle_nulls)
else:
k_keys = from_list(list(x.keys()), cast=cast, handle_nulls=handle_nulls)
- k_values = from_list(list(x.values()), cast=cast, handle_nulls=handle_nulls)
+ k_values = from_list(list(x.values()), cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
kx = core.xD(core.r1(_k(k_keys)), core.r1(_k(k_values)))
return factory(kx, False)
@@ -1011,9 +1119,13 @@ def _listify(x: np.ndarray):
_dtype_to_ktype = {
np.dtype('bool'): k.BooleanVector,
np.dtype('uint8'): k.ByteVector,
+ np.dtype('uint16'): k.IntVector,
+ np.dtype('uint32'): k.LongVector,
+ np.dtype('int8'): k.ShortVector,
np.dtype('int16'): k.ShortVector,
np.dtype('int32'): k.IntVector,
np.dtype('int64'): k.LongVector,
+ np.dtype('float16'): k.RealVector,
np.dtype('float32'): k.RealVector,
np.dtype('float64'): k.FloatVector,
np.dtype('datetime64[s]'): k.TimestampVector,
@@ -1022,6 +1134,7 @@ _dtype_to_ktype = {
np.dtype('datetime64[ns]'): k.TimestampVector,
np.dtype('datetime64[M]'): k.MonthVector,
np.dtype('datetime64[D]'): k.DateVector,
+ np.dtype('timedelta64[us]'): k.TimespanVector,
np.dtype('timedelta64[ns]'): k.TimespanVector,
np.dtype('timedelta64[m]'): k.MinuteVector,
np.dtype('timedelta64[s]'): k.SecondVector,
@@ -1062,6 +1175,7 @@ def from_numpy_ndarray(x: np.ndarray,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Vector:
"""Converts a `numpy.ndarray` into a `pykx.Vector`.
@@ -1238,6 +1352,9 @@ def from_numpy_ndarray(x: np.ndarray,
Returns:
An instance of a subclass of `pykx.Vector`.
"""
+ if str(x.dtype) == "pykx.uuid":
+ x = x.array
+
ktype = _resolve_ndarray_k_type(x, ktype)
if cast:
@@ -1253,7 +1370,7 @@ def from_numpy_ndarray(x: np.ndarray,
# q doesn't support n-dimensional vectors, so we treat them as lists to preserve the shape
if len(x.shape) > 1:
- return from_list(_listify(x), ktype=k.List, cast=cast, handle_nulls=handle_nulls)
+ return from_list(_listify(x), ktype=k.List, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
elif isinstance(x, np.ma.MaskedArray):
if x.dtype.kind != 'i':
@@ -1263,7 +1380,7 @@ def from_numpy_ndarray(x: np.ndarray,
x = np.ma.MaskedArray(x, copy=False, fill_value=-2 ** (x.itemsize * 8 - 1)).filled()
elif ktype is k.List:
- return from_list(x.tolist(), ktype=k.List, cast=cast, handle_nulls=handle_nulls)
+ return from_list(x.tolist(), ktype=k.List, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
elif ktype is k.CharVector:
if str(x.dtype).endswith('U1'):
@@ -1271,7 +1388,7 @@ def from_numpy_ndarray(x: np.ndarray,
elif str(x.dtype).endswith('S1'):
return from_bytes(b''.join(x))
elif 'S' == x.dtype.char:
- return from_list(x.tolist(), ktype=k.List, cast=None, handle_nulls=None)
+ return from_list(x.tolist(), ktype=k.List, cast=None, handle_nulls=None, strings_as_char=strings_as_char)
raise _conversion_TypeError(x, repr('numpy.ndarray'), ktype)
cdef long long n = x.size
@@ -1291,6 +1408,8 @@ def from_numpy_ndarray(x: np.ndarray,
return factory(kx, False)
elif ktype is k.SymbolVector:
+ if strings_as_char:
+ return from_list(x.tolist(), ktype=k.List, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
kx = core.ktn(ktype.t, n)
for i in range(n):
if x[i] is None:
@@ -1306,7 +1425,7 @@ def from_numpy_ndarray(x: np.ndarray,
dtype = x.dtype
x = x.view(np.int64)
mul = None
- if dtype == np.dtype(' Union[k.Table, k.KeyedTable]:
"""Converts a `pandas.DataFrame` into a `pykx.Table` or `pykx.KeyedTable` as appropriate.
@@ -1525,7 +1647,8 @@ def from_pandas_dataframe(x: pd.DataFrame,
kk = from_dict(
{k: _to_numpy_or_categorical(x[k], k, x) for k in x.columns},
cast=cast,
- handle_nulls=handle_nulls
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char
)
kx = core.xT(core.r1(_k(kk)))
if kx == NULL:
@@ -1538,7 +1661,7 @@ def from_pandas_dataframe(x: pd.DataFrame,
# The trick below helps create a pd.MultiIndex from another base Index
idx = pd.DataFrame(index=[x.index]).index
k_keys = from_pandas_index(idx, cast=cast, handle_nulls=handle_nulls)
- k_values = from_pandas_dataframe(x.reset_index(drop=True), cast=cast, handle_nulls=handle_nulls)
+ k_values = from_pandas_dataframe(x.reset_index(drop=True), cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
kx = core.xD(core.r1(_k(k_keys)), core.r1(_k(k_values)))
if kx == NULL:
raise PyKXException('Failed to create k dictionary (keyed table)')
@@ -1554,6 +1677,7 @@ def from_pandas_series(x: pd.Series,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Vector:
"""Converts a `pandas.Series` into an instance of a subclass of `pykx.Vector`.
@@ -1580,7 +1704,7 @@ def from_pandas_series(x: pd.Series,
"""
arr = _to_numpy_or_categorical(x)
if isinstance(arr, np.ndarray):
- return toq(arr[0] if (1,) == arr.shape else arr, ktype=ktype)
+ return toq(arr[0] if (1,) == arr.shape else arr, ktype=ktype, strings_as_char=strings_as_char)
else:
return arr
@@ -1603,6 +1727,7 @@ def from_pandas_index(x: pd.Index,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> Union[k.Vector, k.Table]:
"""Converts a `pandas.Index` into a `pykx.Vector` or `pykx.Table` as appropriate.
@@ -1659,6 +1784,7 @@ def from_pandas_categorical(x: pd.Categorical,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Vector:
"""Converts a `pandas.Categorical` into a `pykx.EnumVector`.
@@ -1689,8 +1815,8 @@ def from_pandas_categorical(x: pd.Categorical,
x.categories)
ENUMS.append(name)
else:
- res = q(f"{{if[any not y in {name}; `cast]; `{name}$y@x}}",
- x.codes.astype('int32'),
+ res = q(f"{{if[any not y in {name}; `cast]; `{name}$y@x}}",
+ x.codes.astype('int32'),
x.categories)
return res
@@ -1700,6 +1826,7 @@ def from_pandas_nat(x: type(pd.NaT),
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalAtom:
"""Converts a `pandas.NaT` into an instance of a subclass of `pykx.TemporalAtom`.
@@ -1763,6 +1890,7 @@ def from_pandas_timedelta(
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.K:
x = x.to_numpy()
if ktype is None:
@@ -1775,6 +1903,7 @@ def from_arrow(x: Union['pa.Array', 'pa.Table'],
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> Union[k.Vector, k.Table]:
"""Converts PyArrow arrays/tables into PyKX vectors/tables, respectively.
@@ -1816,12 +1945,39 @@ def from_arrow(x: Union['pa.Array', 'pa.Table'],
raise _conversion_TypeError(x, 'Arrow extension array', ktype)
return toq(x.to_pandas(), ktype=ktype, cast=cast, handle_nulls=handle_nulls)
+def from_arrow_py(x,
+ ktype: Optional[KType] = None,
+ *,
+ cast: bool = False,
+ handle_nulls: bool = False,
+ strings_as_char: bool = False,
+) -> Union[k.Vector, k.Table]:
+ """Converts PyArrow scalars into PyKX objects.
+
+ Conversions from PyArrow to q are performed by converting the PyArrow object to Python
+ first.
+
+ Parameters:
+ x: The `pyarrow` object to be converted.
+ ktype: Desired `pykx.K` subclass (or type number) for the returned value. If `None`,
+ the type is inferred from `x`.
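+ cast: Forwarded to `pykx.toq` when converting the resulting Python object.
+ handle_nulls: Forwarded to `pykx.toq` when converting the resulting Python object.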
+
+ Returns:
+ A `pykx` object.
+ """
+ if pa is None:
+ raise PyArrowUnavailable
+ return toq(x.as_py(), ktype=ktype, cast=cast, handle_nulls=handle_nulls)
+
def from_datetime_date(x: Any,
ktype: Optional[KType] = None,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalFixedAtom:
"""Converts a `datetime.date` into an instance of a subclass of `pykx.TemporalFixedAtom`.
@@ -1858,7 +2014,6 @@ def from_datetime_date(x: Any,
Returns:
An instance of a subclass of `pykx.TemporalFixedAtom`.
"""
- # TODO: the `cast is None` should be removed at the next major release (KXI-12945)
if (cast is None or cast) and type(x) is not datetime.date:
x = cast_to_python_date(x)
@@ -1873,6 +2028,7 @@ def from_datetime_time(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalFixedAtom:
if (cast is None or cast) and type(x) is not datetime.time:
x = cast_to_python_time(x)
@@ -1885,10 +2041,11 @@ def from_datetime_datetime(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalFixedAtom:
"""Converts a `datetime.datetime` into an instance of a subclass of `pykx.TemporalFixedAtom`.
- Note: Setting environment variable `KEEP_LOCAL_TIMES` will result in the use of local time zones not UTC time.
+ Note: Setting environment variable `PYKX_KEEP_LOCAL_TIMES` will result in the use of local time zones not UTC time.
By default this function will convert any `datetime.datetime` objects with time zone
information to UTC before converting it to `q`. If you set the environment vairable to 1,
true or True, then the objects with time zone information will not be converted to UTC and
@@ -1928,7 +2085,6 @@ def from_datetime_datetime(x: Any,
Returns:
An instance of a subclass of `pykx.TemporalFixedAtom`.
"""
- # TODO: the `cast is None` should be removed at the next major release (KXI-12945)
if (cast is None or cast) and type(x) is not datetime.datetime:
x = cast_to_python_datetime(x)
@@ -1961,6 +2117,7 @@ def from_datetime_timedelta(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalSpanAtom:
"""Converts a `datetime.timedelta` into an instance of a subclass of `pykx.TemporalSpanAtom`.
@@ -1996,7 +2153,6 @@ def from_datetime_timedelta(x: Any,
Returns:
An instance of a subclass of `pykx.TemporalSpanAtom`.
"""
- # TODO: the `cast is None` should be removed at the next major release (KXI-12945)
if (cast is None or cast) and type(x) is not datetime.timedelta:
x = cast_to_python_timedelta(x)
@@ -2022,6 +2178,7 @@ def from_numpy_datetime64(x: np.datetime64,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalFixedAtom:
"""Converts a `numpy.datetime64` into an instance of a subclass of `pykx.TemporalFixedAtom`.
@@ -2080,6 +2237,7 @@ def from_numpy_timedelta64(x: np.timedelta64,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.TemporalSpanAtom:
"""Converts a `numpy.timedelta64` into an instance of a subclass of `pykx.TemporalSpanAtom`.
@@ -2135,6 +2293,7 @@ def from_slice(x: slice,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.IntegralNumericVector:
"""Converts a `slice` into an instance of a subclass of `pykx.IntegralNumericVector`.
@@ -2193,6 +2352,7 @@ def from_range(x: range,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.IntegralNumericVector:
"""Converts a `range` into an instance of a subclass of `pykx.IntegralNumericVector`.
@@ -2245,6 +2405,7 @@ def from_pathlib_path(x: Path,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.SymbolAtom:
"""Converts a `pathlib.Path` into a q handle symbol.
@@ -2285,6 +2446,7 @@ def from_ellipsis(x: Ellipsis,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.ProjectionNull:
"""Converts an `Ellipsis` (`...`) into a q projection null.
@@ -2342,6 +2504,7 @@ def from_fileno(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.IntAtom:
"""Converts an object with a `fileno` attribute to a `pykx.IntAtom`.
@@ -2388,6 +2551,7 @@ def from_callable(x: Callable,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
) -> k.Composition:
"""Converts a callable object into a q composition.
@@ -2455,6 +2619,7 @@ cpdef from_pyobject(p: object,
ktype: Optional[KType] = None,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
):
# q foreign objects internally are a 2 value list, where the type number has been set to 112
# The first value is a destructor function to be called when q drops the object
@@ -2470,19 +2635,44 @@ def _from_iterable(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
):
if type(x) is np.ndarray:
- return from_numpy_ndarray(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_numpy_ndarray(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
elif type(x) is list:
- return from_list(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_list(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
elif type(x) is tuple:
- return from_tuple(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_tuple(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
elif type(x) is dict:
- return from_dict(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_dict(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
elif type(x) is range:
- return from_range(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_range(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
elif type(x) is slice:
- return from_slice(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return from_slice(x,
+ ktype,
+ cast=cast,
+ handle_nulls=handle_nulls,
+ strings_as_char=strings_as_char)
else:
raise _conversion_TypeError(x, type(x), ktype)
@@ -2492,11 +2682,14 @@ def _from_str_like(x: Any,
*,
cast: bool = False,
handle_nulls: bool = False,
+ strings_as_char: bool = False,
):
if type(x) is str:
- return from_str(x, ktype)
+ return from_str(x, ktype, strings_as_char=strings_as_char)
elif type(x) is bytes:
return from_bytes(x, ktype)
+ elif type(x) is np.bytes_:
+ return from_bytes(x.tolist(), ktype)
elif type(x) is np.ndarray:
return from_numpy_ndarray(x, ktype, cast=cast)
elif type(x) is list:
@@ -2626,12 +2819,11 @@ _converter_from_python_type = {
if not pandas_2:
_converter_from_python_type[pd.core.indexes.numeric.Int64Index] = from_pandas_index
_converter_from_python_type[pd.core.indexes.numeric.Float64Index] = from_pandas_index
-else:
- _converter_from_python_type[pd._libs.tslibs.timedeltas.Timedelta] = from_pandas_timedelta
+
+_converter_from_python_type[pd._libs.tslibs.timedeltas.Timedelta] = from_pandas_timedelta
class ToqModule(ModuleType):
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
- def __call__(self, x: Any, ktype: Optional[KType] = None, *, cast: bool = None, handle_nulls: bool = False) -> k.K:
+ def __call__(self, x: Any, ktype: Optional[KType] = None, *, cast: bool = None, handle_nulls: bool = False, strings_as_char: bool = False) -> k.K:
ktype = _resolve_k_type(ktype)
check_ktype = False
@@ -2672,12 +2864,29 @@ class ToqModule(ModuleType):
converter = from_ellipsis
elif pa is not None and type(x).__module__.startswith('pyarrow') and hasattr(x, 'to_pandas'):
converter = from_arrow
+ elif pa is not None and type(x).__module__.startswith('pyarrow') and hasattr(x, 'as_py'):
+ converter = from_arrow_py
elif hasattr(x, 'fileno'):
converter = from_fileno
elif callable(x): # Check this last because many Python objects are incidentally callable.
converter = from_callable
elif isinstance(x, k.GroupbyTable):
return self(x.tab, ktype=ktype, cast=cast, handle_nulls=handle_nulls)
+ elif isinstance(x, k.Column):
+ return self(x._value)
+ elif isinstance(x, k.QueryPhrase):
+ return self(x._phrase)
+ elif isinstance(x, k.Variable):
+ return self(x._name)
+ elif isinstance(x, k.ParseTree):
+ return self(x._tree)
+ elif isinstance(x, pd._libs.missing.NAType):
+ converter = from_none
+ elif isinstance(x, np.bytes_):
+ x = x.tolist()
+ converter = from_bytes
+ elif isinstance(x, k.PandasUUIDArray):
+ converter = from_numpy_ndarray
else:
converter = _default_converter
if type(ktype)==dict:
@@ -2689,7 +2898,7 @@ class ToqModule(ModuleType):
else:
if not type(x) == pd.DataFrame:
raise TypeError(f"'ktype' not supported as dictionary for {type(x)}")
- return converter(x, ktype, cast=cast, handle_nulls=handle_nulls)
+ return converter(x, ktype, cast=cast, handle_nulls=handle_nulls, strings_as_char=strings_as_char)
# Set the module type for this module to `ToqModule` so that it can be called via `__call__`.
diff --git a/src/pykx/util.py b/src/pykx/util.py
index 9234a76..22b3ba3 100644
--- a/src/pykx/util.py
+++ b/src/pykx/util.py
@@ -1,19 +1,42 @@
from contextlib import contextmanager
from functools import wraps
import inspect
+import io
import os
+from pathlib import Path
import platform
+import signal
+import shutil
+import subprocess
+import sys
+import time
from typing import Any, Callable, Dict, Union
+from zipfile import ZipFile
+from warnings import warn
import pandas as pd
from pandas.core.internals import BlockManager, make_block
-
-from .config import qargs, qhome, qlic
+import requests
+import toml
+
+from .config import (
+ _executable, _get_qexecutable, _get_qhome, allocator, beta_features, ignore_qhome,
+ jupyterq, k_gc, keep_local_times, licensed, load_pyarrow_unsafe, max_error_length,
+ no_pykx_signal, no_qce, pykx_4_1, pykx_config_location, pykx_config_profile,
+ pykx_debug_insights, pykx_dir, pykx_lib_dir, pykx_qdebug, pykx_threading, q_executable, qargs,
+ qhome, qlic, release_gil, skip_under_q, suppress_warnings, use_q_lock)
from ._version import version as __version__
from .exceptions import PyKXException
from .reimporter import PyKXReimport
+try:
+ import psutil
+ _psutil_available = True
+except ImportError:
+ _psutil_available = False
+
+
__all__ = [
'num_available_cores',
'BlockManagerUnconsolidated',
@@ -27,11 +50,20 @@
'normalize_to_bytes',
'normalize_to_str',
'once',
+ 'detect_bad_columns',
'slice_to_range',
'subclasses',
+ 'jupyter_qfirst_enable',
+ 'jupyter_qfirst_disable',
+ 'kill_q_process'
]
+def _init(_q):
+ global q
+ q = _q
+
+
def __dir__():
return sorted(__all__)
@@ -281,53 +313,44 @@ def debug_environment(detailed: bool = False, return_info: bool = False) -> Unio
pykx.qhome: /usr/local/anaconda3/envs/qenv/q
pykx.qlic: /usr/local/anaconda3/envs/qenv/q
pykx.licensed: True
- pykx.__version__: 2.4.3
+ pykx.__version__: 2.5.3.dev646+gfe6232c7.d20241002
pykx.file: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py
**** Python information ****
- sys.version: 3.8.3 (default, Jul 2 2020, 11:26:31)
- [Clang 10.0.0 ]
- pandas: 2.0.3
- numpy: 1.24.4
- pytz: 2023.3.post1
+ sys.version: 3.12.3 (v3.12.3:f6650f9ad7, Apr 9 2024, 08:18:48)
+ pandas: 1.5.3
+ numpy: 1.26.2
+ pytz: 2024.1
which python: /usr/local/bin/python
which python3: /Library/Frameworks/Python.framework/Versions/3.12/bin/python3
- find_libpython: /usr/local/anaconda3/lib/libpython3.8.dylib
+ find_libpython: /Library/Frameworks/Python.framework/Versions/3.12/Python
**** Platform information ****
- platform.platform: macOS-10.16-x86_64-i386-64bit
-
- **** PyKX Environment Variables ****
- PYKX_IGNORE_QHOME:
- PYKX_KEEP_LOCAL_TIMES:
- PYKX_ALLOCATOR:
- PYKX_GC:
- PYKX_LOAD_PYARROW_UNSAFE:
- PYKX_MAX_ERROR_LENGTH:
- PYKX_NOQCE:
- PYKX_Q_LIB_LOCATION:
- PYKX_RELEASE_GIL:
- PYKX_Q_LOCK:
+ platform.platform: macOS-13.0.1-x86_64-i386-64bit
+
+ **** PyKX Configuration Variables ****
+ PYKX_IGNORE_QHOME: False
+ PYKX_KEEP_LOCAL_TIMES: False
+ PYKX_ALLOCATOR: False
+ PYKX_GC: False
+ PYKX_LOAD_PYARROW_UNSAFE: False
+ PYKX_MAX_ERROR_LENGTH: 256
+ PYKX_NOQCE: False
+ PYKX_RELEASE_GIL: False
+ PYKX_Q_LIB_LOCATION: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib
+ PYKX_Q_LOCK: False
+ PYKX_SKIP_UNDERQ: False
+ PYKX_Q_EXECUTABLE: /usr/local/anaconda3/envs/qenv/q/m64/q
+ PYKX_THREADING: False
+ PYKX_4_1_ENABLED: False
+ PYKX_QDEBUG: False
+ PYKX_DEBUG_INSIGHTS_LIBRARIES: False
PYKX_DEFAULT_CONVERSION:
- PYKX_SKIP_UNDERQ:
- PYKX_UNSET_GLOBALS:
- PYKX_DEBUG_INSIGHTS_LIBRARIES:
- PYKX_EXECUTABLE: /usr/local/anaconda3/bin/python
+ PYKX_EXECUTABLE: /Library/Frameworks/Python.framework/Versions/3.12/bin/python3.12
PYKX_PYTHON_LIB_PATH:
PYKX_PYTHON_BASE_PATH:
PYKX_PYTHON_HOME_PATH:
- PYKX_DIR: /usr/local/anaconda3/lib/python3.8/site-packages/pykx
- PYKX_QDEBUG:
- PYKX_THREADING:
- PYKX_4_1_ENABLED:
-
- **** PyKX Deprecated Environment Variables ****
- SKIP_UNDERQ:
- UNSET_PYKX_GLOBALS:
- KEEP_LOCAL_TIMES:
- IGNORE_QHOME:
- UNDER_PYTHON:
- PYKX_NO_SIGINT:
+ PYKX_DIR: /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pykx
**** q Environment Variables ****
QARGS:
@@ -342,15 +365,8 @@ def debug_environment(detailed: bool = False, return_info: bool = False) -> Unio
pykx.qlic lics: ['k4.lic']
**** q information ****
- which q: /usr/local/anaconda3/envs/qenv/q/q
- q info:
- (`m64;4f;2020.05.04)
- "insights.lib.embedq insights.lib.pykx..
+ which q: None
```
-
-
-
-
"""
debug_info = ""
debug_info += pykx_information()
@@ -371,7 +387,6 @@ def pykx_information():
pykx_info += f"pykx.qhome: {qhome}\n"
pykx_info += f"pykx.qlic: {qlic}\n"
- from .config import licensed
pykx_info += f"pykx.licensed: {licensed}\n"
pykx_info += f"pykx.__version__: {__version__}\n"
pykx_info += f"pykx.file: {__file__}\n"
@@ -381,7 +396,6 @@ def pykx_information():
def python_information():
py_info = '\n**** Python information ****\n'
try:
- import sys
py_info += f"sys.version: {sys.version}\n"
import importlib.metadata
@@ -409,25 +423,33 @@ def platform_information():
def env_information():
- env_info = '\n**** PyKX Environment Variables ****\n'
-
- envs = ['PYKX_IGNORE_QHOME', 'PYKX_KEEP_LOCAL_TIMES', 'PYKX_ALLOCATOR',
- 'PYKX_GC', 'PYKX_LOAD_PYARROW_UNSAFE', 'PYKX_MAX_ERROR_LENGTH',
- 'PYKX_NOQCE', 'PYKX_Q_LIB_LOCATION', 'PYKX_RELEASE_GIL', 'PYKX_Q_LOCK',
- 'PYKX_DEFAULT_CONVERSION', 'PYKX_SKIP_UNDERQ', 'PYKX_UNSET_GLOBALS',
- 'PYKX_DEBUG_INSIGHTS_LIBRARIES', 'PYKX_EXECUTABLE', 'PYKX_PYTHON_LIB_PATH',
- 'PYKX_PYTHON_BASE_PATH', 'PYKX_PYTHON_HOME_PATH', 'PYKX_DIR', 'PYKX_QDEBUG',
- 'PYKX_THREADING', 'PYKX_4_1_ENABLED'
- ]
-
- for x in envs:
- env_info += f"{x}: {os.getenv(x, '')}\n"
-
- env_info += '\n**** PyKX Deprecated Environment Variables ****\n'
- deps = ['SKIP_UNDERQ', 'UNSET_PYKX_GLOBALS', 'KEEP_LOCAL_TIMES', 'IGNORE_QHOME',
- 'UNDER_PYTHON', 'PYKX_NO_SIGINT']
-
- for x in deps:
+ env_info = '\n**** PyKX Configuration Variables ****\n'
+
+ global_config = {'PYKX_IGNORE_QHOME': ignore_qhome, 'PYKX_KEEP_LOCAL_TIMES': keep_local_times,
+ 'PYKX_ALLOCATOR': allocator, 'PYKX_GC': k_gc,
+ 'PYKX_LOAD_PYARROW_UNSAFE': load_pyarrow_unsafe,
+ 'PYKX_MAX_ERROR_LENGTH': max_error_length, 'PYKX_NOQCE': no_qce,
+ 'PYKX_RELEASE_GIL': release_gil, 'PYKX_Q_LIB_LOCATION': pykx_lib_dir,
+ 'PYKX_Q_LOCK': use_q_lock, 'PYKX_SKIP_UNDERQ': skip_under_q,
+ 'PYKX_Q_EXECUTABLE': q_executable, 'PYKX_THREADING': pykx_threading,
+ 'PYKX_4_1_ENABLED': pykx_4_1, 'PYKX_QDEBUG': pykx_qdebug,
+ 'PYKX_DEBUG_INSIGHTS_LIBRARIES': pykx_debug_insights,
+ 'PYKX_CONFIGURATION_LOCATION': pykx_config_location,
+ 'PYKX_NO_SIGNAL': no_pykx_signal,
+ 'PYKX_CONFIG_PROFILE': pykx_config_profile,
+ 'PYKX_BETA_FEATURES': beta_features, 'PYKX_JUPYTERQ': jupyterq,
+ 'PYKX_SUPPRESS_WARNINGS': suppress_warnings}
+
+ env_only = ['PYKX_DEFAULT_CONVERSION',
+ 'PYKX_EXECUTABLE', 'PYKX_PYTHON_LIB_PATH',
+ 'PYKX_PYTHON_BASE_PATH', 'PYKX_PYTHON_HOME_PATH', 'PYKX_DIR',
+ 'PYKX_USE_FIND_LIBPYTHON'
+ ]
+
+ for k, v in global_config.items():
+ env_info += f"{k}: {v}\n"
+
+ for x in env_only:
env_info += f"{x}: {os.getenv(x, '')}\n"
env_info += '\n**** q Environment Variables ****\n'
@@ -483,3 +505,248 @@ def q_information():
except Exception as e:
q_info += f"Failed to gather q information: {e}"
return q_info
+
+
+def _run_all_cell_with_magics(lines):
+ if "%%python" == lines[0].strip():
+ return lines[1:]
+ elif "%%q" in lines[0].strip():
+ return lines
+ else:
+ return (["%%q \n"]+lines)
+
+
+def jupyter_qfirst_enable():
+ qfirst_modify("q")
+
+
+def jupyter_qfirst_disable():
+ qfirst_modify("python")
+
+
+def qfirst_modify(state):
+ try:
+ ipython = get_ipython()
+ if _run_all_cell_with_magics in ipython.input_transformers_cleanup and state == "python":
+ ipython.input_transformers_cleanup.remove(_run_all_cell_with_magics)
+ print("""PyKX now running in 'python' mode (default). All cells by default will be run as python code.
+Include '%%q' at the beginning of each cell to run as q code. """) # noqa
+ elif _run_all_cell_with_magics not in ipython.input_transformers_cleanup and state == "q":
+ ipython.input_transformers_cleanup.append(_run_all_cell_with_magics)
+ print("""PyKX now running in 'jupyter_qfirst' mode. All cells by default will be run as q code.
+Include '%%python' at the beginning of each cell to run as python code. """) # noqa
+ else:
+ print(f"PyKX already running in '{state}' mode")
+ except NameError:
+ print("Not running under IPython/Jupyter")
+
+
+def add_to_config(config, folder='~'):
+ """
+ Add configuration options to the file '.pykx-config' in a specified folder
+
+ Parameters:
+ config: A dictionary mapping the configuration options to their associated value
+ folder: The folder where the user's '.pykx-config' file is to be updated
+
+ Examples:
+
+ ```python
+ >>> import pykx as kx
+ >>> kx.util.add_to_config({'PYKX_GC': 'True', 'PYKX_BETA_FEATURES': 'True'})
+ Configuration updated at: /usr/local/.pykx-config.
+ Profile updated: default.
+ Successfully added:
+ - PYKX_GC = True
+ - PYKX_BETA_FEATURES = True
+ ```
+ """
+ if not isinstance(config, dict):
+ raise TypeError(f'Supplied config must be of type dict, supplied type: {type(config)}')
+ fpath = str(Path(os.path.expanduser(folder)) / '.pykx-config')
+ try:
+ os.access(fpath, os.W_OK)
+ except FileNotFoundError:
+ pass
+ except PermissionError:
+ raise PermissionError(f"You do not have sufficient permissions to write to: {fpath}")
+ print_config = f"\nConfiguration updated at: {fpath}.\nProfile updated: "\
+ f"{pykx_config_profile}.\nSuccessfully added:\n"
+ if os.path.exists(fpath):
+ with open(fpath, 'r') as file:
+ data = toml.load(file)
+ else:
+ data = {pykx_config_profile: {}}
+ for k, v in config.items():
+ data[pykx_config_profile][k] = v
+ print_config += f'\t- {k} = {v}\n'
+ os.environ[k] = v
+ with open(fpath, 'w') as file:
+ toml.dump(data, file)
+ print(print_config)
+
+
+_user_os = {'Linux': 'l64', 'Darwin': 'm64', 'Windows': 'w64'}
+
+_user_arch = {'x86_64': '', 'aarch64': 'arm'}
+
+_kdb_url = 'https://portal.dl.kx.com/assets/raw/kdb+/4.0'
+
+
+def install_q(location: str = '~/q',
+ overwrite_config: bool = False,
+ prompted: bool = False,
+ date: str = '2024.07.08'):
+ """
+ Install q to a specified location.
+
+ Parameters:
+ location: The location to which q will be installed
+ overwrite_config: Should a configuration file in your HOME directory be overwritten?
+ prompted: Should a user be prompted for input requesting configuration overwrite
+ this is used specifically when other functions would be installing q
+ date: The dated version of kdb+ 4.0 which is to be installed
+ """
+ global qhome
+ my_os = _user_os[platform.uname()[0]]
+ if my_os == 'l64':
+ my_os += _user_arch[platform.uname()[4]]
+ location = Path(os.path.expanduser(location))
+ url = f'{_kdb_url}/{date}/{my_os}.zip'
+ r = requests.get(url)
+ if not r.status_code == 200:
+ raise RuntimeError(f'Request for download of q unsuccessful with code: {r.status_code}')
+ zf = ZipFile(io.BytesIO(r.content), 'r')
+ zf.extractall(location)
+ try:
+ executable = 'q.exe' if my_os == 'w64' else 'q'
+ executable_loc = location/my_os/executable
+ os.chmod(executable_loc, 0o777)
+ except BaseException:
+ raise RuntimeError(f"Unable to set execute permissions on file: {executable_loc}")
+ shutil.copy(pykx_dir/'pykx.q', location/'pykx.q')
+ shutil.copy(pykx_dir/'lib/s.k_', location/'s.k_')
+
+ add_to_config({
+ 'PYKX_Q_EXECUTABLE': str(executable_loc),
+ 'QHOME': str(location)})
+ print('Please restart your process to use this executable.')
+
+
+def start_q_subprocess(port: int,
+ load_file: str = '',
+ init_args: list = None,
+ process_logs: bool = True,
+ return_server: bool = True,
+ prompt: bool = True):
+ """
+ Initialize a q subprocess using a supplied path to an executable on a specified port
+
+ Parameters:
+ port: The port on which the q process will be started.
+ init_args: A list denoting any arguments to be passed when starting
+ the q process.
+ process_logs: Should the stdout of the q process be passed through to the parent process (stderr always is)
+ prompt: Should a user be prompted for input relating to how/where install of q
+ should be completed if not originally available.
+
+ Returns:
+ The subprocess object which was generated on initialisation
+ """
+ q_executable = _get_qexecutable()
+ if q_executable is None:
+ my_os = _user_os[platform.uname()[0]]
+ if my_os == 'l64':
+ my_os += _user_arch[platform.uname()[4]]
+ qhome = _get_qhome()
+ loc = qhome / my_os / _executable
+ if loc.is_file():
+ q_executable = str(loc)
+ else:
+ raise RuntimeError(
+ 'Unable to locate an appropriate q executable\n'
+ 'Please install q using the function "kx.util.install_q" or following the '
+ 'instructions at:\nhttps://code.kx.com/pykx/getting-started/installing.html'
+ )
+ with PyKXReimport():
+ qinit = [q_executable, load_file, '-p', f'{port}']
+ if init_args is not None:
+ if not isinstance(init_args, list):
+ raise TypeError('Supplied additional startup arguments must be a list')
+ if not all(isinstance(s, str) for s in init_args):
+ raise TypeError('All supplied arguments to init_args must be str type objects')
+ qinit.extend(init_args)
+ server = subprocess.Popen(
+ qinit,
+ stdin=subprocess.PIPE,
+ stdout=None if process_logs else subprocess.DEVNULL,
+ stderr=None)
+ time.sleep(2)
+ return server
+
+
+def kill_q_process(port: int) -> bool:
+ """
+ Kill a q process running on a specified port. This allows users to
+ kill sub-processes running q in the case that access to the port has been
+ lost because the parent process is no longer available.
+
+ Parameters:
+ port: The port on which the q process to be killed is listening
+
+ Returns:
+ `True` if a q process listening on the port was killed, `False` otherwise
+ """
+ if not _psutil_available:
+ raise ImportError(
+ 'psutil library not available, install psutil with pip/conda as follows :\n'
+ ' pip -> pip install psutil\n'
+ ' conda -> conda install conda-forge::psutil'
+ )
+ processes = [proc for proc in psutil.process_iter() if proc.name()
+ == 'q']
+ for p in processes:
+ for c in p.connections():
+ if c.status == 'LISTEN' and c.laddr.port == port:
+ try:
+ os.kill(p.pid, signal.SIGKILL)
+ return True
+ except BaseException:
+ return False
+ return False
+
+
+def detect_bad_columns(table, return_cols: bool = False):
+ """
+ Validate that the columns of a table conform to expected naming conventions for kdb+
+ and do not contain duplicates.
+
+ Parameters:
+ table: The `pykx.Table`, `pykx.KeyedTable`, `pykx.SplayedTable` or
+ `pykx.PartitionedTable` object which is to be checked
+ return_cols: Should the invalid columns from the table be returned
+
+ Returns:
+ Issues a warning indicating the issue with the column(s) and returns `True` if the columns are
+ invalid or `False` if not; if `return_cols` is set, the list of invalid columns is returned.
+ """
+ cols = []
+ bad_cols = q('.pykx.util.html.detectbadcols', table).py()
+ hasDups, hasInvalid = [len(x) for x in bad_cols.values()]
+ if hasDups or hasInvalid:
+ warn_string = '\nDuplicate columns or columns with reserved characters detected:'
+ if hasDups:
+ warn_string += f'\n\tDuplicate columns: {bad_cols["dup"]}'
+ if hasInvalid:
+ warn_string += f'\n\tInvalid columns: {bad_cols["invalid"]}'
+ warn_string += '\nSee https://code.kx.com/pykx/help/troubleshooting.html to learn more about updating your table' # noqa
+ warn(warn_string, RuntimeWarning)
+ if return_cols:
+ for i in bad_cols.values():
+ cols.extend(i)
+ return cols
+ else:
+ return True
+ if return_cols:
+ return cols
+ return False
diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py
index 734ef41..50d19ce 100644
--- a/src/pykx/wrappers.py
+++ b/src/pykx/wrappers.py
@@ -1,4 +1,5 @@
-"""Wrappers for q data structures, with conversion functions to Python/Numpy/Pandas/Arrow.
+"""
+Wrappers for q data structures, with conversion functions to Python/Numpy/Pandas/Arrow.
Under PyKX, q has its own memory space in which it stores q data structures in the same way it is
stored within a regular q process. PyKX provides Pythonic wrappers around these objects in q
@@ -164,27 +165,28 @@
from abc import ABCMeta
from collections import abc
from datetime import datetime, timedelta
+import importlib
from inspect import signature
import math
from numbers import Integral, Number, Real
import operator
from uuid import UUID
from typing import Any, Optional, Tuple, Union
-import warnings
+from warnings import warn
from io import StringIO
import numpy as np
import pandas as pd
import pytz
-from . import _wrappers
+from . import _wrappers, help
from ._pyarrow import pyarrow as pa
-from .config import k_gc, licensed, pandas_2
+from .config import k_gc, licensed, pandas_2, suppress_warnings
from .core import keval as _keval
-from .constants import INF_INT16, INF_INT32, INF_INT64, NULL_INT16, NULL_INT32, NULL_INT64
+from .constants import INF_INT16, INF_INT32, INF_INT64, INF_NEG_INT16, INF_NEG_INT32, INF_NEG_INT64
+from .constants import NULL_INT16, NULL_INT32, NULL_INT64
from .exceptions import LicenseException, PyArrowUnavailable, PyKXException, QError
-from .util import cached_property, classproperty, df_from_arrays, slice_to_range
-
+from .util import cached_property, classproperty, detect_bad_columns, df_from_arrays, slice_to_range
q_initialized = False
@@ -237,55 +239,6 @@ def _rich_convert(x: 'K', stdlib: bool = True, raw=False):
return x.np(raw=raw)
-def _null_gen(x):
- def null():
- """Generate the pykx null representation associated with an atom type
-
- Examples:
-
- ```python
- >>> import pykx as kx
- >>> kx.TimeAtom.null
- pykx.TimeAtom(pykx.q('0Nt'))
- >>> kx.GUIDAtom.null
- pykx.GUIDAtom(pykx.q('00000000-0000-0000-0000-000000000000'))
- ```
- """
- if licensed and x is not None:
- return q(f'{x}')
- elif not licensed:
- raise QError('Generation of null data not supported in unlicensed mode')
- else:
- raise NotImplementedError('Retrieval of null values not supported for this type')
- return null
-
-
-def _inf_gen(x):
- def inf(neg=False):
- """Generate the pykx infinite value associated with an atom type
-
- Parameters:
- neg: Should the return value produce the negative infinity value
-
- Examples:
-
- ```python
- >>> import pykx as kx
- >>> kx.TimeAtom.inf
- pykx.TimeAtom(pykx.q('0Wt'))
- >>> kx.TimeAtom.inf(neg=True)
- pykx.TimeAtom(pykx.q('-0Wt'))
- ```
- """
- if licensed and x is not None:
- return q('{[p]$[p;neg;]'+f'{x}'+'}', neg)
- elif not licensed:
- raise QError('Generation of infinite data not supported in unlicensed mode')
- else:
- raise NotImplementedError('Retrieval of infinite values not supported for this type')
- return inf
-
-
# HACK: This gets overwritten by the toq module to avoid a circular import error.
def toq(*args, **kwargs): # nocov
raise NotImplementedError
@@ -297,12 +250,11 @@ class K:
Parameters:
x (Any): An object that will be converted into a `pykx.K` object via [`pykx.toq`][].
"""
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
- def __new__(cls, x: Any, *, cast: bool = None, **kwargs):
+ def __new__(cls, x: Any, *args, cast: bool = None, **kwargs):
return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
- def __init__(self, x: Any, *, cast: bool = None, **kwargs): # Signature must match `__new__`
+ # Signature must match `__new__`
+ def __init__(self, x: Any, *args, cast: bool = None, **kwargs):
pass
def __del__(self):
@@ -558,6 +510,26 @@ def is_inf(self) -> bool:
return False
return q(f'{{any -0W 0W{type_char}~\\:x}}')(self).py()
+ @property
+ def is_pos_inf(self) -> bool:
+ if self.t in {-1, -2, -4, -10, -11}:
+ return False
+ try:
+ type_char = ' bg xhijefcspmdznuvts'[abs(self.t)]
+ except IndexError:
+ return False
+ return q(f'{{0W{type_char}~x}}')(self).py()
+
+ @property
+ def is_neg_inf(self) -> bool:
+ if self.t in {-1, -2, -4, -10, -11}:
+ return False
+ try:
+ type_char = ' bg xhijefcspmdznuvts'[abs(self.t)]
+ except IndexError:
+ return False
+ return q(f'{{-0W{type_char}~x}}')(self).py()
+
def __hash__(self):
return _wrappers.k_hash(self)
@@ -596,9 +568,41 @@ def __rxor__(self, other):
class EnumAtom(Atom):
- """Wrapper for q enum atoms."""
+ """Wrapper for q enum atoms.
+
+ Parameters:
+ variable: The name of a list in q memory.
+ index: An index used in [Enumeration](https://code.kx.com/q/ref/enumeration/).
+ value: An item that is used in [Enumerate](https://code.kx.com/q/ref/enumerate/) and [Enum Extend](https://code.kx.com/q/ref/enum-extend/).
+ extend: A boolean set to True to use [Enum Extend](https://code.kx.com/q/ref/enum-extend/) and False to use [Enumerate](https://code.kx.com/q/ref/enumerate/).
+ """ # noqa: E501
t = -20
+ def __new__(cls, variable, index=None, value=None, extend=False):
+ if not isinstance(variable, (str, SymbolAtom)):
+ raise TypeError("Variable name must be of type String or Symbol.")
+ if not (index is None) ^ (value is None):
+ raise AttributeError("Can only set one of 'value' and 'index' at one time.")
+ if index is not None:
+ return q('!', variable, index)
+ if value is not None:
+ if extend:
+ return q('?', variable, value)
+ else:
+ return q('$', variable, value)
+
+ def value(self):
+ """Returns the value of the enumeration"""
+ return q.value(self)
+
+ def domain(self):
+ """Returns the name of the domain of the enum"""
+ return q.key(self)
+
+ def index(self):
+ """Returns the index of the enum in the q list"""
+ return q('`long$', self)
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
return _wrappers.k_j(self)
@@ -652,6 +656,8 @@ class TemporalFixedAtom(TemporalAtom):
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
return self.np(raw=True)
+ if self.is_null:
+ return pd.NaT
return self.np().astype(datetime)
def np(self,
@@ -666,7 +672,7 @@ def np(self,
if self.is_null:
return np.datetime64('NaT')
if self.t == -12:
- epoch_offset = 0 if self.is_inf else self._epoch_offset
+ epoch_offset = 0 if self.is_pos_inf else self._epoch_offset
return np.datetime64(_wrappers.k_j(self) + epoch_offset, self._np_type)
return np.datetime64(_wrappers.k_i(self) + self._epoch_offset, self._np_type)
@@ -690,7 +696,6 @@ class TimeAtom(TemporalSpanAtom):
_np_type = 'ms'
_np_dtype = 'timedelta64[ms]'
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
def __new__(cls, x: Any, *, cast: bool = None, **kwargs):
if (type(x) == str) and x == 'now': # noqa: E721
if licensed:
@@ -702,11 +707,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -716,6 +725,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
class SecondAtom(TemporalSpanAtom):
"""Wrapper for q second atoms."""
@@ -730,11 +747,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -744,6 +765,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
class MinuteAtom(TemporalSpanAtom):
"""Wrapper for q minute atoms."""
@@ -758,11 +787,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -772,6 +805,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
class TimespanAtom(TemporalSpanAtom):
"""Wrapper for q timespan atoms."""
@@ -781,16 +822,38 @@ class TimespanAtom(TemporalSpanAtom):
_np_type = 'ns'
_np_dtype = 'timedelta64[ns]'
+ def __new__(cls, x: Any, *args, cast: bool = None, **kwargs):
+     """Create a timespan atom from an object, the string 'now', or five integers.
+
+     Parameters:
+         x: The object to convert. The string `'now'` returns `.z.N` when
+             licensed. When `x` is an `int` it is the first of five numeric
+             components (days) and must be followed by four more integers.
+         *args: The remaining numeric components (hours, minutes, seconds,
+             nanoseconds); only consulted when `x` is an `int`.
+         cast: Passed through to `toq` for non-numeric construction.
+
+     Raises:
+         LicenseException: If numeric construction is attempted unlicensed.
+         TypeError: If any extra positional value is not an `int`, or if the
+             total number of numeric values is not five.
+     """
+     if (type(x) == str) and x == 'now': # noqa: E721
+         if licensed:
+             return q('.z.N')
+     if type(x) == int:
+         if not licensed:
+             raise LicenseException('Cannot create object from numerical values, convert from "datetime.timedelta"') # noqa: E501
+         if not all(isinstance(i, int) for i in args):
+             raise TypeError("All values must be of type int when creating a TimespanAtom using numeric values") # noqa: E501
+         # `x` plus four further values gives five in total; the messages count
+         # `x`, matching the DateAtom (3) and TimestampAtom (7) messages.
+         if len(args) > 4:
+             raise TypeError("Too many values. Numeric TimespanAtom creation requires 5 values only") # noqa: E501
+         if len(args) < 4:
+             raise TypeError("Too few values. Numeric TimespanAtom creation requires 5 values only") # noqa: E501
+         # All arguments were validated as ints above, so no further check is needed.
+         return q('{[D;h;m;s;n]sum (1D;0D01;0D00:01;0D00:00:01;0D00:00:00.000000001) * (D;h;m;s;n)}', x, args[0], args[1], args[2], args[3]) # noqa: E501
+     return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags
+
def _prototype(self=None):
return TimespanAtom(np.timedelta64(3796312051664551936, 'ns'))
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -800,6 +863,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_j(self)) == INF_INT64
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_INT64
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_NEG_INT64
+
class DatetimeAtom(TemporalFixedAtom):
"""Wrapper for q datetime atoms.
@@ -815,14 +886,35 @@ class DatetimeAtom(TemporalFixedAtom):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
+
+ @property
+ def is_null(self) -> bool:
+ return math.isnan(self.py(raw=True))
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @property
+ def is_inf(self) -> bool:
+ aspy = self.py(raw=True)
+ return (math.inf == aspy) or (-math.inf == aspy)
+
+ @property
+ def is_pos_inf(self) -> bool:
+ return math.inf == self.py(raw=True)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return -math.inf == self.py(raw=True)
def __init__(self, *args, **kwargs):
- warnings.warn('The q datetime type is deprecated', DeprecationWarning)
+ warn('The q datetime type is deprecated', DeprecationWarning)
super().__init__(*args, **kwargs)
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
@@ -847,11 +939,23 @@ class DateAtom(TemporalFixedAtom):
_epoch_offset = DATE_OFFSET
_np_dtype = 'datetime64[D]'
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
- def __new__(cls, x: Any, *, cast: bool = None, **kwargs):
- if (type(x) == str) and x == 'today': # noqa: E721
- if licensed:
- return q('.z.D')
+ def __new__(cls, x: Any, *args, cast: bool = None, **kwargs):
+ if type(x) == str:
+ if x == 'today':
+ if licensed:
+ return q('.z.D')
+ if type(x) == int:
+ if not licensed:
+ raise LicenseException('Cannot create object from numerical values, convert from "datetime.date"') # noqa: E501
+ if not all(isinstance(i, int) for i in args):
+ raise TypeError("All values must be of type int when creating a DateAtom using numeric values") # noqa: E501
+ if len(args) != 2:
+ if len(args) > 2:
+ raise TypeError("Too many values. Numeric DateAtom creation requires 3 values only") # noqa: E501
+ else:
+ raise TypeError("Too few values. Numeric DateAtom creation requires 3 values only") # noqa: E501
+ elif all(isinstance(i, int) for i in args) and (len(args) == 2):
+ return q('{[y;m;d]"D"$"." sv string (y;m;d)}', x, args[0], args[1])
return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags
def _prototype(self=None):
@@ -859,11 +963,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -873,6 +981,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
class MonthAtom(TemporalFixedAtom):
"""Wrapper for q month atoms."""
@@ -888,11 +1004,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -902,6 +1022,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
class TimestampAtom(TemporalFixedAtom):
"""Wrapper for q timestamp atoms."""
@@ -912,11 +1040,24 @@ class TimestampAtom(TemporalFixedAtom):
_epoch_offset = TIMESTAMP_OFFSET
_np_dtype = 'datetime64[ns]'
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
- def __new__(cls, x: Any, *, cast: bool = None, **kwargs):
+ def __new__(cls, x: Any, *args, cast: bool = None, **kwargs):
if (type(x) == str) and x == 'now': # noqa: E721
if licensed:
return q('.z.P')
+ if type(x) == int:
+ if not licensed:
+ raise LicenseException('Cannot create object from numerical values, convert from "datetime.datetime"') # noqa: E501
+ if not all(isinstance(i, int) for i in args):
+ raise TypeError("All values must be of type int when creating a TimestampAtom using numeric values") # noqa: E501
+ if len(args) != 6:
+ if len(args) > 6:
+ raise TypeError("Too many values. Numeric TimestampAtom creation requires 7 values only") # noqa: E501
+ else:
+ raise TypeError("Too few values. Numeric TimestampAtom creation requires 7 values only") # noqa: E501
+ elif all(isinstance(i, int) for i in args):
+ return q('''{[Y;M;D;h;m;s;n]
+ ("D"$"." sv string (Y;M;D))+sum(0D01;0D00:01;0D00:00:01;0D00:00:00.000000001)*(h;m;s;n)}''', # noqa: E501
+ x, args[0], args[1], args[2], args[3], args[4], args[5])
return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags
def _prototype(self=None):
@@ -924,11 +1065,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -938,6 +1083,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_j(self)) == INF_INT64
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_INT64
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_NEG_INT64
+
@property
def date(self):
return q('{`date$x}', self)
@@ -983,6 +1136,8 @@ def py(self,
# convert to datetime64[us] before converting to datetime objects
if raw:
return _wrappers.k_j(self)
+ if self.is_null:
+ return pd.NaT
if tzinfo is not None:
if tzshift:
return self\
@@ -1020,11 +1175,15 @@ def _prototype(self=None):# noqa
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1034,6 +1193,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return False
+ @property
+ def is_pos_inf(self) -> bool:
+ return False
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return False
+
def __bytes__(self):
return _wrappers.k_s(self)
@@ -1067,17 +1234,27 @@ class CharAtom(Atom):
_null = '" "'
_inf = None
_np_dtype = None
+ _name = ''
+
+ @property
+ def __doc__(self):
+ if self._name != '':
+ return help.qhelp(self._name)
def _prototype(self=None):
return CharAtom(b' ')
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1087,6 +1264,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return False
+ @property
+ def is_pos_inf(self) -> bool:
+ return False
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return False
+
def __bytes__(self):
return self.py()
@@ -1098,7 +1283,7 @@ def __getitem__(self, key):
raise LicenseException('index into K object')
if key != 0:
raise IndexError('index out of range')
- return q('first', self)
+ return self
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
@@ -1132,6 +1317,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return math.isinf(self.py())
+ @property
+ def is_pos_inf(self) -> bool:
+ return np.isposinf(self.py())
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return np.isneginf(self.py())
+
def __round__(self, ndigits=None):
return round(self.py(), ndigits)
@@ -1167,11 +1360,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return _wrappers.k_f(self)
@@ -1192,11 +1389,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return _wrappers.k_e(self)
@@ -1230,21 +1431,22 @@ def __ceil__(self):
return self.py()
def _py_null_or_inf(self, default, raw: bool):
- if not raw and (self.is_null or self.is_inf):
- # By returning the wrapper around the q null/inf when a Python object is requested, we
- # propagate q's behavior around them - for better and for worse. Notably this ensures
- # symmetric conversions.
- return self
- return default
-
- def _np_null_or_inf(self, default, raw: bool):
if not raw:
if self.is_null:
- raise PyKXException('Numpy does not support null atomic integral values')
- if self.is_inf:
- raise PyKXException('Numpy does not support infinite atomic integral values')
+ return pd.NA
+ elif self.is_pos_inf:
+ return math.inf
+ elif self.is_neg_inf:
+ return -math.inf
return default
+ def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None,
+ as_arrow: Optional[bool] = False):
+ if not raw and self.is_null:
+ return pd.NA
+ else:
+ return self.np(raw=raw)
+
class LongAtom(IntegralNumericAtom):
"""Wrapper for q long (i.e. 64 bit signed integer) atoms."""
@@ -1258,11 +1460,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1272,11 +1478,19 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_j(self)) == INF_INT64
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_INT64
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_j(self) == INF_NEG_INT64
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return self._py_null_or_inf(_wrappers.k_j(self), raw)
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
- return self._np_null_or_inf(np.int64(_wrappers.k_j(self)), raw)
+ return np.int64(_wrappers.k_j(self))
class IntAtom(IntegralNumericAtom):
@@ -1291,11 +1505,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1305,11 +1523,19 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_i(self)) == INF_INT32
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_INT32
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_i(self) == INF_NEG_INT32
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return self._py_null_or_inf(_wrappers.k_i(self), raw)
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
- return self._np_null_or_inf(np.int32(_wrappers.k_i(self)), raw)
+ return np.int32(_wrappers.k_i(self))
class ShortAtom(IntegralNumericAtom):
@@ -1324,11 +1550,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1338,11 +1568,19 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return abs(_wrappers.k_h(self)) == INF_INT16
+ @property
+ def is_pos_inf(self) -> bool:
+ return _wrappers.k_h(self) == INF_INT16
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return _wrappers.k_h(self) == INF_NEG_INT16
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return self._py_null_or_inf(_wrappers.k_h(self), raw)
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
- return self._np_null_or_inf(np.int16(_wrappers.k_h(self)), raw)
+ return np.int16(_wrappers.k_h(self))
class ByteAtom(IntegralNumericAtom):
@@ -1357,11 +1595,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ raise NotImplementedError('Retrieval of null values not supported for this type')
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1371,6 +1613,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return False
+ @property
+ def is_pos_inf(self) -> bool:
+ return False
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return False
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return _wrappers.k_g(self)
@@ -1390,11 +1640,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ return toq.from_none(None, cls)
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1404,6 +1658,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return False
+ @property
+ def is_pos_inf(self) -> bool:
+ return False
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return False
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
return _wrappers.guid_atom_py(self, raw, has_nulls, stdlib)
@@ -1420,11 +1682,15 @@ def _prototype(self=None):
@classproperty
def null(cls): # noqa: B902
- return _null_gen(cls._null)()
+ raise NotImplementedError('Retrieval of null values not supported for this type')
@classproperty
def inf(cls): # noqa: B902
- return _inf_gen(cls._inf)()
+ return toq.create_inf(cls)
+
+ @classproperty
+ def inf_neg(cls): # noqa: B902
+ return toq.create_neg_inf(cls)
@property
def is_null(self) -> bool:
@@ -1434,6 +1700,14 @@ def is_null(self) -> bool:
def is_inf(self) -> bool:
return False
+ @property
+ def is_pos_inf(self) -> bool:
+ return False
+
+ @property
+ def is_neg_inf(self) -> bool:
+ return False
+
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
return _wrappers.k_g(self)
@@ -1586,6 +1860,14 @@ def pd(
as_arrow: Optional[bool] = False,
):
res = pd.Series(self.np(raw=raw, has_nulls=has_nulls), copy=False)
+ if not raw:
+ null_inds = []
+ for i in range(len(self)):
+ if isinstance(self._unlicensed_getitem(i), IntegralNumericAtom)\
+ and self._unlicensed_getitem(i).is_null:
+ null_inds.append(i)
+ if not 0 == len(null_inds):
+ res[null_inds] = pd.NA
if as_arrow:
if not pandas_2:
raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True')
@@ -1601,7 +1883,22 @@ def pd(
def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
if pa is None:
raise PyArrowUnavailable # nocov
- return pa.array(self.np(raw=raw, has_nulls=has_nulls))
+ try:
+ np_array = self.np(raw=raw, has_nulls=has_nulls)
+ if not raw and isinstance(self, List):
+ null_inds = []
+ for i in range(len(self)):
+ if isinstance(self._unlicensed_getitem(i), IntegralNumericAtom)\
+ and self._unlicensed_getitem(i).is_null:
+ null_inds.append(i)
+ if not 0 == len(null_inds):
+ np_array[null_inds] = None
+ return pa.array(np_array)
+ except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid) as err:
+ if isinstance(self, List):
+ raise QError('Unable to convert pykx.List with non conforming types '
+ f'to PyArrow,\n failed with error: {err}')
+ raise err
def apply(self, func, *args, **kwargs):
if not callable(func):
@@ -1723,7 +2020,7 @@ def append(self, data):
if not q('{(0>type[y])& type[x]=abs type y}', self, data):
raise QError(f'Appending data of type: {type(K(data))} '
f'to vector of type: {type(self)} not supported')
- append_vec = q('{[orig;app]orig,$[0 bool:
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
return self.np(raw=True, has_nulls=has_nulls).tolist()
- return [x if x.is_null else x.py() for x in self]
+ return [pd.NA if x.is_null else x.py() for x in self]
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
if raw:
@@ -2426,7 +2725,7 @@ def has_infs(self) -> bool:
def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True):
if raw:
return self.np(raw=True, has_nulls=has_nulls).tolist()
- return [x if x.is_null else x.py() for x in self]
+ return [pd.NaT if x.is_null else x.py() for x in self]
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
base_array = _wrappers.k_vec_to_array(self, self._np_base_type)
@@ -2541,7 +2840,7 @@ def py(self,
if x is None:
null_pos.append(converted_vector.index(x))
for i in null_pos:
- converted_vector[i]=TimestampAtom(None)
+ converted_vector[i]=pd.NaT
return converted_vector
@@ -2584,7 +2883,7 @@ class DatetimeVector(TemporalFixedVector):
_np_dtype = np.float64
def __init__(self, *args, **kwargs):
- warnings.warn('The q datetime type is deprecated', DeprecationWarning)
+ warn('The q datetime type is deprecated', DeprecationWarning)
super().__init__(*args, **kwargs)
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
@@ -2656,9 +2955,41 @@ def _prototype(self=None):
class EnumVector(Vector):
- """Wrapper for q enum vectors."""
+ """Wrapper for q enum vectors.
+
+ Parameters:
+ variable: The handle of a list in q memory.
+ indices: A list used in [Enumeration](https://code.kx.com/q/ref/enumeration/).
+ values: A list of items that is used in [Enumerate](https://code.kx.com/q/ref/enumerate/) and [Enum Extend](https://code.kx.com/q/ref/enum-extend/).
+ extend: A boolean set to True to use [Enum Extend](https://code.kx.com/q/ref/enum-extend/) and False to use [Enumerate](https://code.kx.com/q/ref/enumerate/).
+ """ # noqa: E501
t = 20
+ def __new__(cls, variable, indices=None, values=None, extend=False):
+     """Build an enum vector by evaluating a q operator against a named list.
+
+     Exactly one of `indices` and `values` must be supplied. With `indices`
+     the q operator `!` is applied; with `values` the operator is `?` when
+     `extend` is true and `$` otherwise.
+
+     Raises:
+         TypeError: If `variable` is neither a `str` nor a `SymbolAtom`.
+         AttributeError: If both or neither of `indices` and `values` are given.
+     """
+     if not isinstance(variable, (str, SymbolAtom)):
+         raise TypeError("Variable name must be of type String or Symbol.")
+     have_indices = indices is not None
+     have_values = values is not None
+     # Both supplied or both omitted is rejected.
+     if have_indices == have_values:
+         raise AttributeError("Can only set one of 'values' and 'indices' at one time.")
+     if have_indices:
+         return q('!', variable, indices)
+     operator = '?' if extend else '$'
+     return q(operator, variable, values)
+
+ def values(self):
+ """Returns the resolved value of the enumeration"""
+ return q.value(self)
+
+ def domain(self):
+ """Returns the name of the domain of the enum"""
+ return q.key(self)
+
+ def indices(self):
+ """Returns the indices of the enum in the q list"""
+ return q('`long$', self)
+
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
if raw:
return _wrappers.k_vec_to_array(self, _wrappers.NPY_INT64)
@@ -2796,7 +3127,6 @@ class Table(PandasAPI, Mapping):
"""
t = 98
- # TODO: `cast` should be set to False at the next major release (KXI-12945)
def __new__(cls, *args, cast: bool = None, **kwargs):
if 'data' in kwargs.keys():
return toq(q.flip(Dictionary(kwargs['data'])), ktype=Table, cast=cast)
@@ -2870,8 +3200,6 @@ def pd(
raw_guids=False,
as_arrow: Optional[bool] = False,
):
- if raw_guids:
- warnings.warn("Keyword 'raw_guids' is deprecated", DeprecationWarning)
if raw_guids and not raw:
v = [x.np(raw=isinstance(x, GUIDVector), has_nulls=has_nulls) for x in self._values]
v = [PandasUUIDArray(x) if x.dtype == complex else x for x in v]
@@ -2908,7 +3236,11 @@ def pd(
def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
if pa is None:
raise PyArrowUnavailable # nocov
- return pa.Table.from_pandas(self.pd(raw=raw, has_nulls=has_nulls, raw_guids=True))
+ try:
+ return pa.Table.from_pandas(self.pd(raw=raw, has_nulls=has_nulls, raw_guids=True))
+ except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid, pa.ArrowInvalid) as err:
+ raise QError('Unable to convert pykx.List column with non conforming types '
+ f'to PyArrow,\n failed with error: {err}')
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
return self.pd(raw=raw, has_nulls=has_nulls).to_records(index=False)
@@ -2918,7 +3250,6 @@ def insert(
row: Union[list, List],
match_schema: bool = False,
test_insert: bool = False,
- replace_self: bool = True,
inplace: bool = True
):
"""Helper function around `q`'s `insert` function which inserts a row or multiple rows into
@@ -2930,9 +3261,6 @@ def insert(
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
source tables contents.
- replace_self: `Deprecated` please use `inplace` keyword.
- Causes the underlying Table python object to update itself with the
- resulting Table after the insert.
inplace: Causes the underlying Table python object to update itself with the
resulting Table after the insert.
@@ -2955,10 +3283,7 @@ def insert(
q['.pykx.i.itab'] = self
q.insert('.pykx.i.itab', row, match_schema, test_insert)
res = q('.pykx.i.itab')
- if not replace_self:
- warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'",
- DeprecationWarning)
- if replace_self and inplace:
+ if inplace:
self.__dict__.update(res.__dict__)
q('delete itab from `.pykx.i')
return res
@@ -2968,7 +3293,6 @@ def upsert(
row: Union[list, List],
match_schema: bool = False,
test_insert: bool = False,
- replace_self: bool = True,
inplace: bool = True
):
"""Helper function around `q`'s `upsert` function which inserts a row or multiple rows into
@@ -2980,9 +3304,6 @@ def upsert(
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
source tables contents.
- replace_self: `Deprecated` please use `inplace` keyword.
- Causes the underlying Table python object to update itself with the
- resulting Table after the upsert.
inplace: Causes the underlying Table python object to update itself with the
resulting Table after the upsert.
@@ -3003,10 +3324,7 @@ def upsert(
```
"""
res = q.upsert(self, row, match_schema, test_insert)
- if not replace_self:
- warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'",
- DeprecationWarning)
- if replace_self and inplace:
+ if inplace:
self.__dict__.update(res.__dict__)
return res
@@ -3029,60 +3347,434 @@ def sorted(self, cols: Union[List, str] = ''):
else:
raise e
- def unique(self, cols: Union[List, str] = ''):
- try:
- if len(cols) == 0:
- cols = q.cols(self)[0]
- if not (isinstance(cols, List) or isinstance(cols, list)):
- cols = [cols]
- for col in cols:
- res = q.qsql.update(self, {col: f'`u#{col}'})
- self.__dict__.update(res.__dict__)
- return self
- except BaseException as e:
- err_str = str(e)
- if 'u-type' in err_str:
- raise QError('Items are not unique')
- elif 'type' in err_str:
- raise QError('Object does not support the unique attribute')
- else:
- raise e
+ def sql(self, query, *args):
+ """Execute an SQL query against the supplied PyKX tabular object.
- def parted(self, cols: Union[List, str] = ''):
- try:
- if len(cols) == 0:
- cols = q.cols(self)[0]
- if not (isinstance(cols, List) or isinstance(cols, list)):
- cols = [cols]
- for col in cols:
- res = q.qsql.update(self, {col: f'`p#{col}'})
- self.__dict__.update(res.__dict__)
- return self
- except BaseException as e:
- err_str = str(e)
- if 'u-type' in err_str:
- raise QError('Items are not parted')
- elif 'type' in err_str:
- raise QError('Object does not support the parted attribute')
- else:
- raise e
+ This function expects the table object to be supplied as a parameter
+ to the query, additional parameters can be supplied as positional
+ arguments.
- def grouped(self, cols: Union[List, str] = ''):
- try:
- if len(cols) == 0:
- cols = q.cols(self)[0]
- if not (isinstance(cols, List) or isinstance(cols, list)):
- cols = [cols]
- for col in cols:
- res = q.qsql.update(self, {col: f'`g#{col}'})
- self.__dict__.update(res.__dict__)
- return self
- except BaseException as e:
- err_str = str(e)
- if 'type' in err_str:
- raise QError('Object does not support the grouped attribute')
- else:
- raise e
+ Parameters:
+ query: A str object indicating the query to be executed, this
+ must contain a required argument $1 associated with the
+ table being queried.
+ *args: Any additional positional arguments required for query
+ execution.
+
+ Returns:
+ The queried table associated with the SQL statement
+
+ Examples:
+
+ Query a simple table supplying no additional arguments
+
+ ```python
+ >>> tab = kx.Table(data = {'x': [1, 2, 3], 'x1': ['a', 'b', 'a']})
+ >>> tab.sql("select * from $1 where x1='a'")
+ pykx.Table(pykx.q('
+ x x1
+ ----
+ 1 a
+ 3 a
+ '))
+ ```
+
+ Query a simple table supplying multiple arguments
+
+ ```python
+ >>> tab = kx.Table(data = {'x': [1, 2, 3], 'x1': ['a', 'b', 'a']})
+ >>> tab.sql("select * from $1 where x1=$2 and x=$3", 'a', 1)
+ pykx.Table(pykx.q('
+ x x1
+ ----
+ 1 a
+ '))
+ ```
+ """
+ if not isinstance(query, str):
+ raise TypeError('Supplied query is not of type "str"')
+ if '$1' not in query:
+ raise QError('Supplied query does not contain argument $1')
+ return q.sql(query, self, *args)
+
+ def select(self, columns=None, where=None, by=None, inplace=False):
+ """Apply a q style select statement on the supplied table defined within the process.
+
+ This implementation follows the q functional select syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: A dictionary mapping the name to be given to a column and the logic to be
+ applied in aggregation to that column both as strings.
+ where: Conditional filtering used to select subsets of the data on which by-clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+ inplace: Whether the result of the select is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a PyKX Table against which to run the examples
+
+ ```python
+ >>> import pykx as kx
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)})
+ ```
+
+ Select all items in the table
+
+ ```python
+ >>> qtab.select()
+ ```
+
+ Filter table based on various where conditions
+
+ ```python
+ >>> qtab.select(where='col2<0.5')
+ ```
+
+ Retrieve statistics by grouping data on symbol columns
+
+ ```python
+ >>> qtab.select(columns={'maxCol2': 'max col2'}, by={'col1': 'col1'})
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'})
+ ```
+
+ Retrieve grouped statistics with restrictive where condition
+
+ ```python
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}, where='col3=0b')
+ ```
+ """ # noqa: E501
+ return q.qsql.select(self, columns, where, by, inplace)
+
+ def exec(self, columns=None, where=None, by=None):
+ """
+ Apply a q style exec statement on the supplied PyKX Table.
+
+ This implementation follows the q functional exec syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: A dictionary mapping the name to be given to a column and the logic to be
+ applied in aggregation to that column both as strings. A string defining a single
+ column to be retrieved from the table as a list.
+ where: Conditional filtering used to select subsets of the data on which by clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+
+ Examples:
+
+ Define a PyKX Table
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)}
+ ... )
+ ```
+
+ Select last item of the table
+
+ ```python
+ qtab.exec()
+ ```
+
+ Retrieve a column from the table as a list
+
+ ```python
+ qtab.exec('col3')
+ ```
+
+ Retrieve a set of columns from a table as a dictionary
+
+ ```python
+ qtab.exec({'symcol': 'col1'})
+ qtab.exec({'symcol': 'col1', 'boolcol': 'col3'})
+ ```
+
+ Filter columns from a table based on various where conditions
+
+ ```python
+ qtab.exec('col3', where='col1=`a')
+ qtab.exec({'symcol': 'col1', 'maxcol4': 'max col4'}, where=['col1=`a', 'col2<0.3'])
+ ```
+
+ Retrieve data grouping by data on symbol columns
+
+ ```python
+ qtab.exec('col2', by={'col1': 'col1'})
+ qtab.exec(columns={'maxCol2': 'max col2'}, by={'col1': 'col1'})
+ qtab.exec(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'})
+ ```
+
+ Retrieve grouped statistics with restrictive where condition
+
+ ```python
+ qtab.exec(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}, where='col3=0b')
+ ```
+ """ # noqa: E501
+ return q.qsql.exec(self, columns, where, by)
+
+ def update(self, columns=None, where=None, by=None, inplace=False):
+ """
+ Apply a q style update statement on tables defined within the process.
+
+ This implementation follows the q functional update syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: A dictionary mapping the name of a column present in the table or one to be
+ added to the contents which are to be added to the column, this content can be a
+ string denoting q data or the equivalent Python data.
+ where: Conditional filtering used to select subsets of the data on which by-clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+ inplace: Whether the result of an update is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a PyKX Table against which to run the examples
+
+ ```python
+ >>> qtab = kx.Table(data={
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']}
+ ... )
+ ```
+
+ Update all the contents of a column
+
+ ```python
+ qtab.update({'eye': '`blue`brown`green'})
+ ```
+
+ Update the content of a column restricting scope using a where clause
+
+ ```python
+ qtab.update({'eye': ['blue']}, where='hair=`fair')
+ ```
+
+ Define a q table suitable for by clause example
+
+ ```python
+ >>> bytab = kx.Table(data={
+ ... 'name': kx.random.random(100, ['nut', 'bolt', 'screw']),
+ ... 'color': kx.random.random(100, ['red', 'green', 'blue']),
+ ... 'weight': 0.5 * kx.random.random(100, 20),
+ ... 'city': kx.random.random(100, ['london', 'paris', 'rome'])})
+ ```
+
+ Apply an update grouping based on a by phrase
+
+ ```python
+ bytab.update({'weight': 'avg weight'}, by={'city': 'city'})
+ ```
+
+ Apply an update grouping based on a by phrase and persist the result using the inplace keyword
+
+ ```python
+ bytab.update(columns={'weight': 'avg weight'}, by={'city': 'city'}, inplace=True)
+ ```
+ """ # noqa: E501
+ return q.qsql.update(self, columns, where, by, inplace)
+
+ def delete(self, columns=None, where=None, inplace=False):
+ """
+ Apply a q style delete statement to a PyKX table.
+
+ This implementation follows the q functional delete syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: Denotes the columns to be deleted from a table.
+ where: Conditional filtering used to select subsets of the data which are to be
+ deleted from the table.
+ inplace: Whether the result of the delete is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a PyKX Table against which to run the examples
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']}
+ ... )
+ ```
+
+ Delete all the contents of the table
+
+ ```python
+ >>> qtab.delete()
+ ```
+
+ Delete single and multiple columns from the table
+
+ ```python
+ >>> qtab.delete('age')
+ >>> qtab.delete(['age', 'eye'])
+ ```
+
+ Delete rows of the dataset based on where condition
+
+ ```python
+ >>> qtab.delete(where='hair=`fair')
+ >>> qtab.delete(where=['hair=`fair', 'age=28'])
+ ```
+
+ Delete a column from the table and persist the result using the
+ inplace keyword
+
+ ```python
+ >>> qtab.delete('age', inplace=True)
+ ```
+ """ # noqa: E501
+ return q.qsql.delete(self, columns, where, inplace)
+
+ def reorder_columns(self, cols: Union[List, str], inplace: bool = False):
+ """
+ Reorder the columns of a supplied table, using a supplied list of columns.
+ The listed columns are placed in the supplied order; if fewer than the total number
+ of columns in the original table are supplied, the supplied columns become the first
+ columns in the new table.
+
+ Parameters:
+ cols: The column(s) which will be used to reorder the columns of the table
+ inplace: Whether the result of the reorder is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Returns:
+ The resulting table after the columns have been rearranged.
+
+ Examples:
+
+ Order a single column to be the first column in a table
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'a': [1, 2, 3],
+ ... 'b': ['a', 'b', 'c'],
+ ... 'c': [1.0, 2.0, 3.0]
+ ... })
+ >>> tab.reorder_columns('c')
+ pykx.Table(pykx.q('
+ c a b
+ -----
+ 1 1 a
+ 2 2 b
+ 3 3 c
+ '))
+ ```
+
+ Reorder all columns within a table
+
+ ```python
+ >>> tab = kx.Table(data={
+ ... 'a': [1, 2, 3],
+ ... 'b': ['a', 'b', 'c'],
+ ... 'c': [1.0, 2.0, 3.0]
+ ... })
+ >>> tab.reorder_columns(['b', 'c', 'a'])
+ pykx.Table(pykx.q('
+ b c a
+ -----
+ a 1 1
+ b 2 2
+ c 3 3
+ '))
+ ```
+ """
+ tab_cols = self.columns.py()
+ if isinstance(cols, list):
+ for i in cols:
+ if not isinstance(i, str):
+ raise QError(f'Supplied column "{i}" is not a string')
+ if i not in tab_cols:
+ raise QError(f'Supplied column "{i}" not in table columns')
+ elif isinstance(cols, str):
+ if cols not in tab_cols:
+ raise QError(f'Supplied column "{cols}" not in table columns')
+ else:
+ raise QError('Supplied column is not a string or list')
+ res = q.xcols(cols, self)
+ if inplace:
+ self.__dict__.update(res.__dict__)
+ return res
+
+ def unique(self, cols: Union[List, str] = ''):
+ try:
+ if len(cols) == 0:
+ cols = q.cols(self)[0]
+ if not (isinstance(cols, List) or isinstance(cols, list)):
+ cols = [cols]
+ for col in cols:
+ res = q.qsql.update(self, {col: f'`u#{col}'})
+ self.__dict__.update(res.__dict__)
+ return self
+ except BaseException as e:
+ err_str = str(e)
+ if 'u-type' in err_str:
+ raise QError('Items are not unique')
+ elif 'type' in err_str:
+ raise QError('Object does not support the unique attribute')
+ else:
+ raise e
+
+ def parted(self, cols: Union[List, str] = ''):
+ try:
+ if len(cols) == 0:
+ cols = q.cols(self)[0]
+ if not (isinstance(cols, List) or isinstance(cols, list)):
+ cols = [cols]
+ for col in cols:
+ res = q.qsql.update(self, {col: f'`p#{col}'})
+ self.__dict__.update(res.__dict__)
+ return self
+ except BaseException as e:
+ err_str = str(e)
+ if 'u-type' in err_str:
+ raise QError('Items are not parted')
+ elif 'type' in err_str:
+ raise QError('Object does not support the parted attribute')
+ else:
+ raise e
+
+ def grouped(self, cols: Union[List, str] = ''):
+ try:
+ if len(cols) == 0:
+ cols = q.cols(self)[0]
+ if not (isinstance(cols, List) or isinstance(cols, list)):
+ cols = [cols]
+ for col in cols:
+ res = q.qsql.update(self, {col: f'`g#{col}'})
+ self.__dict__.update(res.__dict__)
+ return self
+ except BaseException as e:
+ err_str = str(e)
+ if 'type' in err_str:
+ raise QError('Object does not support the grouped attribute')
+ else:
+ raise e
def xbar(self, values):
"""
@@ -3175,7 +3867,7 @@ def window_join(self, table, windows, cols, aggs):
... 'time': kx.q('10:01:01+til 9'),
... 'ask': [101, 103, 103, 104, 104, 107, 108, 107, 108],
... 'bid': [98, 99, 102, 103, 103, 104, 106, 106, 107]})
- >>> windows = kx.q('{-2 1+\:x}', trades['time'])
+ >>> windows = kx.q('{-2 1+\\:x}', trades['time'])
>>> trades.window_join(quotes,
... windows,
... ['sym', 'time'],
@@ -3198,27 +3890,9 @@ def _repr_html_(self):
if not licensed:
return self.__repr__()
console = q.system.console_size.py()
- qtab = q('''{[c;t]
- n:count t;
- cls:$[c[1]c[0];?[t;enlist(=;`i;(last;`i));0b;{x!x}cls];()];
- h
- }''', console, self)
+ if detect_bad_columns(self):
+ return self.__repr__()
+ qtab = q('.pykx.util.html.memsplay', console, self)
df = pd.read_json(StringIO(qtab.py().decode("utf-8")), orient='records',
convert_dates=False, dtype=False)
if len(df) == 0:
@@ -3230,11 +3904,7 @@ def _repr_html_(self):
df.set_index(['pykxTableIndex'], inplace=True)
df.index.names = ['']
ht = CharVector(df.to_html())
- ht = q('''{[c;t;h]
- $[c[0]",{reverse "," sv 3 cut reverse string x}[n]," rows × ",
- {reverse "," sv 3 cut reverse string x}[count cols t]," columns";h]
- }''', console, self, ht).py().decode("utf-8")
+ ht = q('.pykx.util.html.rowcols', console, self, ht).py().decode("utf-8")
return ht
@@ -3277,26 +3947,9 @@ def _repr_html_(self):
if not licensed:
return self.__repr__()
console = q.system.console_size.py()
- qtab = q('''{[c;t]
- n:count t;
- cls:$[c[1]c[0];?[t;enlist(=;`i;(last;`i));0b;{x!x}cls];()];
- h
- }''', console, self)
+ if detect_bad_columns(self):
+ return self.__repr__()
+ qtab = q('.pykx.util.html.memsplay', console, self)
df = pd.read_json(StringIO(qtab.py().decode("utf-8")), orient='records',
convert_dates=False, dtype=False)
if len(df) == 0:
@@ -3308,13 +3961,101 @@ def _repr_html_(self):
df.set_index(['pykxTableIndex'], inplace=True)
df.index.names = ['']
ht = CharVector(df.to_html())
- ht = q('''{[c;t;h]
- $[c[0]",{reverse "," sv 3 cut reverse string x}[n]," rows × ",
- {reverse "," sv 3 cut reverse string x}[count cols t]," columns";h]
- }''', console, self, ht).py().decode("utf-8")
+ ht = q('.pykx.util.html.rowcols', console, self, ht).py().decode("utf-8")
return ht
+ def add_prefix(self, prefix, axis=0):
+ raise AttributeError("Operation 'add_prefix' not supported for SplayedTable type")
+
+ def add_suffix(self, suffix, axis=0):
+ raise AttributeError("Operation 'add_suffix' not supported for SplayedTable type")
+
+ def agg(self, func, axis=0, *args, **kwargs):
+ raise AttributeError("Operation 'agg' not supported for SplayedTable type")
+
+ def apply(self, func, *args, axis: int = 0, raw=None, result_type=None, **kwargs):
+ raise AttributeError("Operation 'apply' not supported for SplayedTable type")
+
+ def cast(self, ktype):
+ raise AttributeError("Operation 'cast' not supported for SplayedTable type")
+
+ def count(self, axis=0, numeric_only=False):
+ raise AttributeError("Operation 'count' not supported for SplayedTable type")
+
+ def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False):
+ raise AttributeError("Operation 'drop_duplicates' not supported for SplayedTable type")
+
+ def exec(self, columns=None, where=None, by=None):
+ raise AttributeError("Operation 'exec' not supported for SplayedTable type")
+
+ def groupby(self,
+ by=None,
+ axis=0,
+ level=None,
+ as_index=True,
+ sort=True,
+ group_keys=True,
+ observed=False,
+ dropna=True):
+ raise AttributeError("Operation 'groupby' not supported for SplayedTable type")
+
+ def grouped(self, cols: Union[List, str] = ''):
+ raise AttributeError("Operation 'grouped' not supported for SplayedTable type")
+
+ def has_infs(self):
+ raise AttributeError("Operation 'has_infs' not supported for SplayedTable type")
+
+ def has_nulls(self):
+ raise AttributeError("Operation 'has_nulls' not supported for SplayedTable type")
+
+ def merge(self,
+ right,
+ how='inner',
+ on=None,
+ left_on=None,
+ right_on=None,
+ left_index=False,
+ right_index=False,
+ sort=False,
+ suffixes=('_x', '_y'),
+ copy=True,
+ validate=None,
+ q_join=False):
+ raise AttributeError("Operation 'merge' not supported for SplayedTable type")
+
+ def merge_asof(self,
+ right,
+ on=None,
+ left_on=None,
+ right_on=None,
+ left_index=False,
+ right_index=False,
+ by=None,
+ left_by=None,
+ right_by=None,
+ suffixes=('_x', '_y'),
+ tolerance=None,
+ allow_exact_matches=True,
+ direction='backward'):
+ raise AttributeError("Operation 'merge_asof' not supported for SplayedTable type")
+
+ def prototype(self):
+ raise AttributeError("Operation 'prototype' not supported for SplayedTable type")
+
+ def ungroup(self):
+ raise AttributeError("Operation 'ungroup' not supported for SplayedTable type")
+
+ def upsert(self,
+ row: Union[list, List],
+ match_schema: bool = False,
+ test_insert: bool = False,
+ inplace: bool = True
+ ):
+ raise AttributeError("Operation 'upsert' not supported for SplayedTable type")
+
+ def window_join(self, table, windows, cols, aggs):
+ raise AttributeError("Operation 'window_join' not supported for SplayedTable type")
+
class PartitionedTable(SplayedTable):
"""Wrapper for q partitioned tables."""
@@ -3347,47 +4088,25 @@ def _repr_html_(self):
if not licensed:
return self.__repr__()
console = q.system.console_size.py()
+ if detect_bad_columns(self):
+ return self.__repr__()
qtab = q('''{[c;t]
- t:value flip t;
- if[not count .Q.pn t;.Q.cn get t];
- ps:sums .Q.pn t;n:last ps;
- cls:$[c[1]=n;:.j.j $[c[1]=c[0];r:();
- if[fp~0;r:?[t;((=;.Q.pf;first .Q.pv);(<;`i;c[0]-1));0b;{x!x}cls]];
- if[fp>0;
- r:?[t;enlist(in;.Q.pf;fp#.Q.pv);0b;{x!x}cls];
- r:r,?[t;((=;.Q.pf;.Q.pv fp);(<;`i;-1+c[0]-ps[fp-1]));0b;{x!x}cls];
- ];
- if[c[0]=n;
+ :.j.j
+ .pykx.util.html.extendcols[c 1;count cols t;]
+ .pykx.util.html.stringify
+ .pykx.util.html.addindex[-1+n;]
+ ?[t;();0b;cls]
+ ];
+ r:.Q.ind[t;distinct til[c 0],-1+n];
+ .j.j
+ .pykx.util.html.extendcols[c 1;count cols t]
+ .pykx.util.html.extendrows[0;1]
+ .pykx.util.html.stringify
+ .pykx.util.html.addindex[-1+n;r]
}''', console, self)
df = pd.read_json(StringIO(qtab.py().decode("utf-8")), orient='records',
convert_dates=False, dtype=False)
@@ -3400,13 +4119,71 @@ def _repr_html_(self):
df.set_index(['pykxTableIndex'], inplace=True)
df.index.names = ['']
ht = CharVector(df.to_html())
- ht = q('''{[c;t;h]
- $[c[0]",{reverse "," sv 3 cut reverse string x}[n]," rows × ",
- {reverse "," sv 3 cut reverse string x}[count cols t]," columns";h]
- }''', console, self, ht).py().decode("utf-8")
+ ht = q('.pykx.util.html.rowcols', console, self, ht).py().decode("utf-8")
return ht
+ def astype(self, dtype, copy=True, errors='raise'):
+ raise AttributeError("Operation 'astype' not supported for PartitionedTable type")
+
+ def delete(self, columns=None, where=None, inplace=False):
+ raise AttributeError("Operation 'delete' not supported for PartitionedTable type")
+
+ def drop(self, labels=None, axis=0, index=None, columns=None, # noqa: C901
+ level=None, inplace=False, errors='raise'):
+ raise AttributeError("Operation 'drop' not supported for PartitionedTable type")
+
+ def get(self, key, default=None):
+ raise AttributeError("Operation 'get' not supported for PartitionedTable type")
+
+ def head(self, n: int = 5):
+ raise AttributeError("Operation 'head' not supported for PartitionedTable type")
+
+ def iloc(self):
+ raise AttributeError("Operation 'iloc' not supported for PartitionedTable type")
+
+ def loc(self):
+ raise AttributeError("Operation 'loc' not supported for PartitionedTable type")
+
+ def mode(self, axis: int = 0, numeric_only: bool = False, dropna: bool = True):
+ raise AttributeError("Operation 'mode' not supported for PartitionedTable type")
+
+ def nlargest(self, n, columns=None, keep='first'):
+ raise AttributeError("Operation 'nlargest' not supported for PartitionedTable type")
+
+ def nsmallest(self, n, columns=None, keep='first'):
+ raise AttributeError("Operation 'nsmallest' not supported for PartitionedTable type")
+
+ def sort_values(self, by=None, ascending=True):
+ raise AttributeError("Operation 'sort_values' not supported for PartitionedTable type")
+
+ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0):
+ raise AttributeError("Operation 'prod' not supported for PartitionedTable type")
+
+ def sample(self, n=None, frac=None, replace=False, weights=None,
+ random_state=None, axis=None, ignore_index=False):
+ raise AttributeError("Operation 'sample' not supported for PartitionedTable type")
+
+ def select_dtypes(self, include=None, exclude=None):
+ raise AttributeError("Operation 'select_dtypes' not supported for PartitionedTable type")
+
+ def sorted(self, cols: Union[List, str] = ''):
+ raise AttributeError("Operation 'sorted' not supported for PartitionedTable type")
+
+ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
+ raise AttributeError("Operation 'sum' not supported for PartitionedTable type")
+
+ def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False):
+ raise AttributeError("Operation 'std' not supported for PartitionedTable type")
+
+ def tail(self, n: int = 5):
+ raise AttributeError("Operation 'tail' not supported for PartitionedTable type")
+
+ def unique(self, cols: Union[List, str] = ''):
+ raise AttributeError("Operation 'unique' not supported for PartitionedTable type")
+
+ def xbar(self, values):
+ raise AttributeError("Operation 'xbar' not supported for PartitionedTable type")
+
class Dictionary(Mapping):
"""Wrapper for q dictionaries, including regular dictionaries, and keyed tables."""
@@ -3453,18 +4230,10 @@ def _repr_html_(self):
cls:$[c[1] bool:
+ return any(x.is_null if x.is_atom else x.has_nulls for x in self._values._values)
+
def ungroup(self):
return q.ungroup(self)
@@ -3608,10 +4380,14 @@ def __setitem__(self, key, val):
self.loc[key] = val
def __iter__(self):
- yield from zip(*self._keys._values)
+ for x in zip(*self._values._values):
+ if len(x)==1:
+ yield list(x).pop(0)
+ else:
+ yield list(x)
def keys(self):
- return list(self)
+ return self._keys
def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None):
if licensed:
@@ -3690,7 +4466,6 @@ def insert(
row: Union[list, List],
match_schema: bool = False,
test_insert: bool = False,
- replace_self: bool = True,
inplace: bool = True
):
"""Helper function around `q`'s `insert` function which inserts a row or multiple rows into
@@ -3702,9 +4477,6 @@ def insert(
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
source tables contents.
- replace_self: `Deprecated` please use `inplace` keyword.
- Causes the underlying Table python object to update itself with the
- resulting Table after the insert.
inplace: Causes the underlying Table python object to update itself with the
resulting Table after the insert.
@@ -3727,10 +4499,7 @@ def insert(
q['.pykx.i.itab'] = self
q.insert('.pykx.i.itab', row, match_schema, test_insert)
res = q('.pykx.i.itab')
- if not replace_self:
- warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'",
- DeprecationWarning)
- if replace_self and inplace:
+ if inplace:
self.__dict__.update(res.__dict__)
q('delete itab from `.pykx.i')
return res
@@ -3740,7 +4509,6 @@ def upsert(
row: Union[list, List],
match_schema: bool = False,
test_insert: bool = False,
- replace_self: bool = True,
inplace: bool = True
):
"""Helper function around `q`'s `upsert` function which inserts a row or multiple rows into
@@ -3753,9 +4521,6 @@ def upsert(
test_insert: Causes the function to modify a small local copy of the table and return
the modified example, this can only be used with embedded q and will not modify the
source tables contents.
- replace_self: `Deprecated` please use `inplace` keyword.
- Causes the underlying Table python object to update itself with the
- resulting Table after the insert.
inplace: Causes the underlying Table python object to update itself with the
resulting Table after the insert.
@@ -3776,10 +4541,7 @@ def upsert(
```
"""
res = q.upsert(self, row, match_schema, test_insert)
- if not replace_self:
- warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'",
- DeprecationWarning)
- if replace_self and inplace:
+ if inplace:
self.__dict__.update(res.__dict__)
return res
@@ -3857,29 +4619,330 @@ def grouped(self, cols: Union[List, str] = ''):
else:
raise e
+ def sql(self, query, *args):
+ """Execute an SQL query against the supplied PyKX KeyedTable object.
+
+ This function expects the keyed table object to be supplied as a parameter
+ to the query, additional parameters can be supplied as positional
+ arguments.
+
+ Parameters:
+ query: A str object indicating the query to be executed, this
+ must contain a required argument $1 associated with the
+ table being queried.
+ *args: Any additional positional arguments required for query
+ execution.
+
+ Returns:
+ The queried table associated with the SQL statement
+
+ Examples:
+
+ Query a keyed table supplying no additional arguments
+
+ ```python
+ >>> tab = kx.Table(
+ ... data = {'x': [1, 2, 3], 'x1': ['a', 'b', 'a']}
+ ... ).set_index('x')
+ >>> tab.sql("select * from $1 where x1='a'")
+ pykx.Table(pykx.q('
+ x x1
+ ----
+ 1 a
+ 3 a
+ '))
+ ```
+
+ Query a keyed table supplying multiple arguments
+
+ ```python
+ >>> tab = kx.Table(
+ ... data = {'x': [1, 2, 3], 'x1': ['a', 'b', 'a']}
+ ... ).set_index('x')
+ >>> tab.sql("select * from $1 where x1=$2 and x=$3", 'a', 1)
+ pykx.Table(pykx.q('
+ x x1
+ ----
+ 1 a
+ '))
+ ```
+ """
+ if not isinstance(query, str):
+ raise TypeError('Supplied query is not of type "str"')
+ if '$1' not in query:
+ raise QError('Supplied query does not contain argument $1')
+ return q.sql(query, self, *args)
+
+ def select(self, columns=None, where=None, by=None, inplace=False):
+ """Apply a q style select statement on the supplied keyed table defined within the process.
+
+ This implementation follows the q functional select syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: A dictionary mapping the name to be given to a column and the logic to be
+ applied in aggregation to that column both as strings.
+ where: Conditional filtering used to select subsets of the data on which by-clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+ inplace: Whether the result of the select is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a PyKX KeyedTable against which to run the examples
+
+ ```python
+ >>> import pykx as kx
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)}
+ ... ).set_index('col1')
+ ```
+
+ Select all items in the table
+
+ ```python
+ >>> qtab.select()
+ ```
+
+ Filter table based on various where conditions
+
+ ```python
+ >>> qtab.select(where='col2<0.5')
+ ```
+
+ Retrieve statistics by grouping data on symbol columns
+
+ ```python
+ >>> qtab.select(columns={'maxCol2': 'max col2'}, by={'col1': 'col1'})
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'})
+ ```
+
+ Retrieve grouped statistics with restrictive where condition
+
+ ```python
+ >>> qtab.select(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}, where='col3=0b')
+ ```
+ """ # noqa: E501
+ return q.qsql.select(self, columns, where, by, inplace)
+
+ def exec(self, columns=None, where=None, by=None):
+ """
+ Apply a q style exec statement on the supplied PyKX KeyedTable.
+
+ This implementation follows the q functional exec syntax, with limitations on the
+ structures supported for the various clauses as a result.
+
+ Parameters:
+ columns: A dictionary mapping the name to be given to a column and the logic to be
+ applied in aggregation to that column both as strings. A string defining a single
+ column to be retrieved from the table as a list.
+ where: Conditional filtering used to select subsets of the data on which by clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+
+ Examples:
+
+ Define a PyKX KeyedTable
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'col1': kx.random.random(100, ['a', 'b', 'c']),
+ ... 'col2': kx.random.random(100, 1.0),
+ ... 'col3': kx.random.random(100, False),
+ ... 'col4': kx.random.random(100, 10.0)}
+ ... ).set_index('col1')
+ ```
+
+ Select last item of the table
+
+ ```python
+ qtab.exec()
+ ```
+
+ Retrieve a column from the table as a list
+
+ ```python
+ qtab.exec('col3')
+ ```
+
+ Retrieve a set of columns from a table as a dictionary
+
+ ```python
+ qtab.exec({'symcol': 'col1'})
+ qtab.exec({'symcol': 'col1', 'boolcol': 'col3'})
+ ```
+
+ Filter columns from a table based on various where conditions
+
+ ```python
+ qtab.exec('col3', where='col1=`a')
+ qtab.exec({'symcol': 'col1', 'maxcol4': 'max col4'}, where=['col1=`a', 'col2<0.3'])
+ ```
+
+ Retrieve data grouping by data on symbol columns
+
+ ```python
+ qtab.exec('col2', by={'col1': 'col1'})
+ qtab.exec(columns={'maxCol2': 'max col2'}, by={'col1': 'col1'})
+ qtab.exec(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'})
+ ```
+
+ Retrieve grouped statistics with restrictive where condition
+
+ ```python
+ qtab.exec(columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}, where='col3=0b')
+ ```
+ """ # noqa: E501
+ return q.qsql.exec(self, columns, where, by)
+
+ def update(self, columns=None, where=None, by=None, inplace=False):
+ """
+ Apply a q style update statement on a PyKX KeyedTable.
+
+ This implementation follows the q functional update syntax with limitations on
+ structures supported for the various clauses as a result of this.
+
+ Parameters:
+ columns: A dictionary mapping the name of a column present in the table or one to be
+ added to the contents which are to be added to the column, this content can be a
+ string denoting q data or the equivalent Python data.
+ where: Conditional filtering used to select subsets of the data on which by-clauses and
+ appropriate aggregations are to be applied.
+ by: A dictionary mapping the names to be assigned to the produced columns and the
+ columns whose results are used to construct the groups of the by clause.
+ inplace: Whether the result of an update is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a q table in python and named in q memory
+
+ ```python
+ >>> qtab = kx.Table(data={
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']}
+ ... )
+ ```
+
+ Update all the contents of a column
+
+ ```python
+ >>> qtab.update({'eye': '`blue`brown`green'})
+ ```
+
+ Update the content of a column restricting scope using a where clause
+
+ ```python
+ >>> qtab.update({'eye': ['blue']}, where='hair=`fair')
+ ```
+
+ Define a q table suitable for by clause example
+
+ ```python
+ >>> bytab = kx.Table(data={
+ ... 'name': kx.random.random(100, ['nut', 'bolt', 'screw']),
+ ... 'color': kx.random.random(100, ['red', 'green', 'blue']),
+ ... 'weight': 0.5 * kx.random.random(100, 20),
+ ... 'city': kx.random.random(100, ['london', 'paris', 'rome'])}
+ ... ).set_index('city')
+ ```
+
+ Apply an update grouping based on a by phrase
+
+ ```python
+ >>> bytab.update({'weight': 'avg weight'}, by={'city': 'city'})
+ ```
+
+ Apply an update grouping based on a by phrase and persist the result using the inplace keyword
+
+ ```python
+ >>> bytab.update(columns={'weight': 'avg weight'}, by={'city': 'city'}, inplace=True)
+ ```
+ """ # noqa: E501
+ return q.qsql.update(self, columns, where, by, inplace)
+
+ def delete(self, columns=None, where=None, inplace=False):
+ """
+ Apply a q style delete statement on a PyKX KeyedTable.
+
+ This implementation follows the q functional delete syntax with limitations on
+ structures supported for the various clauses as a result of this.
+
+ Parameters:
+ columns: Denotes the columns to be deleted from a table.
+ where: Conditional filtering used to select subsets of the data which are to be
+ deleted from the table.
+ inplace: Whether the result of a delete is to be persisted. This operates for tables
+ referenced by name in q memory or general table objects
+ https://code.kx.com/q/basics/qsql/#result-and-side-effects.
+
+ Examples:
+
+ Define a PyKX Table against which to run the examples
+
+ ```python
+ >>> qtab = kx.Table(data = {
+ ... 'name': ['tom', 'dick', 'harry'],
+ ... 'age': [28, 29, 35],
+ ... 'hair': ['fair', 'dark', 'fair'],
+ ... 'eye': ['green', 'brown', 'gray']}
+ ... ).set_index('name')
+ ```
+
+ Delete all the contents of the table
+
+ ```python
+ >>> qtab.delete()
+ ```
+
+ Delete single and multiple columns from the table
+
+ ```python
+ >>> qtab.delete('age')
+ >>> qtab.delete(['age', 'eye'])
+ ```
+
+ Delete rows of the dataset based on where condition
+
+ ```python
+ >>> qtab.delete(where='hair=`fair')
+ >>> qtab.delete(where=['hair=`fair', 'age=28'])
+ ```
+
+ Delete a column from the dataset named in q memory and persist the result using the
+ inplace keyword
+
+ ```python
+ >>> qtab.delete('age', inplace=True)
+ ```
+ """ # noqa: E501
+ return q.qsql.delete(self, columns, where, inplace)
+
def _repr_html_(self):
if not licensed:
return self.__repr__()
keys = q('{cols key x}', self).py()
console = q.system.console_size.py()
+ if detect_bad_columns(q('0!', self)):
+ return self.__repr__()
qtab=q('''{[c;t]
- n:count t:t;
- cls:$[c[1]c[0];?[t;enlist(=;`i;(last;`i));0b;{x!x}cls];()];
- h
+ n:count t;
+ cls:{x!x}$[c[1]",{reverse "," sv 3 cut reverse string x}[n]," rows × ",
- {reverse "," sv 3 cut reverse string x}[count cols t]," columns";h]
- }''', console, self, ht).py().decode("utf-8")
+ ht = q('.pykx.util.html.rowcols', console, self, ht).py().decode("utf-8")
return ht
@@ -3928,13 +4987,8 @@ def apply(self, func, *args, **kwargs):
)
def __getitem__(self, item):
- keys = q.keys(self.tab).py()
- if isinstance(item, list):
- keys.extend(item)
- else:
- keys.append(item)
return GroupbyTable(
- q(f'{len(q.keys(self.tab))}!', self.tab[keys]),
+ self.tab[item],
True,
False,
as_vector=item
@@ -3955,6 +5009,13 @@ class Function(Atom):
[Refer to chapter 6 of Q for Mortals](https://code.kx.com/q4m3/6_Functions/) for more
information about q functions.
"""
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
+
def __bool__(self):
return True
@@ -4030,6 +5091,12 @@ def each_left(self):
vs = each_left
+# Look up the optional packages without importing them; `find_spec` gives None when absent.
+bs4_spec = importlib.util.find_spec("bs4")
+md2_spec = importlib.util.find_spec("markdown2")
+# Only attach the q help text to `Function.scan` when both packages were found.
+if not (bs4_spec is None or md2_spec is None):
+    Function.scan.__doc__ = help.qhelp('scan')
+
+
class Lambda(Function):
"""Wrapper for q lambda functions.
@@ -4041,11 +5108,17 @@ class Lambda(Function):
arguments, using the names of the parameters from q.
"""
t = 100
+ _name = ''
@property
def __name__(self):
return 'pykx.Lambda'
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
+
@cached_property
def params(self):
# Strip "PyKXParam" from all param names if it is a prefix for all
@@ -4064,6 +5137,12 @@ class UnaryPrimitive(Function):
[`pykx.Identity`][]
"""
t = 101
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
@property
def __name__(self):
@@ -4154,12 +5233,33 @@ class Operator(Function):
reference page: https://code.kx.com/q/ref/#operators
"""
t = 102
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
def __call__(self, *args, **kwargs):
if kwargs:
raise TypeError('Cannot use kwargs on an operator')
return super().__call__(*args, **kwargs)
+    def __new__(cls, op):
+        """Create a q operator from its string representation.
+
+        Parameters:
+            op: The q source text of the operator, for example `'+'`.
+
+        Returns:
+            The object produced by evaluating `op` in q, once it has been confirmed
+            to have q type 102.
+
+        Raises:
+            QError: If `op` is not a `str`, if PyKX is running unlicensed, or if
+                evaluating `op` does not produce an object of q type 102.
+        """
+        if not isinstance(op, str):
+            raise QError('Supplied operator must be a str')
+        # NOTE(review): this stores the text on the class itself, so it is shared by
+        # every operator - kept as-is because readers of `repr` are not visible here.
+        cls.repr = op
+        if not licensed:
+            raise QError('Unsupported operation in unlicensed mode')
+        gen_op = q(op)
+        if gen_op.t != 102:
+            raise QError('Generation of operator did not return correct type')
+        # Return the object that was validated rather than evaluating `op` a second time.
+        return gen_op
+
+    def __init__(self, op):
+        """Do nothing: the object is fully built by `__new__`, `op` is accepted and ignored."""
+        return None
+
class Iterator(Function):
"""Wrappers for q iterator functions.
@@ -4170,6 +5270,11 @@ class Iterator(Function):
"""
t = 103
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
+
def __call__(self, *args, **kwargs):
if kwargs:
raise TypeError('Cannot use kwargs on an iterator')
@@ -4190,6 +5295,12 @@ class Projection(Function):
[projection null][`pykx.ProjectionNull`]
"""
t = 104
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
@cached_property
def params(self) -> Tuple[SymbolAtom]:
@@ -4252,6 +5363,12 @@ class Composition(Function):
referred to as "point-free" or "tacit" programming.
"""
t = 105
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
@property
def __name__(self):
@@ -4336,10 +5453,17 @@ class AppliedIterator(Function):
themselves are of the type [`pykx.Iterator`][], but when applied to a function a new type
(which is a subclass of `AppliedIterator`) is created depending on what iterator was used.
"""
+ _name = ''
+
@property
def __name__(self):
return 'pykx.AppliedIterator'
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
+
@cached_property
def params(self):
return self.func.params
@@ -4364,26 +5488,56 @@ def __call__(self, *args, **kwargs):
class Each(AppliedIterator):
"""Wrapper for functions with the 'each' iterator applied to them."""
t = 106
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
class Over(AppliedIterator):
"""Wrapper for functions with the 'over' iterator applied to them."""
t = 107
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
class Scan(AppliedIterator):
"""Wrapper for functions with the 'scan' iterator applied to them."""
t = 108
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
class EachPrior(AppliedIterator):
"""Wrapper for functions with the 'each-prior' iterator applied to them."""
t = 109
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
class EachRight(AppliedIterator):
"""Wrapper for functions with the 'each-right' iterator applied to them."""
t = 110
+ _name = ''
+
+    @property
+    def __doc__(self):
+        """Return `help.qhelp(self._name)` when `_name` is non-empty, otherwise `None`."""
+        if self._name == '':
+            return None
+        return help.qhelp(self._name)
class EachLeft(AppliedIterator):
@@ -4487,6 +5641,5158 @@ def with_execution_ctx(self, execution_ctx) -> 'SymbolicFunction':
return x
+class ParseTree:
+    """Wrapper that marks a list (or q object) as a parse tree for the Query API.
+
+    The constructor normalises its input: a `str` is parsed with `q.parse`, a
+    `QueryPhrase` or `Column` contributes its `phrase` attribute, a `Variable`
+    contributes its name, and anything else is stored unchanged.
+    """
+    def __init__(self, tree):
+        if isinstance(tree, str):
+            parsed = q.parse(CharVector(tree))
+        elif isinstance(tree, (QueryPhrase, Column)):
+            parsed = tree.phrase
+        elif isinstance(tree, Variable):
+            parsed = tree._name
+        else:
+            parsed = tree
+        self._tree = parsed
+
+    def __repr__(self):
+        return f"pykx.{type(self).__name__}({self._tree.__repr__()})"
+
+    def enlist(self):
+        """Wrap the stored tree in a list (`q.enlist` for `K` objects) and return `self`."""
+        self._tree = q.enlist(self._tree) if isinstance(self._tree, K) else [self._tree]
+        return self
+
+    def first(self):
+        """Replace the stored tree with `q.first` of itself and return `self`."""
+        self._tree = q.first(self._tree)
+        return self
+
+    def eval(self):
+        """Return the result of `q.eval` on the stored tree."""
+        return q.eval(self._tree)
+
+    def reval(self):
+        """Return the result of `q.reval` on the stored tree."""
+        return q.reval(self._tree)
+
+    def append(self, other):
+        """Append `other` (unwrapped first if it is a `ParseTree`) to the stored tree."""
+        item = other._tree if isinstance(other, ParseTree) else other
+        self._tree.append(item)
+
+    def extend(self, other):
+        """Extend the stored tree with `other` (unwrapped first if it is a `ParseTree`)."""
+        items = other._tree if isinstance(other, ParseTree) else other
+        self._tree.extend(items)
+
+    @staticmethod
+    def table(contents):
+        """Helper function to create a ParseTree for the creation of a Table.
+        If a dict is passed creates: `(flip;(!;contents.keys();enlist,contents.values()))`
+        Else creates: `(flip;(!;contents;enlist,contents))`
+        For use with the Query API, particularly for `fby` queries.
+        """
+        if isinstance(contents, (Dictionary, dict)):
+            names, values = list(contents.keys()), list(contents.values())
+        else:
+            # Non-dictionary input supplies both the column names and the values.
+            names = values = contents
+        return ParseTree([q.flip, [q('!'), [names], ParseTree.list(values)]])
+
+    @staticmethod
+    def list(values):
+        """Helper function to create a ParseTree for the creation of a List.
+        Creates: `(enlist;value0;value1...valueN)`
+        """
+        return ParseTree([q.enlist, *values])
+
+    @staticmethod
+    def value(contents, eval=False):
+        """Helper function to create a ParseTree which calls `value` on its contents.
+        Creates: ``(`.q.value;contents)``"""
+        if not (eval and licensed):
+            return ParseTree(['.q.value', CharVector(contents)])
+        # Eager path: evaluate the text in q now and hand back the result itself.
+        return q(CharVector(contents))
+
+    @staticmethod
+    def fby(by, aggregate, data, by_table=False, data_table=False):
+        """Helper function to create a ParseTree of an `fby` call
+        Creates: `(fby;(enlist;aggregate;data);by)`
+        `data_table` and `by_table` can be set to True to create Table ParseTree of their input"""
+        # A string aggregate is resolved to its q value before being placed in the tree.
+        if isinstance(aggregate, str):
+            aggregate = q.value(CharVector(aggregate))
+        # Dictionaries are always converted to table parse trees, even without the flags.
+        if data_table or isinstance(data, (dict, Dictionary)):
+            data = ParseTree.table(data)
+        if by_table or isinstance(by, (dict, Dictionary)):
+            by = ParseTree.table(by)
+        return ParseTree([q.fby, ParseTree.list([aggregate, data]), by])
+
+
+class Variable:
+    """Helper class for passing Variable names through the Query API"""
+    def __init__(self, name):
+        self._name = name
+
+    def __repr__(self):
+        preamble = f'pykx.{type(self).__name__}'
+        return f"{preamble}('{self._name}')"
+
+    def get(self):
+        """Return the result of `q.get` applied to the stored name."""
+        return q.get(self._name)
+
+    def value(self):
+        """Return the result of `q.value` applied to the stored name."""
+        return q.value(self._name)
+
+    def exists(self):
+        """Report whether `get` on the stored name succeeds in q.
+
+        The q lambda traps any error raised by `get`, yielding `1b` on success
+        and `0b` on failure; that result is returned to the caller.
+        """
+        # The result used to be computed and discarded, so this always returned None.
+        return q('{@[{get x;1b};x;{0b}]}', self._name)
+
+
+class Column:
+ """Helper class creating queries for the Query API"""
+    def __init__(self, column=None, name=None, value=None, is_tree=False):
+        """Create a column reference for use in queries.
+
+        Parameters:
+            column: Fallback used for both the name and the value when the
+                corresponding argument is not supplied.
+            name: Name to store; `column` is used when this is `None`.
+            value: Value to store; `column` is used when this is `None`.
+            is_tree: Stored unchanged as `_is_tree`.
+
+        Raises:
+            LicenseException: If PyKX is not running in licensed mode.
+        """
+        if not licensed:
+            raise LicenseException("use kx.Column objects")
+        self._name = column if name is None else name
+        self._value = column if value is None else value
+        self._is_tree = is_tree
+
+    def __repr__(self):
+        """Return a summary showing the column name and the type of its stored value."""
+        return f"pykx.{type(self).__name__}(name='{self._name}', value={type(self._value)})"
+
+    def call(self, op, *other, iterator=None, col_arg_ind=0, project_args=None):
+        """Function for building up a function call off a Column
+
+        Parameters:
+            op: The function to apply. A `str`, `bytes` or `CharVector` is evaluated
+                with `q`; any other object is placed in the parse tree as supplied.
+            *other: Additional arguments. A `Column` contributes its current value, a
+                `Variable` contributes its name, anything else is converted with `toq`
+                (symbol atoms and vectors are additionally wrapped in a list).
+            iterator: Optional iterator to apply: one of the keywords or iterator
+                symbols recognised below, or a list which is extended with the call.
+            col_arg_ind: Position at which this column's value is inserted among the
+                arguments.
+            project_args: Argument indices bound onto `op` as a projection; the
+                remaining arguments are passed to the projected function.
+
+        Returns:
+            This `Column`, with its stored value replaced by the new parse tree.
+        """
+        params = []
+        for param in other:
+            if isinstance(param, Column):
+                param = param._value
+            elif isinstance(param, Variable):
+                param = param._name
+            else:
+                param = toq(param)
+                if (
+                    isinstance(param, SymbolAtom)
+                    or isinstance(param, SymbolVector)
+                ):
+                    param = [param]
+            params.append(param)
+        params.insert(col_arg_ind, self._value)
+        if isinstance(op, (str, bytes, CharVector)):
+            op = op.encode() if isinstance(op, str) else op
+            cmd = [q(op)]
+        else:
+            cmd = [op]
+        if project_args is not None:
+            project = []
+            for i in range(len(params)):
+                # NOTE(review): `(value(1;))2` presumably yields the elided-argument
+                # placeholder of a projection - confirm against the q reference.
+                project.append(params[i] if i in project_args else q("(value(1;))2"))
+            cmd.extend(project)
+            cmd = [cmd]
+            # Pop from the highest index down so earlier removals do not shift later ones.
+            for i in sorted(project_args, reverse=True):
+                params.pop(i)
+        cmd.extend(params)
+
+        if iterator is not None:
+            # Backslashes are written as explicit `\\` escapes; the runtime strings are
+            # unchanged but no longer rely on invalid escape sequences such as `\:`.
+            iterator_map = {'/': '(/)',
+                            '\\': '(\\)',
+                            '/:': '(/:)',
+                            '\\:': '(\\:)',
+                            '\\:/:': ['(\\:)', '(/:)'],
+                            '/:\\:': ['(/:)', '(\\:)'],
+                            }
+            if iterator in ['each', 'peach', 'over', 'scan', 'prior']:
+                i = [ParseTree.value(iterator, eval=True)]
+                i.extend(cmd)
+            elif iterator in ["':", "'"]:
+                i = [[ParseTree.value(iterator, eval=True), cmd[0]]]
+                i.extend(cmd[1:])
+            elif iterator in ['/:', '\\:', '/', '\\']:
+                i = [[ParseTree.value(iterator_map[iterator], eval=True), cmd[0]]]
+                i.extend(cmd[1:])
+            elif iterator in ['/:\\:', '\\:/:']:
+                iterator = iterator_map[iterator]
+                i = [[ParseTree.value(iterator[1], eval=True),
+                      [ParseTree.value(iterator[0], eval=True), cmd[0]]]]
+                i.extend(cmd[1:])
+            else:
+                # Any other iterator is used as the head of the tree and extended in place.
+                i = iterator
+                i.extend(cmd)
+            cmd = i
+        self._value = cmd
+        return self
+
+    def __add__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column + other` to `call` using the q `+` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('+', other, **opts)
+
+ __radd__ = __add__
+
+    def __sub__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column - other` to `call` using the q `-` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('-', other, **opts)
+
+    def __rsub__(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """Delegate `other - column` to `call`; the column defaults to argument index 1."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('-', other, **opts)
+
+    def __mul__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column * other` to `call` using the q `*` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('*', other, **opts)
+
+ __rmul__ = __mul__
+
+    def __floordiv__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column // other` to `call` using the q `div` function."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('div', other, **opts)
+
+    def __rfloordiv__(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """Delegate `other // column` to `call`; the column defaults to argument index 1."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('div', other, **opts)
+
+    def __truediv__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column / other` to `call` using the q `%` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('%', other, **opts)
+
+    def __rtruediv__(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """Delegate `other / column` to `call` using the q `%` operator.
+
+        The column defaults to argument index 1. `iterator` and `project_args` are
+        forwarded to `call` like every other operator method; they were previously
+        accepted but silently ignored.
+        """
+        return self.call('%', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def __mod__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column % other` to `call` using the q `mod` function."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('mod', other, **opts)
+
+    def __pow__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column ** other` to `call` using the q `xexp` function."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('xexp', other, **opts)
+
+    def __eq__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column == other` to `call` using q `=`; returns what `call` returns."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('=', other, **opts)
+
+    def __ne__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column != other` to `call` using q `<>`; returns what `call` returns."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('<>', other, **opts)
+
+    def __gt__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column > other` to `call` using the q `>` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('>', other, **opts)
+
+    def __ge__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column >= other` to `call` using the q `>=` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('>=', other, **opts)
+
+    def __lt__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column < other` to `call` using the q `<` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('<', other, **opts)
+
+    def __le__(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """Delegate `column <= other` to `call` using the q `<=` operator."""
+        opts = dict(iterator=iterator, col_arg_ind=col_arg_ind, project_args=project_args)
+        return self.call('<=', other, **opts)
+
+    def __pos__(self, iterator=None):
+        """Delegate unary `+column` to `call` with the q `abs` function.
+
+        NOTE(review): unary plus maps to `abs` rather than leaving values unchanged -
+        confirm this is intended.
+        """
+        fn = 'abs'
+        return self.call(fn, iterator=iterator)
+
+    def __abs__(self, iterator=None):
+        """Delegate `abs(column)` to `call` with the q `abs` function."""
+        fn = 'abs'
+        return self.call(fn, iterator=iterator)
+
+    def __neg__(self, iterator=None):
+        """Delegate unary `-column` to `call` with the q `neg` function."""
+        fn = 'neg'
+        return self.call(fn, iterator=iterator)
+
+    def __floor__(self, iterator=None):
+        """Delegate the floor of the column to `call` with the q `floor` function."""
+        fn = 'floor'
+        return self.call(fn, iterator=iterator)
+
+    def __ceil__(self, iterator=None):
+        """Delegate the ceiling of the column to `call` with the q `ceiling` function."""
+        fn = 'ceiling'
+        return self.call(fn, iterator=iterator)
+
+    def __and__(self, other):
+        """Combine this column with `other` into a new `QueryPhrase`.
+
+        A `Column` is appended to the phrase and a `QueryPhrase` is used to extend it.
+
+        Raises:
+            TypeError: If `other` is neither a `Column` nor a `QueryPhrase`.
+        """
+        phrase = QueryPhrase(self)
+        if isinstance(other, Column):
+            phrase.append(other)
+            return phrase
+        if isinstance(other, QueryPhrase):
+            phrase.extend(other)
+            return phrase
+        raise TypeError(
+            f"Supplied object type '{type(other)}' cannot `&` off a `pykx.Column`.")
+
+    def __or__(self, other):
+        """Replace this column's value with the q `or` of itself and `other`.
+
+        A `Column` contributes its stored value and a `ParseTree` its stored tree;
+        any other object is placed in the expression as supplied.
+
+        Returns:
+            This `Column`, with its stored value replaced.
+
+        Raises:
+            TypeError: If `other` is a `QueryPhrase`.
+        """
+        if isinstance(other, Column):
+            other = other._value
+        elif isinstance(other, QueryPhrase):
+            raise TypeError("Cannot | off a Column with a QueryPhrase")
+        elif isinstance(other, ParseTree):
+            # `ParseTree` keeps its contents in `_tree`; it has no `tree` attribute.
+            other = other._tree
+        self._value = [q(b'or'), self._value, other]
+        return self
+
+ def abs(self, iterator=None):
+ """
+ Return the absolute value of items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the absolute value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], 2, -1]
+ ... })
+ >>> tab.select(kx.Column('a').abs())
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 1
+ 0
+ '))
+ ```
+ """
+ return self.call('abs', iterator=iterator)
+
+ def acos(self, iterator=None):
+ """
+ Calculate arccos for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the arccos value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').acos())
+ pykx.Table(pykx.q('
+ a
+ --------
+ 0
+ 3.141593
+ 1.570796
+ '))
+ ```
+
+ Calculate the arccos value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').acos(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------------
+ 3.141593 0
+ 1.570796 0
+ 0
+ '))
+ ```
+ """
+ return self.call('acos', iterator=iterator)
+
+ def asc(self, iterator=None):
+ """
+ Sort the values within a column in ascending order
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Sort the values in a column ascending
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').asc())
+ pykx.Table(pykx.q('
+ a
+ --
+ -1
+ 0
+ 1
+ '))
+ ```
+
+ Sort each row in a column ascending:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [3, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').asc(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------
+ -1 1 2
+ 1 2 3
+ 1 2 3
+ '))
+ ```
+ """
+ return self.call('asc', iterator=iterator)
+
+ def asin(self, iterator=None):
+ """
+ Calculate arcsin for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the arcsin value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').asin())
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 1.570796
+ -1.570796
+ 0
+ '))
+ ```
+
+ Calculate the arcsin value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').asin(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ---------------------------
+ -1.570796 1.570796
+ 0 1.570796
+ 1.570796
+ '))
+ ```
+ """
+ return self.call('asin', iterator=iterator)
+
+ def atan(self, iterator=None):
+ """
+ Calculate arctan for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the arctan value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').atan())
+ pykx.Table(pykx.q('
+ a
+ ----------
+ 0.7853982
+ -0.7853982
+ 0
+ '))
+ ```
+
+ Calculate the arctan value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').atan(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------------------------------
+ -0.7853982 1.107149 0.7853982
+ 0 0.7853982 1.107149
+ 0.7853982 1.107149 1.249046
+ '))
+ ```
+ """
+ return self.call('atan', iterator=iterator)
+
+ def avg(self, iterator=None):
+ """
+ Calculate the average value for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the average value of all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0.5],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').avg())
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 0.1666667
+ '))
+ ```
+
+ Calculate average value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').avg(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ---------
+ 0.6666667
+ 1
+ 2
+ '))
+ ```
+ """
+ return self.call('avg', iterator=iterator)
+
+ def avgs(self, iterator=None):
+ """
+ Calculate a running average value for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the running average across all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0.5],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').avgs())
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 1
+ 0
+ 0.1666667
+ '))
+ ```
+
+ Calculate the running average for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').avgs(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----------------
+ -1 0.5 0.6666667
+ 0 0.5 1
+ 1 1.5 2
+ '))
+ ```
+ """
+ return self.call('avgs', iterator=iterator)
+
+ def ceiling(self, iterator=None):
+ """
+ Calculate a nearest integer greater than or equal to items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the ceiling of all elements in a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [0.1, 0.4, 3.6],
+ ... 'b': [[-1.1, 2.2, 1.6], [0.3, 1.4, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').ceiling())
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 1
+ 4
+ '))
+ ```
+
+ Calculate the ceiling for all values in each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [0.1, 0.4, 3.6],
+ ... 'b': [[-1.1, 2.2, 1.6], [0.3, 1.4, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').ceiling(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------
+ -1 3 2
+ 1 2 2
+ 1 2 3
+ '))
+ ```
+ """
+ return self.call('ceiling', iterator=iterator)
+
+ def cor(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Calculate the correlation between a column and one of:
+
+ - Another column
+ - A vector of equal length to the column
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the correlation between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').cor(kx.Column('b')))
+ pykx.FloatAtom(pykx.q('-0.9946109'))
+ ```
+
+ Calculate the correlation between a column and variable in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').cor(kx.Variable('custom_var')))
+ pykx.FloatAtom(pykx.q('-0.1670133'))
+ ```
+
+ Calculate the correlation between a column and a Python variable:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').cor(kx.random.random(100, 10.0)))
+ pykx.FloatAtom(pykx.q('-0.01448725'))
+ ```
+ """
+ return self.call('cor', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def cos(self, iterator=None):
+ """
+ Calculate cosine for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the cosine value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').cos())
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 0.5403023
+ 0.5403023
+ 1
+ '))
+ ```
+
+ Calculate the cosine value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').cos(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -------------------------------
+ 0.5403023 -0.4161468 0.5403023
+ 1 0.5403023 -0.4161468
+ 0.5403023 -0.4161468 -0.9899925
+ '))
+ ```
+ """
+ return self.call('cos', iterator=iterator)
+
+ def count(self, iterator=None):
+ """
+ Calculate the count of the number of elements in a column or rows of a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the count of the number of elements in a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.exec(kx.Column('a').count())
+ pykx.LongAtom(pykx.q('3'))
+ ```
+
+ Count the number of elements in each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 2], 1]
+ ... })
+ >>> tab.exec(kx.Column('b').count(iterator='each'))
+ pykx.LongVector(pykx.q('3 2 1'))
+ ```
+ """
+ return self.call('count', iterator=iterator)
+
+ def cov(self, other, sample=False, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Calculate the covariance/sample covariance between a column and one of:
+
+ - Another column
+ - A vector of equal length to the column
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ sample: Should calculations of covariance return the
+ sample covariance (set True) or the covariance (set False, the default)
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the covariance between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').cov(kx.Column('b')))
+ pykx.FloatAtom(pykx.q('-7.87451'))
+ ```
+
+ Calculate the sample covariance between a column and variable in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').cov(kx.Variable('custom_var'), sample=True))
+ pykx.FloatAtom(pykx.q('-0.1670133'))
+ ```
+
+ Calculate the covariance between a column and a Python object:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').cov(kx.random.random(100, 10.0)))
+ pykx.FloatAtom(pykx.q('-0.1093116'))
+ ```
+ """
+ fn = 'scov' if sample else 'cov'
+ return self.call(fn, other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def cross(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Return the cross product (all possible combinations) between a column and:
+
+ - Another column
+ - A vector of items
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Generate the cross product of all values in two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').cross(kx.Column('b')))
+ pykx.Table(pykx.q('
+ a
+ ------------------
+ 0.1392076 9.996082
+ 0.1392076 9.797281
+ 0.1392076 9.796094
+ ..
+ '))
+ ```
+
+ Calculate the cross product between a column and a list in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:til 3')
+ >>> tab.select(kx.Column('a').cross(kx.Variable('custom_var')))
+ pykx.Table(pykx.q('
+ a
+ -----------
+ 0.1392076 0
+ 0.1392076 1
+ 0.1392076 2
+ 0.2451336 0
+ ..
+ '))
+ ```
+
+ Calculate the cross product between a column and a Python object:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').cross([1, 2, 3]))
+ pykx.Table(pykx.q('
+ a
+ -----------
+ 0.1392076 1
+ 0.1392076 2
+ 0.1392076 3
+ 0.2451336 1
+ ..
+ '))
+ ```
+ """
+ return self.call('cross', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def deltas(self, iterator=None):
+ """
+ Calculate the difference between consecutive elements in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Calculate the difference between consecutive values in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.exec(kx.Column('a').deltas())
+ pykx.LongVector(pykx.q('1 -2 1'))
+ ```
+
+ Calculate the difference between consecutive values in each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').deltas(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -------
+ -1 3 -1
+ 0 1 1
+ 1 1 1
+ '))
+ ```
+ """
+ return self.call('deltas', iterator=iterator)
+
+ def desc(self, iterator=None):
+ """
+ Sort the values within a column in descending order.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Sort the values in a column descending:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').desc())
+ pykx.Table(pykx.q('
+ a
+ --
+ 1
+ 0
+ -1
+ '))
+ ```
+
+ Sort each row in a column descending:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [3, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').desc(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------
+ 2 1 -1
+ 3 2 1
+ 3 2 1
+ '))
+ ```
+ """
+ return self.call('desc', iterator=iterator)
+
+ def dev(self, sample=False, iterator=None):
+ """
+ Calculate the standard deviation or sample standard deviation
+ for items in a column or rows in a column.
+
+ Parameters:
+ sample: Should calculations of standard deviation return the
+ square root of the sample variance (set True) or the square
+ root of the variance (set False {default})
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Calculate the standard deviation of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 10.0),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').dev())
+ pykx.FloatAtom(pykx.q('2.749494'))
+ ```
+
+ Calculate the sample standard deviation for each row in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 10.0),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.select(kx.Column('b').dev(sample=True, iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ---------
+ 3.068428
+ 3.832719
+ 2.032402
+ 2.553458
+ 2.527216
+ 1.497015
+ ..
+ '))
+ ```
+ """
+ fn = 'sdev' if sample else 'dev'  # function name depends on `sample`
+ return self.call(fn, iterator=iterator)
+
+ def differ(self, iterator=None):
+ """
+ Find locations where items in a column or rows in a column
+ change value from one item to the next.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Determine if consecutive rows in a column are different values:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 2),
+ ... 'b': kx.random.random([100, 3], 2)
+ ... })
+ >>> tab.select(kx.Column('a').differ())
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 1
+ 0
+ 1
+ ..
+ '))
+ ```
+
+ Determine if consecutive values in vectors within a row have different values:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 2),
+ ... 'b': kx.random.random([100, 3], 2)
+ ... })
+ >>> tab.select(kx.Column('b').differ(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----
+ 110b
+ 101b
+ 110b
+ 110b
+ ..
+ '))
+ ```
+ """
+ return self.call('differ', iterator=iterator)
+
+ def distinct(self, iterator=None):
+ """
+ Find the unique items in a column or, when used with an iterator,
+ the unique items within each row of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find all unique items in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.exec(kx.Column('a').distinct())
+ pykx.LongVector(pykx.q('0 1 2 3 4'))
+ ```
+
+ Find all unique items in each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').distinct(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -----
+ 0 2 3
+ 1 3 4
+ 4 2
+ 1 4
+ 1 0
+ ,2
+ ..
+ '))
+ ```
+ """
+ return self.call('distinct', iterator=iterator)
+
+ def div(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Return the greatest whole number that does not exceed x%y (integer division) between a column and:
+
+ - Another column
+ - An integer
+ - A vector of items equal in length to the column
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the integer division between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 5)),
+ ... 'b': kx.q.desc(kx.random.random(100, 5))
+ ... })
+ >>> tab.select(kx.Column('a').div(kx.Column('b')))
+ pykx.Table(pykx.q('
+ a
+ -
+ 0
+ 0
+ 0
+ 1
+ ..
+ '))
+ ```
+
+ Calculate the integer division between a column and an integer:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 5)),
+ ... 'b': kx.q.desc(kx.random.random(100, 5))
+ ... })
+ >>> tab.select(kx.Column('a').div(2))
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 0
+ 2
+ 0
+ ..
+ '))
+ ```
+ """
+ return self.call('div', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def exp(self, iterator=None):
+ """
+ Raise the exponential constant `e` to a power determined by the elements of a column
+ or rows of the column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Raise the exponential constant `e` to the power of values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.exec(kx.Column('a').exp())
+ pykx.FloatVector(pykx.q('1 2.718282 7.389056 20.08554..'))
+ ```
+
+ Raise the exponential constant `e` to the power of values within each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').exp(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ --------------------------
+ 1 7.389056 20.08554
+ 2.718282 20.08554 54.59815
+ 54.59815 7.389056 7.389056
+ 2.718282 54.59815 2.718282
+ ..
+ '))
+ ```
+ """
+ return self.call('exp', iterator=iterator)
+
+ @staticmethod
+ def fby(by, aggregate, data, by_table=False, data_table=False):
+ """Helper function to create an `fby` inside a Column object.
+ Creates: `(fby;(enlist;aggregate;data);by)`
+ `data_table` and `by_table` can be set to True to create a Table ParseTree of their input."""
+ if by_table or isinstance(by, (dict, Dictionary)):
+ if isinstance(by, dict):
+ name = list(by.keys())[0]  # returned Column is named after the first key
+ elif isinstance(by, Dictionary):
+ name = by.keys()[0]
+ else:
+ name = by[0]  # `by_table` set with a non-dictionary input: use its first item
+ elif isinstance(by, Column):
+ name = by._name
+ by = by._value  # unwrap the Column before building the parse tree
+ elif isinstance(by, QueryPhrase):
+ name = by._names[0]
+ by = by.to_dict()
+ else:
+ name = by  # any other input is used directly as the name
+ if isinstance(data, QueryPhrase):
+ data = data.to_dict()
+ pt = ParseTree.fby(by, aggregate, data, by_table, data_table)
+ return Column(name=name, value=pt)
+
+ def fills(self, iterator=None):
+ """
+ Replace null values with the preceding non-null value within a column or
+ vector within rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Replace nulls in a column with preceding non-null values:
+
+ ```python
+ >>> import pykx as kx
+ >>> value_list = kx.q.til(2)
+ >>> value_list.append(kx.LongAtom.null)
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, value_list),
+ ... 'b': kx.random.random([100, 3], value_list)
+ ... })
+ >>> tab.select(kx.Column('a').fills())
+ pykx.Table(pykx.q('
+ a
+ -
+
+ 0
+ 0
+ 1
+ 1
+ ..
+ '))
+ ```
+
+ Replace null values within each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> value_list = kx.q.til(2)
+ >>> value_list.append(kx.LongAtom.null)
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, value_list),
+ ... 'b': kx.random.random([100, 3], value_list)
+ ... })
+ >>> tab.select(kx.Column('b').fills(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -----
+ 1 1 0
+ 0 1 1
+ 1 1 1
+ 1 1 1
+ 0 1 1
+ ..
+ '))
+ ```
+ """
+ return self.call('fills', iterator=iterator)
+
+ def first(self, iterator=None):
+ """
+ Retrieve the first item of a column or first item of each row
+ in a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Retrieve the first element of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.exec(kx.Column('a').first())
+ pykx.LongAtom(pykx.q('1'))
+ ```
+
+ Retrieve the first element of each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [3, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').first(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ --
+ -1
+ 3
+ 1
+ '))
+ ```
+ """
+ return self.call('first', iterator=iterator)
+
+ def floor(self, iterator=None):
+ """
+ Calculate the nearest integer less than or equal to each item in a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Calculate the floor of all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [0.1, 0.4, 3.6],
+ ... 'b': [[-1.1, 2.2, 1.6], [0.3, 1.4, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').floor())
+ pykx.Table(pykx.q('
+ a
+ -
+ 0
+ 0
+ 3
+ '))
+ ```
+
+ Calculate the floor for all values in each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [0.1, 0.4, 3.6],
+ ... 'b': [[-1.1, 2.2, 1.6], [0.3, 1.4, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').floor(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ------
+ -2 2 1
+ 0 1 2
+ 1 2 3
+ '))
+ ```
+ """
+ return self.call('floor', iterator=iterator)
+
+ def null(self, iterator=None):
+ """
+ Determine if a value in a column or row of a column is a null value.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find null values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> value_list = kx.q.til(2)
+ >>> value_list.append(kx.LongAtom.null)
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, value_list),
+ ... 'b': kx.random.random([100, 3], value_list)
+ ... })
+ >>> tab.select(kx.Column('a').null())
+ pykx.Table(pykx.q('
+ a
+ -
+ 1
+ 0
+ 0
+ 0
+ 1
+ ..
+ '))
+ ```
+
+ Find null values within each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> value_list = kx.q.til(2)
+ >>> value_list.append(kx.LongAtom.null)
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, value_list),
+ ... 'b': kx.random.random([100, 3], value_list)
+ ... })
+ >>> tab.select(kx.Column('b').null(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----
+ 000b
+ 110b
+ 011b
+ 000b
+ ..
+ '))
+ ```
+ """
+ return self.call('null', iterator=iterator)
+
+ def iasc(self, iterator=None):
+ """
+ Return the indexes needed to sort the values in a column/row in
+ ascending order.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find the indices needed to sort values in a column in ascending order:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('a').iasc())
+ pykx.Table(pykx.q('
+ a
+ --
+ 19
+ 25
+ 30
+ 40
+ 50
+ ..
+ '))
+ ```
+
+ Find the indices needed to sort each row in a column in ascending order:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('b').iasc(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----
+ 2 0 1
+ 1 0 2
+ 0 1 2
+ 0 1 2
+ ..
+ '))
+ ```
+ """
+ return self.call('iasc', iterator=iterator)
+
+ def idesc(self, iterator=None):
+ """
+ Return the indexes needed to sort the values in a column/row in
+ descending order.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find the indices needed to sort values in a column in descending order:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('a').idesc())
+ pykx.Table(pykx.q('
+ a
+ --
+ 39
+ 43
+ 45
+ 56
+ 60
+ ..
+ '))
+ ```
+
+ Find the indices needed to sort each row in a column in descending order:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('b').idesc(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----
+ 1 0 2
+ 2 0 1
+ 1 2 0
+ 2 1 0
+ ..
+ '))
+ ```
+ """
+ return self.call('idesc', iterator=iterator)
+
+ def inter(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Return the intersection between a column and:
+
+ - Another column
+ - A Python list/numpy array
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Return the distinct intersection of values between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 5)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10))
+ ... })
+ >>> tab.exec(kx.Column('a').inter(kx.Column('b')).distinct())
+ pykx.LongVector(pykx.q('2 3 1 4 0'))
+ ```
+
+ Return the distinct intersection of values between a column and a variable in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 5)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10))
+ ... })
+ >>> kx.q('custom_var:100?6')
+ >>> tab.exec(kx.Column('b').inter(kx.Variable('custom_var')).distinct())
+ pykx.LongVector(pykx.q('5 2 1 4 3 0'))
+ ```
+ """
+ return self.call('inter', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def isin(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Return a list of booleans indicating if the items in a column are in a specified:
+
+ - Column
+ - Python list/numpy array
+ - A PyKX variable in q memory
+
+ Most commonly this function is used in where clauses to filter data.
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Query a table for rows where the column contains the element 'AAPL':
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(where=kx.Column('a').isin(['AAPL']))
+ pykx.Table(pykx.q('
+ a b
+ ------
+ AAPL 7
+ AAPL 4
+ AAPL 2
+ ..
+ '))
+ ```
+
+ Query a table for rows where a column's value is in a variable held in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, ['AAPL', 'GOOG', 'MSFT']),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> kx.q('custom_var:1 2 3')
+ >>> tab.select(where=kx.Column('b').isin(kx.Variable('custom_var')))
+ pykx.Table(pykx.q('
+ a b
+ ------
+ GOOG 2
+ MSFT 1
+ MSFT 2
+ GOOG 3
+ ..
+ '))
+ ```
+ """
+ return self.call('in', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def last(self, iterator=None):
+ """
+ Retrieve the last item of a column or last item of each row
+ in a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Retrieve the last element of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.exec(kx.Column('a').last())
+ pykx.LongAtom(pykx.q('0'))
+ ```
+
+ Retrieve the last element of each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [3, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').last(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -
+ 1
+ 2
+ 3
+ '))
+ ```
+ """
+ return self.call('last', iterator=iterator)
+
+ def like(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Return a list of booleans indicating whether an item in a column matches a
+ supplied regex pattern. Most commonly this function is used in where
+ clauses to filter data.
+
+ Parameters:
+ other: A string/byte array defining a regex pattern to be used for query
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Query a table for rows where the column matches the pattern '[tT]E*':
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, ['TEST', 'tEsTing', 'string']),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(where=kx.Column('a').like('[tT]E*'))
+ pykx.Table(pykx.q('
+ a b
+ ------
+ TEST 7
+ TEST 8
+ tEsTing 4
+ tEsTing 9
+ ..
+ '))
+ ```
+ """
+ if isinstance(other, str):
+ other = other.encode('utf-8')  # str patterns are handed to `call` as UTF-8 bytes
+ return self.call('like', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def log(self, iterator=None):
+ """
+ Calculate the natural logarithm of values in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Calculate the natural log of values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.exec(kx.Column('a').log())
+ pykx.FloatVector(pykx.q('0 1.386294 1.386294 1.098612..'))
+ ```
+
+ Calculate the natural log of values within each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').log(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ----------------------------
+ 1.386294 -0w 0
+ -0w -0w 1.098612
+ 1.098612 0 0
+ 1.386294 1.386294 1.098612
+ ..
+ '))
+ ```
+ """
+ return self.call('log', iterator=iterator)
+
+ def lower(self, iterator=None):
+ """
+ Change the case of string/symbol objects within a column to be all
+ lower case.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Lower all values within a symbol list:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': ['TeStiNG', 'lOwER', 'FuncTion'],
+ ... 'b': [1, 2, 3]
+ ... })
+ >>> tab.select(kx.Column('a').lower())
+ pykx.Table(pykx.q('
+ a
+ --------
+ testing
+ lower
+ function
+ '))
+ ```
+ """
+ return self.call('lower', iterator=iterator)
+
+ def ltrim(self, iterator=None):
+ """
+ Remove whitespace at the start of character vectors (strings) within items
+ in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Remove leading whitespace from all values in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [b' test ', b' values ', b'trim '],
+ ... 'b': [1, 2, 3]
+ ... })
+ >>> tab.select(kx.Column('a').ltrim())
+ pykx.Table(pykx.q('
+ a
+ ----------
+ "test "
+ "values "
+ "trim "
+ '))
+ ```
+ """
+ return self.call('ltrim', iterator=iterator)
+
+ def mavg(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Calculate the simple moving average of items in a column for a specified
+ window length. Any nulls after the first item are replaced by zero.
+ The results are returned as floating point values.
+
+ Parameters:
+ other: An integer denoting the window to be used for calculation of
+ the moving average
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the moving average of values within a column using a window of 3:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, ['TEST', 'tEsTing', 'string']),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('b').mavg(3))
+ pykx.Table(pykx.q('
+ b
+ --------
+ 7
+ 7.5
+ 6.333333
+ 5.333333
+ ..
+ '))
+ ```
+ """
+ return self.call('mavg', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def max(self, iterator=None):
+ """
+ Find the maximum value in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find the maximum value within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.exec(kx.Column('a').max())
+ pykx.LongAtom(pykx.q('4'))
+ ```
+
+ Find the maximum value within each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').max(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -
+ 2
+ 3
+ 4
+ 4
+ ..
+ '))
+ ```
+ """
+ return self.call('max', iterator=iterator)
+
+ def maxs(self, iterator=None):
+ """
+ Find the running maximum value in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find the running maximum values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('a').maxs())
+ pykx.Table(pykx.q('
+ a
+ -
+ 0
+ 1
+ 2
+ 3
+ 3
+ ..
+ '))
+ ```
+
+ Find the running maximum values within each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').maxs(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -----
+ 2 2 2
+ 3 3 3
+ 4 4 4
+ 0 3 4
+ ..
+ '))
+ ```
+ """
+ return self.call('maxs', iterator=iterator)
+
+ def mcount(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Calculate the moving count of non-null items in a column for a specified
+ window length. The first 'other' items of the result are the counts
+ so far, thereafter the result is the moving count.
+
+ Parameters:
+ other: An integer denoting the window to be used for calculation of
+ the moving count
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the moving count of non-null values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('a').mcount(3))
+ pykx.Table(pykx.q('
+ a
+ -
+ 0
+ 1
+ 1
+ 2
+ 1
+ ..
+ '))
+ ```
+ """
+ return self.call('mcount', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def md5(self, iterator=None):
+ """
+ Apply the MD5 hash algorithm on columns/rows within a column; it is
+ suggested that this function be used on rows rather than
+ columns if being applied.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Apply the MD5 hash algorithm on each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [b' test ', b' values ', b'trim ']
+ ... })
+ >>> tab.select(kx.Column('a').md5(iterator='each'))
+ pykx.Table(pykx.q('
+ a
+ ----------------------------------
+ 0x5609a772b21a22d88f3fb3d21f564eab
+ 0xbb24d929a28559cc0aa65cb326d7662e
+ 0xdeafa2fe0c90bcf8c722003bfdeb7c78
+ '))
+ ```
+ """
+ return self.call('md5', iterator=iterator)
+
+ def mdev(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Calculate the moving standard deviation for items in a column over a specified
+ window length. The first 'other' items of the result are the standard deviation
+ of items so far, thereafter the result is the moving standard deviation.
+
+ Parameters:
+ other: An integer denoting the window to be used for calculation of
+ the moving standard deviation
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before invocation with use of an iterator.
+
+ Examples:
+
+ Calculate the moving standard deviation of values within a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.exec(kx.Column('b').mdev(3))
+ pykx.FloatVector(pykx.q('0 1.5 1.699673 1.247219..'))
+ ```
+ """
+ return self.call('mdev', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def med(self, iterator=None):
+ """
+ Find the median value in a column or rows of a column.
+
+ Parameters:
+ iterator: The iterator to use when operating on the column;
+ for example, to execute per row, use `each`.
+
+ Examples:
+
+ Find the median value of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.exec(kx.Column('a').med())
+ pykx.FloatAtom(pykx.q('2f'))
+ ```
+
+ Find the median value for each row of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.select(kx.Column('b').med(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ -
+ 3
+ 2
+ 3
+ 3
+ ..
+ '))
+ ```
+ """
+ return self.call('med', iterator=iterator)
+
+    def min(self, iterator=None):
+        """
+        Find the minimum value in a column or rows of a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Find the minimum values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').min())
+        pykx.LongAtom(pykx.q('0'))
+        ```
+
+        Find the minimum values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').min(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -
+        0
+        0
+        2
+        0
+        ..
+        '))
+        ```
+        """
+        return self.call('min', iterator=iterator)
+
+    def mins(self, iterator=None):
+        """
+        Compute the running minimum of the values in a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the running minimum values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('a').mins())
+        pykx.Table(pykx.q('
+        a
+        -
+        0
+        0
+        0
+        0
+        0
+        ..
+        '))
+        ```
+
+        Find the running minimum values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').mins(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -----
+        0 0 0
+        0 0 0
+        2 2 2
+        2 2 0
+        ..
+        '))
+        ```
+        """
+        running_minimum = self.call('mins', iterator=iterator)
+        return running_minimum
+
+    def mmax(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """
+        Compute the moving maximum of the items in a column using a window of
+        fixed length. For the first 'other' items the result is the maximum
+        of the items seen so far, afterwards it is the moving maximum
+
+        Parameters:
+            other: An integer giving the window length used when calculating
+                the moving maximum
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row.
+            col_arg_ind: The position within the multivariate function at
+                which the column parameter is supplied. Default 1.
+            project_args: The indices of the arguments of a multivariate function
+                which are projected onto the function before it is invoked
+                with an iterator.
+
+        Examples:
+
+        Calculate the moving maximum of values within a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+        ...     'b': kx.random.random(100, 10)
+        ...     })
+        >>> tab.exec(kx.Column('b').mmax(3))
+        pykx.LongVector(pykx.q('4 4 4 3 7..'))
+        ```
+        """
+        # Gather the keyword options once, then delegate to the generic caller
+        options = {
+            'iterator': iterator,
+            'col_arg_ind': col_arg_ind,
+            'project_args': project_args,
+        }
+        return self.call('mmax', other, **options)
+
+    def mmin(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """
+        Calculate the moving minimum for items in a column over a specified
+        window length. The first 'other' items of the result are the minimum
+        of items so far, thereafter the result is the moving minimum
+
+        Parameters:
+            other: An integer denoting the window to be used for calculation of
+                the moving minimum
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 1.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before evocation with use of an iterator.
+
+        Examples:
+
+        Calculate the moving minimum of values within a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+        ...     'b': kx.random.random(100, 10)
+        ...     })
+        >>> tab.exec(kx.Column('b').mmin(3))
+        pykx.LongVector(pykx.q('4 1 0 0 0 2..'))
+        ```
+        """
+        return self.call('mmin', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def mod(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Calculate the modulus of items in a column for a given value.
+
+        Parameters:
+            other: An integer denoting the divisor to be used when calculating the modulus
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 0.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before evocation with use of an iterator.
+
+        Examples:
+
+        Calculate the modulus for items within a column for a value 3:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+        ...     'b': kx.random.random(100, 10)
+        ...     })
+        >>> tab.exec(kx.Column('b').mod(3))
+        pykx.LongVector(pykx.q('1 2 1 1 0..'))
+        ```
+        """
+        return self.call('mod', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def msum(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """
+        Compute the moving sum of the items in a column using a window of
+        fixed length. For the first 'other' items the result is the sum
+        of the items seen so far, afterwards it is the moving sum
+
+        Parameters:
+            other: An integer giving the window length used when calculating
+                the moving sum
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row.
+            col_arg_ind: The position within the multivariate function at
+                which the column parameter is supplied. Default 1.
+            project_args: The indices of the arguments of a multivariate function
+                which are projected onto the function before it is invoked
+                with an iterator.
+
+        Examples:
+
+        Calculate the moving sum of values within a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+        ...     'b': kx.random.random(100, 10)
+        ...     })
+        >>> tab.exec(kx.Column('b').msum(3))
+        pykx.LongVector(pykx.q('4 5 5 4 10 12..'))
+        ```
+        """
+        # Gather the keyword options once, then delegate to the generic caller
+        options = {
+            'iterator': iterator,
+            'col_arg_ind': col_arg_ind,
+            'project_args': project_args,
+        }
+        return self.call('msum', other, **options)
+
+    def neg(self, iterator=None):
+        """
+        Negate every item in a column, or in each row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Compute the negative value for all items in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('a').neg())
+        pykx.Table(pykx.q('
+        a
+        --
+        0
+        -3
+        -4
+        -2
+        0
+        ..
+        '))
+        ```
+        """
+        negated = self.call('neg', iterator=iterator)
+        return negated
+
+    def prd(self, iterator=None):
+        """
+        Compute the product of the values in a column, or of each row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the product of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').prd())
+        pykx.FloatAtom(pykx.q('9.076436e+25'))
+        ```
+
+        Find the product of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').prd(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -
+        0
+        0
+        32
+        0
+        0
+        48
+        ..
+        '))
+        ```
+        """
+        product = self.call('prd', iterator=iterator)
+        return product
+
+    def prds(self, iterator=None):
+        """
+        Compute the running product of the values in a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the running product of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('a').prds())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        0.8276359
+        3.833871
+        2.317464
+        3.940125
+        ..
+        '))
+        ```
+
+        Find the running product of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').prds(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -------
+        0 0 0
+        0 0 0
+        2 8 32
+        2 6 0
+        0 0 0
+        3 12 48
+        ..
+        '))
+        ```
+        """
+        running_product = self.call('prds', iterator=iterator)
+        return running_product
+
+    def prev(self, iterator=None):
+        """
+        For each item in a column retrieve the item immediately preceding it
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Replace the values of column 'a' with the preceding value in that column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.update(kx.Column('a').prev())
+        pykx.Table(pykx.q('
+        a b
+        ---------------
+        0 4 4
+        0.8276359 0 2 3
+        4.632315 2 4 4
+        0.6044712 2 3 0
+        '))
+        ```
+        """
+        preceding = self.call('prev', iterator=iterator)
+        return preceding
+
+    def rank(self, iterator=None):
+        """
+        Retrieve the positions items would take in a sorted list from a column
+        or items in a column, this is equivalent of calling `iasc` twice
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Calculate the rank of items in a list
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 1000),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.update(kx.Column('a').rank())
+        pykx.Table(pykx.q('
+        a b
+        --------
+        89 3 4 4
+        31 1 2 1
+        57 4 4 0
+        25 4 4 2
+        ..
+        '))
+        ```
+        """
+        return self.call('rank', iterator=iterator)
+
+    def ratios(self, iterator=None):
+        """
+        Calculate the ratio between consecutive elements in a column or rows of a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Calculate the ratio between consecutive values in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 1000),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').ratios())
+        pykx.FloatVector(pykx.q('908 0.3964758 1.45..'))
+        ```
+        """
+        return self.call('ratios', iterator=iterator)
+
+    def reciprocal(self, iterator=None):
+        """
+        Calculate the reciprocal of all elements in a column or rows of a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Calculate the reciprocal of items in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 1000),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').reciprocal())
+        pykx.FloatVector(pykx.q('0.001101322 0.002777778 0.001915709..'))
+        ```
+        """
+        return self.call('reciprocal', iterator=iterator)
+
+    def reverse(self, iterator=None):
+        """
+        Reverse the elements of a column or contents of rows of the column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Reverse the items in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.til(100),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').reverse())
+        pykx.LongVector(pykx.q('99 98 97..'))
+        ```
+        """
+        # Forward the iterator so that per-row reversal (e.g. iterator='each')
+        # is honoured rather than silently dropped
+        return self.call('reverse', iterator=iterator)
+
+    def rotate(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """
+        Shift the items in a column "left" or "right" by an integer amount denoted
+        by the parameter other.
+
+        Parameters:
+            other: An integer denoting the number of elements left(positive) or right(negative)
+                which the column list will be shifted
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 1.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before evocation with use of an iterator.
+
+        Examples:
+
+        Shift the items in column b by 2 left:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, ['TEST', 'tEsTing', 'string']),
+        ...     'b': kx.random.random(100, 10)
+        ...     })
+        >>> tab.select(kx.Column('b') & kx.Column('b').rotate(2).name('rot_b'))
+        pykx.Table(pykx.q('
+        b rot_b
+        -------
+        7 4
+        8 4
+        4 6
+        4 9
+        6 9
+        9 2
+        ..
+        '))
+        ```
+        """
+        return self.call('rotate', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def rtrim(self, iterator=None):
+        """
+        Remove whitespace from the end of character vectors(strings) within items
+        in a column or rows of a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Remove trailing whitespace from all values in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [b' test ', b' values ', b'trim '],
+        ...     'b': [1, 2, 3]
+        ...     })
+        >>> tab.select(kx.Column('a').rtrim())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        " test"
+        " values"
+        "trim"
+        '))
+        ```
+        """
+        return self.call('rtrim', iterator=iterator)
+
+    def scov(self, iterator=None):
+        """
+        Compute the sample covariance of the items in a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Calculate the sample covariance of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.exec(kx.Column('a').scov())
+        pykx.FloatAtom(pykx.q('8.983196'))
+        ```
+
+        Calculate the sample covariance for each row in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.select(kx.Column('b').scov(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ---------
+        0.3333333
+        0.3333333
+        5.333333
+        1.333333
+        ..
+        '))
+        ```
+        """
+        # NOTE(review): the q reference describes `scov` as a dyadic keyword
+        # (`x scov y`) while only the column is supplied here - confirm that
+        # this single-argument call behaves as the examples above suggest.
+        covariance = self.call('scov', iterator=iterator)
+        return covariance
+
+    def sdev(self, iterator=None):
+        """
+        Compute the sample standard deviation of the items in a column, or of
+        each row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Calculate the sample standard deviation of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.exec(kx.Column('a').sdev())
+        pykx.FloatAtom(pykx.q('8.983196'))
+        ```
+
+        Calculate the sample standard deviation for each row in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.select(kx.Column('b').sdev(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ---------
+        0.3333333
+        0.3333333
+        5.333333
+        1.333333
+        ..
+        '))
+        ```
+        """
+        deviation = self.call('sdev', iterator=iterator)
+        return deviation
+
+    def signum(self, iterator=None):
+        """
+        Determine whether each element in a column, or each item in the rows of a column, is:
+
+        - null or negative, returns -1i
+        - zero, returns 0i
+        - positive, returns 1i
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Determine if values are negative, null, zero or positive in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.exec(kx.Column('a').signum())
+        pykx.IntVector(pykx.q('1 -1 0i'))
+        ```
+
+        Find if values are negative, null, zero or positive in each row of a specified column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('b').signum(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ------
+        -1 1 1
+        0 1 1
+        1 1 1
+        '))
+        ```
+        """
+        return self.call('signum', iterator=iterator)
+
+    def sin(self, iterator=None):
+        """
+        Compute the sine of a column, or of the items in a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Calculate the sine value for all elements in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('a').sin())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        0.841471
+        -0.841471
+        0
+        '))
+        ```
+
+        Calculate the sine value for each row of a specified column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('b').sin(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -----------------------------
+        -0.841471 0.9092974 0.841471
+        0 0.841471 0.9092974
+        0.841471 0.9092974 0.14112
+        '))
+        ```
+        """
+        sine = self.call('sin', iterator=iterator)
+        return sine
+
+    def sqrt(self, iterator=None):
+        """
+        Compute the square root of each element of a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the square root of each value within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').sqrt())
+        pykx.FloatVector(pykx.q('1.152283 1.717071 1.253352..'))
+        ```
+
+        Find the square root of each value within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').sqrt(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        --------------------------
+        1.732051 1.414214 1.732051
+        2 1.732051 2
+        1.732051 1.732051 1.414214
+        0 0 1.414214
+        1.732051 1.414214 1.414214
+        ..
+        '))
+        ```
+        """
+        roots = self.call('sqrt', iterator=iterator)
+        return roots
+
+    def string(self, iterator=None):
+        """
+        Convert every element of a column into a PyKX string (CharVector)
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Convert all elements of a column to strings
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1234, 1.01, 12142],
+        ...     'b': [1, 2, 3]
+        ...     })
+        >>> tab.select(kx.Column('a').string())
+        pykx.Table(pykx.q('
+        a
+        -------
+        "1234"
+        "1.01"
+        "12142"
+        '))
+        ```
+        """
+        stringified = self.call('string', iterator=iterator)
+        return stringified
+
+    def sum(self, iterator=None):
+        """
+        Compute the sum of the values in a column, or of each row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the sum of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').sum())
+        pykx.FloatAtom(pykx.q('249.3847'))
+        ```
+
+        Find the sum of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').sum(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -
+        6
+        4
+        6
+        6
+        ..
+        '))
+        ```
+        """
+        total = self.call('sum', iterator=iterator)
+        return total
+
+    def sums(self, iterator=None):
+        """
+        Compute the running sum of the values in a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Find the running sum of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('a').sums())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        4.396227
+        8.42457
+        8.87813
+        11.26718
+        ..
+        '))
+        ```
+
+        Find the running sum of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').sums(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -------
+        0 3 6
+        3 4 4
+        1 3 6
+        3 3 6
+        ..
+        '))
+        ```
+        """
+        running_total = self.call('sums', iterator=iterator)
+        return running_total
+
+    def svar(self, iterator=None):
+        """
+        Compute the sample variance of the items in a column, or of each
+        row of a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Calculate the sample variance of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.exec(kx.Column('a').svar())
+        pykx.FloatAtom(pykx.q('8.394893'))
+        ```
+
+        Calculate the sample variance for each row in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.select(kx.Column('b').svar(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ---------
+        6.023586
+        29.48778
+        6.318229
+        0.1609426
+        5.241295
+        ..
+        '))
+        ```
+        """
+        variance = self.call('svar', iterator=iterator)
+        return variance
+
+    def tan(self, iterator=None):
+        """
+        Compute the tangent of a column, or of the items in a column
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Calculate the tan value for all elements in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('a').tan())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        1.557408
+        -1.557408
+        0
+        '))
+        ```
+
+        Calculate the tan value for each row of a specified column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('b').tan(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -----------------------------
+        -1.557408 -2.18504 1.557408
+        0 1.557408 -2.18504
+        1.557408 -2.18504 -0.1425465
+        '))
+        ```
+        """
+        tangent = self.call('tan', iterator=iterator)
+        return tangent
+
+    def trim(self, iterator=None):
+        """
+        Remove whitespace from the start and end of character vectors(strings)
+        within items in a column or rows of a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+
+        Examples:
+
+        Remove leading and trailing whitespace from all values in a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [b' test ', b' values ', b'trim '],
+        ...     'b': [1, 2, 3]
+        ...     })
+        >>> tab.select(kx.Column('a').trim())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        "test"
+        "values"
+        "trim"
+        '))
+        ```
+        """
+        return self.call('trim', iterator=iterator)
+
+    def union(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Compute the union of a column with one of the following:
+
+        - Another column
+        - A Python list/numpy array
+        - A PyKX variable in q memory
+
+        Parameters:
+            other: The second column or variable (Python/q) taking part in the union
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row.
+            col_arg_ind: The position within the multivariate function at
+                which the column parameter is supplied. Default 0.
+            project_args: The indices of the arguments of a multivariate function
+                which are projected onto the function before it is invoked
+                with an iterator.
+
+        Examples:
+
+        Return the distinct union of values between two columns:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> tab.exec(kx.Column('a').union(kx.Column('b')).distinct())
+        pykx.LongVector(pykx.q('0 1 2 3 4 9 8 7 6 5'))
+        ```
+
+        Return the distinct union of values between a column and variable in q memory:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> kx.q('custom_var:100?6')
+        >>> tab.exec(kx.Column('b').union(kx.Variable('custom_var')).distinct())
+        pykx.LongVector(pykx.q('9 8 7 6 5 4 3 2 1 0'))
+        ```
+        """
+        # Gather the keyword options once, then delegate to the generic caller
+        options = {
+            'iterator': iterator,
+            'col_arg_ind': col_arg_ind,
+            'project_args': project_args,
+        }
+        return self.call('union', other, **options)
+
+    def upper(self, iterator=None):
+        """
+        Convert the string/symbol objects within a column so that they are
+        entirely upper case
+
+        Parameters:
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row
+
+        Examples:
+
+        Convert all values within a symbol list to be upper case
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': ['TeStiNG', 'UpPer', 'FuncTion'],
+        ...     'b': [1, 2, 3]
+        ...     })
+        >>> tab.select(kx.Column('a').upper())
+        pykx.Table(pykx.q('
+        a
+        --------
+        TESTING
+        UPPER
+        FUNCTION
+        '))
+        ```
+        """
+        uppercased = self.call('upper', iterator=iterator)
+        return uppercased
+
+    def var(self, iterator=None, sample=False):
+        """
+        Calculate the variance or sample variance for items in a
+        column or rows in a column
+
+        Parameters:
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`
+            sample: Should calculation of variance return the
+                sample variance (set True) or the variance (set False {default})
+
+        Examples:
+
+        Calculate the variance of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.exec(kx.Column('a').var())
+        pykx.FloatAtom(pykx.q('8.310944'))
+        ```
+
+        Calculate the sample variance for each row in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.select(kx.Column('b').var(sample=True, iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ---------
+        6.023586
+        29.48778
+        6.318229
+        0.1609426
+        5.241295
+        ..
+        '))
+        ```
+        """
+        # `svar` is requested for the sample variance, `var` otherwise
+        fn = 'svar' if sample else 'var'
+        return self.call(fn, iterator=iterator)
+
+    def wavg(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Compute the weighted average between a column and one of the following:
+
+        - Another column
+        - A Python list/numpy array
+        - A PyKX variable in q memory
+
+        Parameters:
+            other: The second column or variable (Python/q) used in the calculation
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row.
+            col_arg_ind: The position within the multivariate function at
+                which the column parameter is supplied. Default 0.
+            project_args: The indices of the arguments of a multivariate function
+                which are projected onto the function before it is invoked
+                with an iterator.
+
+        Examples:
+
+        Return the weighted average between two columns:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> tab.exec(kx.Column('a').wavg(kx.Column('b')))
+        pykx.FloatAtom(pykx.q('2.456731'))
+        ```
+
+        Return the weighted average between a column and variable in q memory:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> kx.q('custom_var:100?6')
+        >>> tab.exec(kx.Column('b').wavg(kx.Variable('custom_var')))
+        pykx.FloatAtom(pykx.q('2.431111'))
+        ```
+        """
+        # Gather the keyword options once, then delegate to the generic caller
+        options = {
+            'iterator': iterator,
+            'col_arg_ind': col_arg_ind,
+            'project_args': project_args,
+        }
+        return self.call('wavg', other, **options)
+
+    def within(self, lower, upper, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Return a boolean list indicating whether the items of a column are within bounds
+        of a lower and upper limit.
+
+        Parameters:
+            lower: A sortable item defining the lower limit
+            upper: A sortable item defining the upper limit
+            iterator: What iterator to use when operating on the column
+                for example, to execute per row, use `each`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 0.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before evocation with use of an iterator.
+
+        Examples:
+
+        Return any rows where column a has a value within the range 1, 4:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> tab.select(where = kx.Column('a').within(1, 4))
+        pykx.Table(pykx.q('
+        a b
+        ---
+        1 9
+        1 9
+        2 9
+        2 9
+        4 9
+        ..
+        '))
+        ```
+
+        Return any rows where column a has a value within a date range:
+
+        ```python
+        >>> import pykx as kx
+        >>> today = kx.DateAtom('today')
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, today - range(0, 10)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> tab.select(where=kx.Column('a').within(today - 5, today - 3))
+        ```
+        """
+        # The two limits are supplied to `within` as a single two-item list
+        return self.call('within', [lower, upper], iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def wsum(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Compute the weighted sum between a column and one of the following:
+
+        - Another column
+        - A Python list/numpy array
+        - A PyKX variable in q memory
+
+        Parameters:
+            other: The second column or variable (Python/q) used in the calculation
+            iterator: The iterator applied when operating on the column,
+                for example, use `each` to execute per row.
+            col_arg_ind: The position within the multivariate function at
+                which the column parameter is supplied. Default 0.
+            project_args: The indices of the arguments of a multivariate function
+                which are projected onto the function before it is invoked
+                with an iterator.
+
+        Examples:
+
+        Return the weighted sum between two columns:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> tab.exec(kx.Column('a').wsum(kx.Column('b')))
+        pykx.FloatAtom(pykx.q('511f'))
+        ```
+
+        Return the weighted sum between a column and variable in q memory:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 5)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10))
+        ...     })
+        >>> kx.q('custom_var:100?6')
+        >>> tab.exec(kx.Column('b').wsum(kx.Variable('custom_var')))
+        pykx.FloatAtom(pykx.q('1094f'))
+        ```
+        """
+        # Gather the keyword options once, then delegate to the generic caller
+        options = {
+            'iterator': iterator,
+            'col_arg_ind': col_arg_ind,
+            'project_args': project_args,
+        }
+        return self.call('wsum', other, **options)
+
+ def xbar(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Round the elements of a column down to the nearest multiple of the supplied
+ parameter other.
+
+ Parameters:
+ other: An integer denoting the multiple to which all values will be rounded
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Round the items of a column to multiples of 3:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, ['TEST', 'tEsTing', 'string']),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('b').xbar(3))
+ pykx.Table(pykx.q('
+ b
+ -
+ 3
+ 6
+ 9
+ 6
+ ..
+ '))
+ ```
+
+ Round a column of times to 15 minute buckets
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.q('100?0t')),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('b').avg(), by=kx.Column('a').minute.xbar(15))
+ pykx.KeyedTable(pykx.q('
+ a | b
+ -----| --------
+ 00:00| 5.666667
+ 00:15| 3
+ 00:45| 1
+ 01:00| 4.5
+ '))
+ ```
+ """
+ return self.call('xbar', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def xexp(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Raise the value supplied as the parameter other to the power of each element of a
+ column (with the default `col_arg_ind` of 1 the column supplies the exponents).
+
+ Parameters:
+ other: A number denoting the base which is raised to the power of each column value
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Raise 2 to the power of each item in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 10.0),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('b').xexp(2))
+ pykx.Table(pykx.q('
+ b
+ ---
+ 64
+ 512
+ 4
+ 8
+ 2
+ ..
+ '))
+ ```
+ """
+ return self.call('xexp', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def xlog(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Return the base-N logarithm for the elements of a column where N is specified
+ by the parameter other.
+
+ Parameters:
+ other: An integer denoting the logarithmic base to which all values will be set
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Calculate the base-2 logarithm of the items of a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 10.0),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('b').xlog(2))
+ pykx.Table(pykx.q('
+ b
+ --------
+ 1.584963
+ 3.169925
+ 2.321928
+ 3.169925
+ ..
+ '))
+ ```
+ """
+ return self.call('xlog', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def xprev(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ For a specified column return for each item in the column the item N elements
+ before it. Where N is specified by the parameter other.
+
+ Parameters:
+ other: An integer denoting the number of indices before elements in the list
+ to retrieve the value of
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Shift the data in a column by 3 indexes:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 10.0),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.select(kx.Column('a') & kx.Column('a').xprev(3).name('lag_3_a'))
+ pykx.Table(pykx.q('
+ a lag_3_a
+ -------------------
+ 3.927524
+ 5.170911
+ 5.159796
+ 4.066642 3.927524
+ 1.780839 5.170911
+ 3.017723 5.159796
+ '))
+ ```
+ """
+ return self.call('xprev', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ @property
+ def hour(self):
+ """
+ Retrieve the hour information from a temporal column
+
+
+ Examples:
+
+ Retrieve hour information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').hour)
+ pykx.IntVector(pykx.q('11 1 13 12..'))
+ ```
+ """
+ return self.call('`hh$')
+
+ @property
+ def minute(self):
+ """
+ Retrieve the minute information from a temporal column
+
+
+ Examples:
+
+ Retrieve minute information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').minute)
+ pykx.MinuteVector(pykx.q('11:55 01:09 13:43..'))
+ ```
+ """
+ return self.call('`minute$')
+
+ @property
+ def date(self):
+ """
+ Retrieve the date information from a temporal column
+
+
+ Examples:
+
+ Retrieve date information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').date)
+ pykx.DateVector(pykx.q('2122.07.05 2120.10.23..'))
+ ```
+ """
+ return self.call('`date$')
+
+ @property
+ def year(self):
+ """
+ Retrieve the year information from a temporal column
+
+
+ Examples:
+
+ Retrieve year information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').year)
+ pykx.IntVector(pykx.q('2122 2120 2185..'))
+ ```
+ """
+ return self.call('`year$')
+
+ @property
+ def month(self):
+ """
+ Retrieve the month information from a temporal column
+
+
+ Examples:
+
+ Retrieve month information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').month)
+ pykx.IntVector(pykx.q('7 10 12..'))
+ ```
+ """
+ return self.call('`mm$')
+
+ @property
+ def second(self):
+ """
+ Retrieve the second information from a temporal column
+
+
+ Examples:
+
+ Retrieve second information from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.TimestampAtom.inf),
+ ... 'b': kx.random.random([100, 3], 10.0)
+ ... })
+ >>> tab.exec(kx.Column('a').second)
+ pykx.SecondVector(pykx.q('11:55:50 01:09:35..'))
+ ```
+ """
+ return self.call('`second$')
+
+ # Functions below this point are generalisations of q operators or #
+ # expanded function names to improve readability in Python first usage #
+
+ def add(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Add the content of a column to one of:
+
+ - Another column
+ - A vector of equal length to the column
+ - A PyKX variable in q memory
+
+ Note in its most basic usage this is equivalent to
+
+ ```python
+ >>> kx.Column('x') + kx.Column('y')
+ ```
+
+ It is supplied as a named function to allow the use of iterators
+ when adding elements.
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Add together two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').add(kx.Column('b')))
+ pykx.Table(pykx.q('
+ a
+ --------
+ 9.967087
+ 9.870729
+ 9.882342
+ 9.95924
+ ..
+ '))
+ ```
+
+ Add a value of 3 to each element of a column.
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').add(3))
+ pykx.Table(pykx.q('
+ a
+ --------
+ 3.021845
+ 3.044166
+ 3.062797
+ 3.051352
+ ..
+ '))
+ ```
+
+ For each row in a column add 3 and 4 to the value of the column
+ This makes use of each-left and each-right from q:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').add([3, 4], iterator='/:\:'))
+ pykx.Table(pykx.q('
+ a
+ -----------------
+ 3.021845 4.021845
+ 3.044166 4.044166
+ 3.062797 4.062797
+ 3.166843 4.166843
+ ..
+ '))
+ ```
+ """
+ return self.call('+', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def name(self, name):
+ """
+ Rename the resulting column from a calculation
+
+ Parameters:
+ name: The name to be given to the column following application of function
+
+ Examples:
+
+ Rename the column 'a' to 'average_a' following application of the function average
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0.5],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').average().name('average_a'))
+ pykx.Table(pykx.q('
+ average_a
+ ---------
+ 0.1666667
+ '))
+ ```
+ """
+ self._name = name
+ return self
+
+ def average(self, iterator=None):
+ """
+ Calculate the average value for a column or items in a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the average value for all elements in a column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0.5],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('a').average())
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 0.1666667
+ '))
+ ```
+
+ Calculate average value for each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.select(kx.Column('b').average(iterator='each'))
+ pykx.Table(pykx.q('
+ b
+ ---------
+ 0.6666667
+ 1
+ 2
+ '))
+ ```
+ """
+ return self.call('avg', iterator=iterator)
+
+ def cast(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Convert the content of a column to another PyKX type
+
+ Parameters:
+ other: The name of the type to which your column should be cast
+ or the lower case letter used to define it in q, for more information
+ see https://code.kx.com/q/ref/cast/
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Example:
+
+ Cast a column containing PyKX long objects to float objects
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random(100, kx.q.til(10))
+ ... })
+ >>> tab.dtypes
+ pykx.Table(pykx.q('
+ columns datatypes type
+ -----------------------------------
+ a "kx.LongAtom" "kx.LongAtom"
+ b "kx.LongAtom" "kx.LongAtom"
+ '))
+ >>> tab.select(
+ ... kx.Column('a') &
+ ... kx.Column('a').cast('float').name('a_float') &
+ ... kx.Column('b')).dtypes
+ pykx.Table(pykx.q('
+ columns datatypes type
+ -------------------------------------
+ a "kx.LongAtom" "kx.LongAtom"
+ a_float "kx.FloatAtom" "kx.FloatAtom"
+ b "kx.LongAtom" "kx.LongAtom"
+ '))
+ ```
+ """
+ if not isinstance(other, str):
+ raise QError('Supplied value other must be a str')
+ if 1 == len(other):
+ other = other.encode('UTF-8')
+ return self.call('$', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def correlation(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Calculate the correlation between a column and one of:
+
+ - Another column
+ - A vector of equal length to the column
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Calculate the correlation between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').correlation(kx.Column('b')))
+ pykx.FloatAtom(pykx.q('-0.9946109'))
+ ```
+
+ Calculate the correlation between a column and variable in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').correlation(kx.Variable('custom_var')))
+ pykx.FloatAtom(pykx.q('-0.1670133'))
+ ```
+
+ Calculate the correlation between a column and a Python variable:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').correlation(kx.random.random(100, 10.0)))
+ pykx.FloatAtom(pykx.q('-0.01448725'))
+ ```
+ """
+ return self.call('cor', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def covariance(self, other, sample=False, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Calculate the covariance/sample covariance between a column and one of:
+
+ - Another column
+ - A vector of equal length to the column
+ - A PyKX variable in q memory
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ sample: Should calculations of covariance return the
+ sample covariance (set True) or the covariance (set False {default})
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Calculate the covariance between two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').covariance(kx.Column('b')))
+ pykx.FloatAtom(pykx.q('-7.87451'))
+ ```
+
+ Calculate the sample covariance between a column and variable in q memory:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> kx.q('custom_var:100?1f')
+ >>> tab.exec(kx.Column('a').covariance(kx.Variable('custom_var'), sample=True))
+ pykx.FloatAtom(pykx.q('-0.1670133'))
+ ```
+
+ Calculate the covariance between a column and a Python object:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.exec(kx.Column('a').covariance(kx.random.random(100, 10.0)))
+ pykx.FloatAtom(pykx.q('-0.1093116'))
+ ```
+ """
+ fn = 'scov' if sample else 'cov'
+ return self.call(fn, other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def divide(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Divide the content of one column by:
+
+ - Another column
+ - Python/Numpy list/item
+ - A PyKX variable in q memory
+
+ Note in its most basic usage this is equivalent to
+
+ ```python
+ >>> kx.Column('x') % kx.Column('y')
+ ```
+
+ It is supplied as a named function to allow the use of iterators
+ when dividing elements.
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Divide one column by another column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').divide(kx.Column('b')))
+ pykx.Table(pykx.q('
+ a
+ -----------
+ 0.0021965
+ 0.004494546
+ 0.006395103
+ 0.01703797
+ ..
+ '))
+ ```
+
+ Divide each element of a column by 3.
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').divide(3))
+ pykx.Table(pykx.q('
+ a
+ -----------
+ 0.007281574
+ 0.01472198
+ 0.02093233
+ 0.05561419
+ ..
+ '))
+ ```
+
+ For each row in a column divide the row by both 3 and 4 independently.
+ This makes use of each-left and each-right from q:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').divide([3, 4], iterator='/:\:'))
+ pykx.Table(pykx.q('
+ a
+ ---------------------
+ 0.06553417 0.08737889
+ 0.1324978 0.1766638
+ 0.188391 0.251188
+ 0.5005277 0.6673703
+ ..
+ '))
+ ```
+ """
+ return self.call('%', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def drop(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Drop N rows from a column or N elements from items in a column using
+ an iterator. Where N is specified by the other parameter.
+
+ Parameters:
+ other: An integer defining the number of elements to drop
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Drop 3 rows from a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('a').drop(3).count())
+ pykx.Table(pykx.q('
+ a
+ --
+ 10
+ 12
+ 24
+ 27
+ ..
+ '))
+ ```
+ """
+ return self.call('_', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def fill(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Replace all null values in a column with a specified 'other' parameter
+
+ Parameters:
+ other: The value which should replace nulls within a column
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Replace all nulls in column a with a value 0, displaying that only 0, 1 and 2 exist
+ in this column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.exec(kx.Column('a').fill(0).distinct())
+ pykx.LongVector(pykx.q('1 0 2'))
+ ```
+ """
+ return self.call('^', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def index_sort(self, ascend=True, iterator=None):
+ """
+ Return the indexes needed to sort the values in a column/row in
+ ascending order or descending order
+
+ Parameters:
+ ascend: A boolean indicating if the index return should be
+ retrieved in ascending or descending order
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Find the indices needed to sort values in a column in ascending order
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('a').index_sort())
+ pykx.Table(pykx.q('
+ a
+ --
+ 10
+ 12
+ 24
+ 27
+ ..
+ '))
+ ```
+
+ Find the indices needed to sort values in a column in descending order
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, kx.q.til(10)),
+ ... 'b': kx.random.random([100, 3], kx.q.til(10))
+ ... })
+ >>> tab.select(kx.Column('a').index_sort(ascend=False))
+ pykx.Table(pykx.q('
+ a
+ --
+ 1
+ 15
+ 44
+ 50
+ ..
+ '))
+ ```
+
+ """
+ fn = 'iasc' if ascend else 'idesc'
+ return self.call(fn, iterator=iterator)
+
+ def join(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Join the content of one column with another or using an iterator
+ produce complex combinations of items in a column with:
+
+ - Another Column
+ - A list/item which is to be joined to the column
+ - A variable in q memory
+
+ Parameters:
+ other: The Column, list, item or variable to be joined to the
+ original column
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Join the content of one column to another column (extend the column)
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> len(tab.select(kx.Column('a').join(kx.Column('b'))))
+ 200
+ ```
+
+ Join the value 3 to each row in a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').join(3, iterator="'"))
+ pykx.Table(pykx.q('
+ a
+ ---
+ 1 3
+ 2 3
+ 1 3
+ 3 3
+ ..
+ '))
+ ```
+ """
+ return self.call(',', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def len(self, iterator=None):
+ """
+ Calculate the number of elements in a column or in each row of a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Calculate the number of elements in a column
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+ ... })
+ >>> tab.exec(kx.Column('a').len())
+ pykx.LongAtom(pykx.q('3'))
+ ```
+
+ Count the length of elements in each row of a specified column:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': [1, -1, 0],
+ ... 'b': [[-1, 2, 1], [0, 2], 1]
+ ... })
+ >>> tab.exec(kx.Column('b').len(iterator='each'))
+ pykx.LongVector(pykx.q('3 2 1'))
+ ```
+ """
+ return self.call('count', iterator=iterator)
+
+ def modulus(self, other, iterator=None, col_arg_ind=1, project_args=None):
+ """
+ Calculate the modulus of items in a column for a given value.
+
+ Parameters:
+ other: An integer denoting the divisor to be used when calculating the modulus
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 1.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Calculate the modulus for items within a column for a value 3:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, [1, kx.LongAtom.null, 2]),
+ ... 'b': kx.random.random(100, 10)
+ ... })
+ >>> tab.exec(kx.Column('b').mod(3))
+ pykx.LongVector(pykx.q('1 2 1 1 0..'))
+ ```
+ """
+ return self.call('mod', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def multiply(self, other, iterator=None, col_arg_ind=0, project_args=None):
+ """
+ Multiply the content of a column by one of:
+
+ - Another column
+ - Python/Numpy list
+ - A PyKX variable in q memory
+
+ Note in its most basic usage this is equivalent to
+
+ ```python
+ >>> kx.Column('x') * kx.Column('y')
+ ```
+
+ It is supplied as a named function to allow the use of iterators
+ when multiplying elements.
+
+ Parameters:
+ other: The second column or variable (Python/q) to be used
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`.
+ col_arg_ind: Determines the index within the multivariate function
+ where the column parameter will be used. Default 0.
+ project_args: The argument indices of a multivariate function which will be
+ projected on the function before evocation with use of an iterator.
+
+ Examples:
+
+ Multiply together two columns:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').multiply(kx.Column('b')))
+ pykx.Table(pykx.q('
+ a
+ ---------
+ 0.2172511
+ 0.4339994
+ 0.616638
+ 1.633789
+ ..
+ '))
+ ```
+
+ Multiply each element of a column by 3.
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').multiply(3))
+ pykx.Table(pykx.q('
+ a
+ ----------
+ 0.06553417
+ 0.1324978
+ 0.188391
+ 0.5005277
+ ..
+ '))
+ ```
+
+ For each row in a column multiply the row by both 3 and 4 independently.
+ This makes use of each-left and each-right from q:
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.q.asc(kx.random.random(100, 10.0)),
+ ... 'b': kx.q.desc(kx.random.random(100, 10.0))
+ ... })
+ >>> tab.select(kx.Column('a').multiply([3, 4], iterator='/:\:'))
+ pykx.Table(pykx.q('
+ a
+ ---------------------
+ 0.06553417 0.08737889
+ 0.1324978 0.1766638
+ 0.188391 0.251188
+ 0.5005277 0.6673703
+ ..
+ '))
+ ```
+ """
+ return self.call('*', other, iterator=iterator, col_arg_ind=col_arg_ind,
+ project_args=project_args)
+
+ def next_item(self, iterator=None):
+ """
+ Retrieve the immediately following item in a column or rows of a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Shift the values in column 'a' within a column forward one
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5.0),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.update(kx.Column('a').next_item())
+ pykx.Table(pykx.q('
+ a b
+ ---------------
+ 0 4 4
+ 0.8276359 0 2 3
+ 4.632315 2 4 4
+ 0.6044712 2 3 0
+ '))
+ ```
+ """
+ return self.call('next', iterator=iterator)
+
+ def previous_item(self, iterator=None):
+ """
+ Retrieve the immediately preceding item in a column or rows of a column
+
+ Parameters:
+ iterator: What iterator to use when operating on the column
+ for example, to execute per row, use `each`
+
+ Examples:
+
+ Shift the values in column 'a' within a column back one
+
+ ```python
+ >>> import pykx as kx
+ >>> tab = kx.Table(data={
+ ... 'a': kx.random.random(100, 5.0),
+ ... 'b': kx.random.random([100, 3], 5)
+ ... })
+ >>> tab.update(kx.Column('a').previous_item())
+ pykx.Table(pykx.q('
+ a b
+ ---------------
+ 0 4 4
+ 0.8276359 0 2 3
+ 4.632315 2 4 4
+ 0.6044712 2 3 0
+ '))
+ ```
+ """
+ return self.call('prev', iterator=iterator)
+
+    def product(self, iterator=None):
+        """
+        Calculate the product of all values in a column or rows of a column
+
+        Parameters:
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+
+        Examples:
+
+        Find the product of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.exec(kx.Column('a').product())
+        pykx.FloatAtom(pykx.q('9.076436e+25'))
+        ```
+
+        Find the product of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').product(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -
+        0
+        0
+        32
+        0
+        0
+        48
+        ..
+        '))
+        ```
+        """
+        return self.call('prd', iterator=iterator)
+
+    def products(self, iterator=None):
+        """
+        Find the running product of values in a column or rows of a column
+
+        Parameters:
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+
+        Examples:
+
+        Find the running product of values within a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5.0),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('a').products())
+        pykx.Table(pykx.q('
+        a
+        ---------
+        0.8276359
+        3.833871
+        2.317464
+        3.940125
+        ..
+        '))
+        ```
+
+        Find the running product of values within each row of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 5),
+        ...     'b': kx.random.random([100, 3], 5)
+        ...     })
+        >>> tab.select(kx.Column('b').products(iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        -------
+        0 0 0
+        0 0 0
+        2 8 32
+        2 6 0
+        0 0 0
+        3 12 48
+        ..
+        '))
+        ```
+        """
+        return self.call('prds', iterator=iterator)
+
+    def sort(self, ascend=True, iterator=None):
+        """
+        Order the values of a column, either ascending or descending
+
+        Parameters:
+            ascend: When `True` (the default) the values are sorted in ascending
+                order, otherwise they are sorted in descending order
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+
+        Examples:
+
+        Sort the values of a column in ascending order:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('a').sort())
+        pykx.Table(pykx.q('
+        a
+        --
+        -1
+        0
+        1
+        '))
+        ```
+
+        Sort the values of a column in descending order:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': [1, -1, 0],
+        ...     'b': [[-1, 2, 1], [0, 1, 2], [1, 2, 3]]
+        ...     })
+        >>> tab.select(kx.Column('a').sort(ascend=False))
+        pykx.Table(pykx.q('
+        a
+        --
+        1
+        0
+        -1
+        '))
+        ```
+        """
+        # Any truthy `ascend` selects `asc`, everything else selects `desc`
+        return self.call('asc' if ascend else 'desc', iterator=iterator)
+
+    def subtract(self, other, iterator=None, col_arg_ind=0, project_args=None):
+        """
+        Subtract from a column one of:
+
+        - The values of another column
+        - Python/Numpy list/value
+        - A PyKX variable in q memory
+
+        Note in its most basic usage this is equivalent to
+
+        ```python
+        >>> kx.Column('x') - kx.Column('y')
+        ```
+
+        It is supplied as a named function to allow the use of iterators
+        when subtracting elements.
+
+        Parameters:
+            other: The second column or variable (Python/q) to be used
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 0.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before invocation with use of an iterator.
+
+        Examples:
+
+        Subtract the values of two columns:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 10.0)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10.0))
+        ...     })
+        >>> tab.select(kx.Column('a').subtract(kx.Column('b')))
+        pykx.Table(pykx.q('
+        a
+        ---------
+        -9.923397
+        -9.782397
+        -9.756748
+        -9.625555
+        ..
+        '))
+        ```
+
+        Subtract 3 from each element of a column.
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 10.0)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10.0))
+        ...     })
+        >>> tab.select(kx.Column('a').subtract(3))
+        pykx.Table(pykx.q('
+        a
+        ---------
+        -2.978155
+        -2.955834
+        -2.937203
+        -2.833157
+        ..
+        '))
+        ```
+
+        For each row in a column subtract 3 and 4 from the row independently.
+        This makes use of each-left and each-right from q:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.q.asc(kx.random.random(100, 10.0)),
+        ...     'b': kx.q.desc(kx.random.random(100, 10.0))
+        ...     })
+        >>> tab.select(kx.Column('a').subtract([3, 4], iterator='/:\\:'))
+        pykx.Table(pykx.q('
+        a
+        -------------------
+        -2.978155 -3.978155
+        -2.955834 -3.955834
+        -2.937203 -3.937203
+        -2.833157 -3.833157
+        ..
+        '))
+        ```
+        """
+        return self.call('-', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def take(self, other, iterator=None, col_arg_ind=1, project_args=None):
+        """
+        Retrieve the first N rows from a column, or N elements from each item
+        of a column when an iterator is used. N is specified by the `other`
+        parameter.
+
+        Parameters:
+            other: An integer defining the number of elements to retrieve
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+            col_arg_ind: Determines the index within the multivariate function
+                where the column parameter will be used. Default 1.
+            project_args: The argument indices of a multivariate function which will be
+                projected on the function before invocation with use of an iterator.
+
+        Examples:
+
+        Retrieve 3 rows from a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, kx.q.til(10)),
+        ...     'b': kx.random.random([100, 3], kx.q.til(10))
+        ...     })
+        >>> tab.select(kx.Column('a').take(3).count())
+        pykx.Table(pykx.q('
+        a
+        --
+        10
+        12
+        24
+        '))
+        ```
+        """
+        # Unlike the other binary helpers here, the column defaults to argument
+        # index 1 of `#`, which leaves `other` (the count) at index 0
+        return self.call('#', other, iterator=iterator, col_arg_ind=col_arg_ind,
+                         project_args=project_args)
+
+    def value(self, iterator=None):
+        """
+        When passed an EnumVector will return the corresponding SymbolVector
+
+        Parameters:
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+
+        Examples:
+
+        Retrieve the symbols underlying an enumerated column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.q('([] a:`sym?`a`b`c`a)')
+        >>> tab.exec(kx.Column('a'))
+        pykx.EnumVector(pykx.q('`sym$`a`b`c`a'))
+        >>> tab.exec(kx.Column('a').value())
+        pykx.SymbolVector(pykx.q('`a`b`c`a'))
+        ```
+        """
+        return self.call('value', iterator=iterator)
+
+    def variance(self, sample=False, iterator=None):
+        """
+        Compute the variance, or the sample variance, of the items in a
+        column or of each row in a column
+
+        Parameters:
+            sample: Set `True` to return the sample variance; the default
+                `False` returns the variance
+            iterator: The iterator to apply when operating on the column,
+                for example `each` to execute per row. Defaults to `None`.
+
+        Examples:
+
+        Calculate the variance of a column
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.exec(kx.Column('a').variance())
+        pykx.FloatAtom(pykx.q('8.310944'))
+        ```
+
+        Calculate the sample variance for each row in a column:
+
+        ```python
+        >>> import pykx as kx
+        >>> tab = kx.Table(data={
+        ...     'a': kx.random.random(100, 10.0),
+        ...     'b': kx.random.random([100, 3], 10.0)
+        ...     })
+        >>> tab.select(kx.Column('b').variance(sample=True, iterator='each'))
+        pykx.Table(pykx.q('
+        b
+        ---------
+        6.023586
+        29.48778
+        6.318229
+        0.1609426
+        5.241295
+        ..
+        '))
+        ```
+        """
+        if sample:
+            return self.call('svar', iterator=iterator)
+        return self.call('var', iterator=iterator)
+
+
+class QueryPhrase:
+    """Special wrapper for a list which will be treated as a QueryPhrase.
+    For use with the Query API
+
+    Three attributes are maintained alongside one another:
+
+    - `_phrase`: the wrapped phrase content
+    - `_names`: the name(s) stored with the content of `_phrase`
+    - `_are_trees`: the flag(s) recording whether that content is a parse tree
+    """
+    def __init__(self, phrase, names=None, are_trees=False):
+        """
+        Wrap `phrase` for use as a query phrase
+
+        Parameters:
+            phrase: One of a `QueryPhrase` (its three attributes are shared with the
+                new object, not copied), a `ParseTree`, a `Column`, a `str`, a `dict`
+                mapping names to phrase entries, or any other object which is
+                stored exactly as supplied.
+            names: Names stored with the phrase. Ignored when `phrase` is a
+                `QueryPhrase`, `Column` or `dict`, which supply their own names.
+            are_trees: Parse tree flag(s) stored with the phrase. For a `dict` the
+                value is repeated once per entry. Ignored when `phrase` is a
+                `QueryPhrase` or `Column`.
+        """
+        if isinstance(phrase, QueryPhrase):
+            self._phrase = phrase._phrase
+            self._names = phrase._names
+            self._are_trees = phrase._are_trees
+        elif isinstance(phrase, Column):
+            self._phrase = [phrase._value]
+            self._names = [phrase._name]
+            self._are_trees = [phrase._is_tree]
+        elif isinstance(phrase, dict):
+            self._phrase = list(phrase.values())
+            self._names = list(phrase.keys())
+            self._are_trees = [are_trees] * len(phrase)
+        else:
+            if isinstance(phrase, ParseTree):
+                self._phrase = phrase._tree
+            elif isinstance(phrase, str):
+                self._phrase = ParseTree(phrase).enlist()._tree
+            else:
+                self._phrase = phrase
+            # `ParseTree` and `str` input previously left these two attributes
+            # unset, so `__repr__`, `append` and `to_dict` raised AttributeError.
+            # They now receive the supplied values like any other raw phrase.
+            self._names = names
+            self._are_trees = are_trees
+
+    def __repr__(self):
+        preamble = f'pykx.{type(self).__name__}'
+        return (f"{preamble}(names={self._names}, phrase={type(self._phrase)},"
+                f"are_trees={self._are_trees})")
+
+    def append(self, other):
+        """
+        Add `other` to the phrase as a single entry
+
+        A `Column` contributes its own value, name and parse tree flag. A
+        `QueryPhrase` is added as one nested entry (its phrase, names and flags
+        each become a single element). A `ParseTree` contributes its tree, and
+        anything else is added as supplied; both are stored with an empty name
+        and a `False` parse tree flag.
+
+        Parameters:
+            other: The object to add to this phrase
+        """
+        if isinstance(other, Column):
+            entry, name, is_tree = other._value, other._name, other._is_tree
+        elif isinstance(other, QueryPhrase):
+            entry, name, is_tree = other._phrase, other._names, other._are_trees
+        elif isinstance(other, ParseTree):
+            entry, name, is_tree = other._tree, '', False
+        else:
+            entry, name, is_tree = other, '', False
+        self._phrase.append(entry)
+        self._names.append(name)
+        self._are_trees.append(is_tree)
+
+    def extend(self, other):
+        """
+        Add the content of `other` to the phrase, one entry per item
+
+        A `QueryPhrase` contributes each of its entries together with their names
+        and flags. A `Column` describes exactly one entry and is therefore added
+        in the same way as `append`. The items of a `ParseTree`, or of any other
+        iterable, are each added with an empty name and a `False` parse tree flag.
+
+        Parameters:
+            other: The object whose content is added to this phrase
+        """
+        if isinstance(other, QueryPhrase):
+            self._phrase.extend(other._phrase)
+            self._names.extend(other._names)
+            self._are_trees.extend(other._are_trees)
+            return
+        # The remaining cases used to call `list.extend` with a `bool`, raising
+        # TypeError once `_phrase` had already been modified, and with a `str`
+        # name, which added the name one character at a time. The three lists
+        # are now always extended by the same number of entries.
+        if isinstance(other, Column):
+            self._phrase.append(other._value)
+            self._names.append(other._name)
+            self._are_trees.append(other._is_tree)
+            return
+        items = list(other._tree if isinstance(other, ParseTree) else other)
+        self._phrase.extend(items)
+        self._names.extend([''] * len(items))
+        self._are_trees.extend([False] * len(items))
+
+    def to_dict(self):
+        """Return a `dict` pairing each stored name with its phrase entry."""
+        return dict(zip(self._names, self._phrase))
+
+    def __and__(self, other):
+        """
+        Combine this phrase with a `Column` or another `QueryPhrase` using `&`
+
+        The phrase on the left of `&` is modified in place and is also the
+        object returned.
+
+        Raises:
+            TypeError: If `other` is neither a `Column` nor a `QueryPhrase`
+        """
+        if isinstance(other, Column):
+            self.append(other)
+        elif isinstance(other, QueryPhrase):
+            self.extend(other)
+        else:
+            raise TypeError(
+                f"Supplied object type '{type(other)}' cannot `&` off a `pykx.QueryPhrase`.")
+        return self
+
+
def _internal_k_list_wrapper(addr: int, incref: bool):
res = list(_wrappers._factory(addr, incref))
for i in range(len(res)):
@@ -4603,6 +10909,7 @@ def _internal_k_dict_to_py(addr: int):
'BooleanVector',
'ByteAtom',
'ByteVector',
+ 'Column',
'CharAtom',
'CharVector',
'Collection',
@@ -4647,6 +10954,7 @@ def _internal_k_dict_to_py(addr: int):
'NumericVector',
'Operator',
'Over',
+ 'ParseTree',
'PartitionedTable',
'Projection',
'ProjectionNull',
@@ -4676,7 +10984,9 @@ def _internal_k_dict_to_py(addr: int):
'TimestampVector',
'UnaryPrimative',
'UnaryPrimitive',
+ 'Variable',
'Vector',
+ 'QueryPhrase',
'_internal_k_list_wrapper',
'_internal_is_k_dict',
'_internal_k_dict_to_py',
diff --git a/tests/data/script.k b/tests/data/script.k
new file mode 100644
index 0000000..cb42fc9
--- /dev/null
+++ b/tests/data/script.k
@@ -0,0 +1,3 @@
+lambda:{"this is a test lambda"};
+data:10?10;
+q:0b;
diff --git a/tests/data/script.q b/tests/data/script.q
new file mode 100644
index 0000000..acde65a
--- /dev/null
+++ b/tests/data/script.q
@@ -0,0 +1,3 @@
+lambda:{"this is a test lambda"};
+data:10?10;
+q:1b;
diff --git a/tests/data/tmp1.csv b/tests/data/tmp1.csv
new file mode 100644
index 0000000..854db16
--- /dev/null
+++ b/tests/data/tmp1.csv
@@ -0,0 +1,11 @@
+col,a,b,c
+a,0,10,0
+b,1,9,3
+c,2,8,6
+a,3,7,9
+b,4,6,12
+c,5,5,15
+a,6,4,18
+b,7,3,21
+c,8,2,24
+a,9,1,27
\ No newline at end of file
diff --git a/tests/data/tmp2.csv b/tests/data/tmp2.csv
new file mode 100644
index 0000000..a3777e9
--- /dev/null
+++ b/tests/data/tmp2.csv
@@ -0,0 +1,11 @@
+a
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
\ No newline at end of file
diff --git a/tests/parse_tests.py b/tests/parse_tests.py
index f375bc2..aae9d1b 100644
--- a/tests/parse_tests.py
+++ b/tests/parse_tests.py
@@ -290,7 +290,11 @@ def make_tests(self): # noqa
'from textwrap import dedent',
'from operator import index',
'import pytz',
- 'import pandas as pd'
+ 'import pandas as pd',
+ 'import subprocess',
+ 'from packaging import version',
+ 'import uuid',
+ 'import itertools'
]
@@ -301,18 +305,19 @@ def make_tests(self): # noqa
'from time import sleep\n', 'utf-8'))
if 'ipc' in test_file[0]:
- f.write(bytes('''original_QHOME = os.environ['QHOME']
+ f.write(bytes(
+ '''
+original_QHOME = os.environ['QHOME']
from contextlib import closing, contextmanager
import signal
import socket
import subprocess
-from platform import system
+from platform import system\n
def random_free_port():
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('localhost', 0))
- return s.getsockname()[1]
+ return s.getsockname()[1]\n
def q_proc(q_init):
- proc = None
port = random_free_port()
proc = subprocess.Popen(
(lambda x: x.split() if system() != 'Windows' else x)(f'q -p {port}'),
@@ -325,8 +330,9 @@ def q_proc(q_init):
proc.stdin.write(b'\\n'.join((*q_init, b'')))
proc.stdin.flush()
sleep(2) # Windows does not support the signal-based approach used here
- return port
-import pykx as kx\n''', 'utf-8'))
+ return port\n
+import pykx as kx\n
+''', 'utf-8'))
f.write(bytes('q = kx.QConnection(port=q_proc([b""]))\n', 'utf-8'))
else:
f.write(bytes('import pykx as kx\nq = kx.q\n', 'utf-8'))
diff --git a/tests/qcumber_tests/conversions.quke b/tests/qcumber_tests/conversions.quke
index ebec9bb..359c0f3 100644
--- a/tests/qcumber_tests/conversions.quke
+++ b/tests/qcumber_tests/conversions.quke
@@ -39,6 +39,7 @@ feature conversions
feature default conversions
before each
t::.pykx.eval["lambda x: bytes(str(type(x)), 'utf-8')"][<];
+ v::.pykx.eval["lambda x: x"][<];
after each
.pykx.setdefault["default"];
should support default default
@@ -50,10 +51,23 @@ feature default conversions
.qu.compare[""; t ([]a:2 3; b:4 5)];
expect default for keyed table
.qu.compare[""; t ([a:2 3] b:4 5)];
+ expect default conversion of lists to return appropriate non null representation
+ data:(`test;::;0D20:23:25.800000000);
+ lst:.pykx.eval["lambda x:x";<]data;
+ lst~data
should support python default
expect python default
.pykx.setdefault["python"];
.qu.compare[""; t til 10];
+ should support raw default
+ expect return of raw representation of values
+ .pykx.setdefault["raw"];
+ data:"p"$100;
+ all (
+ -12h = type data;
+ 100 = v data;
+ "" ~ t data
+ )
should support numpy default
expect numpy default
.pykx.setdefault["numpy"];
diff --git a/tests/qcumber_tests/extra_functions.quke b/tests/qcumber_tests/extra_functions.quke
index 0ecac7e..a12de5a 100644
--- a/tests/qcumber_tests/extra_functions.quke
+++ b/tests/qcumber_tests/extra_functions.quke
@@ -23,6 +23,7 @@ feature .pykx.unwrap
should unwrap wrapped foreigns
expect a foreign to be contained
.qu.compare[112h; type .pykx.unwrap .pykx.eval"1"];
+
should pass on unwrapped foreigns
expect a foreign to be contained
.qu.compare[112h; type .pykx.unwrap[.pykx.eval["1"]`.]];
@@ -38,6 +39,14 @@ feature .pykx.toq
.qu.compare[-7h; type .pykx.toq .pykx.eval["1"]];
expect a short atom
.qu.compare[-5h; type .pykx.toq .pykx.eval["pykx.ShortAtom(1)"]];
+ should round trip if the item is not a foreign/wrapper
+ expect round trips to be handled gracefully for existing q data
+ all(
+ val~.pykx.toq val:til 10;
+ val~.pykx.toq val:100?0Ng;
+ val~.pykx.toq val:([]10?1f;10?1f);
+ val~.pykx.toq val:`a`b`c!1 2 3
+ )
feature .pykx.py2q
should return native q types from unwrapped foreign
@@ -50,6 +59,14 @@ feature .pykx.py2q
.qu.compare[-7h; type .pykx.py2q .pykx.eval["1"]];
expect a short atom
.qu.compare[-5h; type .pykx.py2q .pykx.eval["pykx.ShortAtom(1)"]];
+ should round trip if the item is not a foreign/wrapper
+ expect round trips to be handled gracefully for existing q data
+ all(
+ val~.pykx.py2q val:til 10;
+ val~.pykx.py2q val:100?0Ng;
+ val~.pykx.py2q val:([]10?1f;10?1f);
+ val~.pykx.py2q val:`a`b`c!1 2 3
+ )
feature .pykx.version
should return an appropriate type when executed
@@ -61,3 +78,12 @@ feature .pykx.debug
expect a general list with PyKX information as the first element
ret:.pykx.debugInfo[];
all(0h~type ret;ret[0]like"*PyKX information*")
+
+feature .pykx.print
+ should call print appropriately on python type objects
+ expect null to be returned in each case
+ all(
+ (::)~.pykx.print .pykx.import[`numpy];
+ (::)~.pykx.print til 10;
+ (::)~.pykx.print .pykx.import[`numpy;`:array][til 10]
+ )
diff --git a/tests/qcumber_tests/pykx.quke b/tests/qcumber_tests/pykx.quke
index 30ae7c8..94d081a 100644
--- a/tests/qcumber_tests/pykx.quke
+++ b/tests/qcumber_tests/pykx.quke
@@ -90,3 +90,44 @@ feature Typed wraps are wraps and can be unwrapped
.pykx.util.isf .pykx.unwrap .pykx.eval["lambda x: x";<]
expect 1b
.pykx.util.isf .pykx.unwrap .pykx.eval["lambda x: x";>]
+
+feature toq0 conversions to support char returns
+ before
+ str::.pykx.eval["\"qstring\""];
+ list::.pykx.eval["[\"qstring0\", \"qstring1\"]"];
+ dict::.pykx.eval["{\"a\":{\"b\":\"qstring0\"}, \"b\":\"qstring1\"}"];
+
+ should show returns by default with toq0 are symbols
+ expect 1b
+ .qu.compare[`qstring;.pykx.toq0 str]
+ expect 1b
+ .qu.compare[`qstring0;first .pykx.toq0[list]]
+ expect 1b
+ .qu.compare[`qstring0;.pykx.toq0[dict] . `a`b]
+
+ should show returns by default with toq0 are strings
+ expect 1b
+ .qu.compare["qstring";.pykx.toq0[str;1b]]
+ expect 1b
+ .qu.compare["qstring0";first .pykx.toq0[list;1b]]
+ expect 1b
+ .qu.compare["qstring0";.pykx.toq0[dict;1b] . `a`b]
+
+ should error if incorrect values provided to toq0
+ expect error if passed incorrect second argument
+ .[.pykx.toq0;(`test;`no);{x like "Supplied 2nd argument*"}]
+ expect error if too many arguments passed
+ .[.pykx.toq0;(`test;1b;1b);{x like "toq takes a maximum*"}]
+
+feature Qlog functions to not be defined in pykx namespace
+ should not have functions polluting the .pykx namespace
+ expect namespace to not have function names provided by qlog in .i namespace
+ @[{get x;0b};`.pykx.i.endpoint;1b]
+ expect namespace to not have functions in root provided by qlog
+ @[{get x;0b};`.pykx.setCorrelator;1b]
+
+ should have logging namespace populated when running on linux
+ expect logging to be defined if running on linux in root namespace
+ $[.z.o~`l64;@[{get x;1b};`.com_kx_log.setCorrelator;0b];1b]
+ expect logging functionality to have loaded in the .i namespace
+ $[.z.o~`l64;@[{get x;1b};`.com_kx_log.i.endpoint;0b];1b]
diff --git a/tests/qcumber_tests/utils.quke b/tests/qcumber_tests/utils.quke
new file mode 100644
index 0000000..c936c59
--- /dev/null
+++ b/tests/qcumber_tests/utils.quke
@@ -0,0 +1,29 @@
+feature .pykx.util.*
+ before
+ sym::`c`d`e;
+ should raise appropriate errors when passed invalid arguments
+ expect arguments to be passed successfully if only one supplied
+ input:(til 10;2;`a`b`c);
+ args:.pykx.util.parseArgs enlist pyarglist input;
+ input~{.pykx.wrap[x]`}@/:args[1]
+ expect error if multiple pyarglist values supplied
+ @[{.pykx.util.parseArgs x; 0b};
+ (pyarglist 1 2 3;pyarglist 1 2 3);
+ {x like "Expected only one arg list*"}]
+
+ should test html code
+ expect usage of html functionality to work with in-memory tables
+ 10h~type .pykx.util.html.memsplay[2 2;([]100?1f;100?`a`b`c;y:`sym$100?`c`d`e)]
+ expect usage of html functionality to work with in-memory tables
+ 10h~type .pykx.util.html.memsplay[100 100;([]10?1f;10?`a`b`c;y:`sym$10?`c`d`e)]
+ expect rowcols to return an appropriate result if out of bound range
+ output:"\n100 rows \303\227 3 columns
";
+ data:([]100?1f;100?`a`b`c;y:`sym$100?`c`d`e);
+ output~.pykx.util.html.rowcols[10 10;data;""]
+ expect no rowcols if in bound range
+ output:"";
+ data:([]100?1f;100?`a`b`c;y:`sym$100?`c`d`e);
+ output~.pykx.util.html.rowcols[200 200;data;""]
+ expect detectbadcols to operate under q
+ tab:flip (`a;`b;`a;`$"a b")!4 4#16?1f;
+ .pykx.util.html.detectbadcols[tab]~`dup`invalid!enlist each(`a;`$"a b")
diff --git a/tests/qcumber_tests/wrapped.quke b/tests/qcumber_tests/wrapped.quke
index e7b705e..f94d378 100644
--- a/tests/qcumber_tests/wrapped.quke
+++ b/tests/qcumber_tests/wrapped.quke
@@ -68,6 +68,21 @@ feature .pykx wrapped objects
expect only keyword arguments to be usable
.qu.compare[6; onlykwargs[`z pykw 2; `y pykw 2; `x pykw 2]`];
+ expect error if multiple dictionaries passed
+ @[{onlykwargs . x;0b};
+ (10;pykwargs `a`b!1 2;pykwargs `c`d!2 3);
+ {x like "Expected only one key word dictionary*"}]
+
+ expect error if duplicate keys supplied
+ @[{onlykwargs . x; 0b};
+ (10;pykwargs `a`b`a!1 2 3);
+ {x like "Expected only unique key names*"}]
+
+ expect error if non symbol keys used in keyword dictionary
+ @[{onlykwargs . x; 0b};
+ (10;pykwargs 1 2 3!1 2 3);
+ {x like "Expected Symbol Atom for*"}]
+
should support get attribute functionality
expect getattr can return functions
.qu.compare[10; .pykx.eval["pykx.LongVector(range(10))"][`:__len__][::]`];
diff --git a/tests/test_compress_encrypt.py b/tests/test_compress_encrypt.py
index 8d2cd0a..d557e54 100644
--- a/tests/test_compress_encrypt.py
+++ b/tests/test_compress_encrypt.py
@@ -27,9 +27,10 @@ def test_compress_encrypt_errors(kx):
kx.Compress(block_size=24)
assert 'block_size must be a power of 2' in str(err.value)
- with pytest.raises(ValueError) as err:
- kx.Compress(algo=kx.CompressionAlgorithm.zstd)
- assert "'CompressionAlgorithm.zstd' only supported on" in str(err.value)
+ if os.getenv('PYKX_4_1_ENABLED') is None:
+ with pytest.raises(ValueError) as err:
+ kx.Compress(algo=kx.CompressionAlgorithm.zstd)
+ assert "'CompressionAlgorithm.zstd' only supported on" in str(err.value)
with pytest.raises(ValueError) as err:
kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=100)
@@ -83,20 +84,3 @@ def test_encrypt_path():
# If this has run, the encryption key has been loaded appropriately
# this can be tested more rigorously once kdb+ 4.0 2024.03.02
assert kx.q('-36!(::)').py()
-
-
-@pytest.mark.isolate
-@pytest.mark.skipif(
- os.getenv('PYKX_THREADING') is not None,
- reason='Not supported with PYKX_THREADING'
-)
-def test_beta():
- import pykx as kx
-
- with pytest.raises(kx.QError) as err:
- kx.Compress()
- assert 'Attempting to use a beta feature "Compress' in str(err.value)
-
- with pytest.raises(kx.QError) as err:
- kx.Encrypt()
- assert 'Attempting to use a beta feature "Compress' in str(err.value)
diff --git a/tests/test_config.py b/tests/test_config.py
index 25363c8..74d30ec 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +1,7 @@
from pathlib import Path
+import os
+from tempfile import TemporaryDirectory
+import warnings
import pytest
@@ -13,3 +16,55 @@ def test_QHOME(kx):
def test_dir(kx):
assert isinstance(dir(kx.config), list)
assert sorted(dir(kx.config)) == dir(kx.config)
+
+
+@pytest.mark.isolate
+def test_missing_profile(capsys):
+    """Importing PyKX beside an empty `.pykx-config` reports the missing profile."""
+    original_dir = os.getcwd()
+    with TemporaryDirectory() as tmp_dir:
+        os.chdir(tmp_dir)
+        try:
+            open('.pykx-config', 'a').close()
+            import pykx as kx # noqa
+            out, _ = capsys.readouterr()
+        finally:
+            # Leave the temporary directory before it is cleaned up: removing the
+            # current working directory fails on Windows and otherwise leaves the
+            # process inside a directory which no longer exists
+            os.chdir(original_dir)
+    assert "Unable to locate specified 'PYKX_PROFILE': 'default' in file" in out
+
+
+@pytest.mark.isolate
+def test_qargs_single():
+    """A port supplied through QARGS is reported with a warning on import."""
+    os.environ['QARGS'] = '-p 5050'
+    with pytest.warns() as caught:
+        import pykx as kx
+    # One further warning is expected when PYKX_THREADING is set
+    expected_count = 1 if os.getenv('PYKX_THREADING') is None else 2
+    assert len(caught) == expected_count
+    assert 'setting a port in this way' in str(caught[0].message)
+    assert 2 == kx.q('2').py()
+
+
+@pytest.mark.isolate
+def test_qargs_multi():
+    """A port and a timer supplied through QARGS are each reported with a warning."""
+    os.environ['QARGS'] = '-p 5050 -t 1000'
+    with pytest.warns() as caught:
+        import pykx as kx
+    # One further warning is expected when PYKX_THREADING is set
+    expected_count = 2 if os.getenv('PYKX_THREADING') is None else 3
+    assert len(caught) == expected_count
+    assert 'setting a port in this way' in str(caught[0].message)
+    assert 'setting timers in this way' in str(caught[1].message)
+    assert 2 == kx.q('2').py()
+
+
+@pytest.mark.isolate
+def test_suppress_warnings(recwarn):
+    """With PYKX_SUPPRESS_WARNINGS set neither the QARGS port warning nor the
+    numpy call warning is recorded."""
+    os.environ['PYKX_SUPPRESS_WARNINGS'] = 'True'
+    os.environ['QARGS'] = '-p 5050'
+    with warnings.catch_warnings(record=True) as recorded:
+        warnings.simplefilter("always")
+        import numpy as np
+        import pykx as kx
+        np.max(kx.q.til(10))
+    texts = [str(entry.message) for entry in recorded]
+    for unwanted in ('setting a port in this way', 'Attempting to call numpy'):
+        assert not any(unwanted in text for text in texts)
diff --git a/tests/test_ctx.py b/tests/test_ctx.py
index 59381bc..09c995a 100644
--- a/tests/test_ctx.py
+++ b/tests/test_ctx.py
@@ -1,8 +1,11 @@
from contextlib import contextmanager
import os
from pathlib import Path
+from platform import system
import re
+import shutil
from tempfile import gettempdir
+import uuid
# Do not import pykx here - use the `kx` fixture instead!
import pytest
@@ -55,6 +58,10 @@ def test_register_without_args(q):
q._register()
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Temporary file testing flaky, requires rework'
+)
def test_register_by_path(q, q_script_with_k_script_present, kx):
q_script = q_script_with_k_script_present
q_script.rename(q_script.parent/'nameA.q')
@@ -69,6 +76,10 @@ def test_register_by_path(q, q_script_with_k_script_present, kx):
assert isinstance(q.nameA, kx.ctx.QContext)
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Temporary file testing flaky, requires rework'
+)
def test_local_register_by_name_q(q, q_script_with_k_script_present):
tmp_dir = q_script_with_k_script_present.resolve().parent
with cd(str(tmp_dir)):
@@ -76,22 +87,43 @@ def test_local_register_by_name_q(q, q_script_with_k_script_present):
assert q.script.q
-def test_find_namespace_in_q_file(q, tmp_path):
- with pytest.raises(AttributeError):
- q.testnamespace
- with cd(tmp_path):
- with open('testnamespace.q', 'w') as f:
- f.write('.testnamespace.true:1b\n')
- assert q.testnamespace.true
+@pytest.mark.skipif(
+    system() == 'Windows',
+    reason='Temporary file testing flaky, requires rework'
+)
+def test_find_namespace_in_q_file(q):
+    """A namespace becomes available once a matching `.q` file exists in the
+    current working directory."""
+    scratch = Path(os.getcwd()) / uuid.uuid4().hex[:7]
+    scratch.mkdir(parents=True)
+    try:
+        # Nothing defines the namespace before the script has been written
+        with pytest.raises(AttributeError):
+            q.testnamespace
+        with cd(scratch):
+            Path('testnamespace.q').write_text('.testnamespace.true:1b\n')
+            assert q.testnamespace.true
+    finally:
+        shutil.rmtree(scratch, ignore_errors=True)
-def test_reserved_in_ctx(q, tmp_path):
- with pytest.raises(AttributeError):
- q.testnamespace
- with cd(tmp_path):
- with open('name.q', 'w') as f:
- f.write('.name.test.update:{x+1}\n')
- assert q.name.test.update(1) == 2
+@pytest.mark.skipif(
+    system() == 'Windows',
+    reason='Temporary file testing flaky, requires rework'
+)
+def test_reserved_in_ctx(q):
+    """A function named after a reserved word (`update`) can be reached and
+    called through a context loaded from a `.q` file."""
+    scratch = Path(os.getcwd()) / uuid.uuid4().hex[:7]
+    scratch.mkdir(parents=True)
+
+    try:
+        # NOTE(review): this probes `q.testnamespace` although the script below
+        # defines `.name` - confirm that `q.name` was not intended
+        with pytest.raises(AttributeError):
+            q.testnamespace
+        with cd(scratch):
+            Path('name.q').write_text('.name.test.update:{x+1}\n')
+            assert q.name.test.update(1) == 2
+    finally:
+        shutil.rmtree(scratch, ignore_errors=True)
def test_python_keyword_as_q_fn(q):
@@ -275,3 +307,22 @@ def test_ctx_no_overwrite_qerror(q_port, kx):
with kx.QConnection(port=q_port, username='a', password='aaaa') as q:
q('type')
assert 'Access Denied' in str(err.value)
+
+
+@pytest.mark.unlicensed
+def test_operator_retrieval(kx):
+    """Every name in `kx.q.operators` resolves to a documented operator when
+    licensed, and raises `QError` when unlicensed."""
+    if not kx.licensed:
+        for name in kx.q.operators.keys():
+            with pytest.raises(kx.QError) as err:
+                getattr(kx.q, name)
+            assert 'Cannot load requested' in str(err.value)
+        return
+    for name in kx.q.operators.keys():
+        operator = getattr(kx.q, name)
+        assert isinstance(operator, (kx.Operator, kx.Iterator))
+        assert isinstance(operator.__doc__, str)
+        assert name in operator.__doc__
+
+
+def test_context_loadfile(kx):
+    """The `csvutil` context can be retrieved from `kx.q` as a `QContext`."""
+    loaded = kx.q.csvutil
+    assert isinstance(loaded, kx.ctx.QContext)
diff --git a/tests/test_db.py b/tests/test_db.py
index 8004656..1717b31 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -303,15 +303,56 @@ def test_subview(kx):
@pytest.mark.isolate
-@pytest.mark.skipif(
- os.getenv('PYKX_THREADING') is not None,
- reason='Not supported with PYKX_THREADING'
-)
-def test_beta():
+def test_q_lo_move_dir():
+ os.environ['PYKX_4_1_ENABLED'] = 'True'
+ os.environ['PYKX_BETA_FEATURES'] = 'True'
+ curr_dir = os.getcwd()
import pykx as kx
- with pytest.raises(kx.QError) as err:
- kx.DB()
- assert 'Attempting to use a beta feature "Data' in str(err.value)
+ db = kx.DB(path='db') # noqa: F841
+ assert curr_dir != os.getcwd()
+ os.unsetenv('PYKX_4_1_ENABLED')
+ os.unsetenv('PYKX_BETA_FEATURES')
+
+
+@pytest.mark.isolate
+def test_q_lo_keep_dir():
+    """Creating a `DB` with `change_dir=False` leaves the working directory alone."""
+    os.environ['PYKX_4_1_ENABLED'] = 'True'
+    os.environ['PYKX_BETA_FEATURES'] = 'True'
+    try:
+        curr_dir = os.getcwd()
+        import pykx as kx
+        db = kx.DB(path='db', change_dir=False) # noqa: F841
+        assert curr_dir == os.getcwd()
+    finally:
+        # `os.unsetenv` does not update `os.environ`, so the variables stayed
+        # visible to this process and to any child it started; removing them
+        # from the mapping unsets them in both places, even if the test failed
+        os.environ.pop('PYKX_4_1_ENABLED', None)
+        os.environ.pop('PYKX_BETA_FEATURES', None)
+
+
+def test_q_lo_40(kx):
+    """Without PYKX_4_1_ENABLED the `change_dir` and `load_scripts` options of
+    `DB` are rejected with a `QError`."""
+    if os.getenv('PYKX_4_1_ENABLED') is not None:
+        return
+    for unsupported in ({'change_dir': False}, {'load_scripts': False}):
+        with pytest.raises(kx.QError) as err:
+            db = kx.DB(path='db', **unsupported) # noqa: F841
+        assert 'behavior only supported with PYKX_4_1_ENABLED' in str(err.value)
+
+
+@pytest.mark.isolate
+def test_spaces_load(tmp_path):
+    """A database can be created in, and reloaded from, a path containing a space."""
+    # prior to using util.loadfile the db.create/load would fail with nyi
+    db_dir = tmp_path / 'test directory' / 'db'
+    import pykx as kx
+    db = kx.DB(path=db_dir)
+    columns = {
+        'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'),
+        'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'),
+        'p': kx.q('101 102 101.5 102.5'),
+        'sz': kx.q('100 200 150 210'),
+        'sym': kx.q('`a`b`b`c')
+    }
+    db.create(kx.Table(data=columns), 't', 'date', by_field='sym', sym_enum='sym')
+    assert db.tables == ['t']
+    db.load(path=db_dir, overwrite=True)
+    assert db.tables == ['t']
@pytest.mark.order(-1)
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
index 2f91c00..5164734 100644
--- a/tests/test_exceptions.py
+++ b/tests/test_exceptions.py
@@ -1,3 +1,7 @@
+from platform import system
+import subprocess
+import time
+
# Do not import pykx here - use the `kx` fixture instead!
import pytest
@@ -24,6 +28,41 @@ def test_artificially_raised_error(kx):
raise kx.PyArrowUnavailable()
+def test_updated_messages(kx):
+    """Check the expanded error messages for common q errors and confirm that the
+    existing IPC connection messages are still produced."""
+    with pytest.raises(kx.QError) as err:
+        kx.q('cos:{x+1}')
+    assert 'assign: Cannot redefine a reserved' in str(err.value)
+    with pytest.raises(kx.QError) as err:
+        tab = kx.q('([k:0 1]a:1 2)')
+        tab.insert([0, 3])
+    assert 'insert: Cannot insert a record with an existing key' in str(err.value)
+    with pytest.raises(kx.QError) as err:
+        kx.q('3 2').sorted()
+    assert 's-fail: Cannot set "sorted" attribute on an unsorted' in str(err.value)
+    with pytest.raises(kx.QError) as err:
+        kx.q('2 3 2').unique()
+    assert 'u-fail: Failed to do one of the following' in str(err.value)
+
+    # Attempts to run the following tests on Windows results in failures due to
+    # security issues on public runners
+    if system() == 'Windows':
+        return None
+
+    # Test that existing IPC messages are maintained
+    with pytest.raises(kx.QError) as err:
+        kx.SyncQConnection(port=1234)
+    assert 'hop. OS reports: Connection refused' in str(err.value)
+
+    q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
+    with kx.PyKXReimport():
+        proc = subprocess.Popen([q_exe_path, 'tests/test_files/pw.q', '-p', '15001'])
+    try:
+        time.sleep(2)
+        with pytest.raises(kx.QError) as err:
+            kx.SyncQConnection(port=15001)
+        assert 'access: Failed to connect' in str(err.value)
+    finally:
+        # Always stop and reap the q process: a failed assertion previously left
+        # it running and holding port 15001 for every test which followed
+        proc.kill()
+        proc.wait()
+
+
@pytest.mark.unlicensed
def test_dir(kx):
assert isinstance(dir(kx.exceptions), list)
diff --git a/tests/test_files/pw.q b/tests/test_files/pw.q
new file mode 100644
index 0000000..079e14f
--- /dev/null
+++ b/tests/test_files/pw.q
@@ -0,0 +1 @@
+.z.pw:{[u;p]0b}
diff --git a/tests/test_ipc.py b/tests/test_ipc.py
index eb990b4..ef11358 100644
--- a/tests/test_ipc.py
+++ b/tests/test_ipc.py
@@ -3,7 +3,7 @@
from datetime import date
from io import StringIO
import os
-from platform import system
+from platform import system, uname
import signal
import subprocess
import sys
@@ -273,7 +273,7 @@ def test_async_with_q_features(kx, q_port):
assert q.qsql.select('t').py() == t
assert q.qsql.exec('t', where=['m>2022.02.02']).py() == \
{'k1': 3, 'k2': 'z', 'm': date(2022, 3, 3)}
- q.qsql.update('t', modify='sure', columns={'j': '"j"$m*2'}, where=['m>2022.02.01', 'k1<3'])
+ q.qsql.update('t', inplace='sure', columns={'j': '"j"$m*2'}, where=['m>2022.02.01', 'k1<3'])
assert q('t . ((2;`y);`j)', wait=True).py() == 16136
assert q.qsql.delete('t', 'm').values().keys().py() == ['j']
@@ -383,8 +383,12 @@ async def test_uninitialized_connection(kx, q_port):
@pytest.mark.unlicensed
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='SSL test updates not presently implemented on Windows'
+)
def test_ssl_info(kx):
- if system() == 'Linux':
+ if (system() == 'Linux') & (uname()[4] == 'x86_64'):
assert isinstance(kx.ssl_info(), kx.Dictionary)
@@ -563,6 +567,13 @@ def test_tls():
assert q('til 10').py() == list(range(10))
with kx.SecureQConnection(port=port, tls=True) as q:
assert q('til 10').py() == list(range(10))
+ with kx.QConnection(port=port, tls=True) as q:
+ q('func:{x}')
+ assert q('func', 1).py() == 1
+ assert q(b'func', 1).py() == 1
+ assert q(kx.CharVector('func'), 1).py() == 1
+ assert q(kx.SymbolAtom('func'), 1).py() == 1
+ assert q(q('{x}'), 1).py() == 1
finally:
if proc is not None:
proc.stdin.close()
@@ -895,72 +906,149 @@ async def test_debug_kwarg_embedded(kx, q):
assert '[1]' in str(e)
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Temporary file testing flaky, requires rework'
+)
@pytest.mark.unlicensed
-def test_SyncQConnection_reconnect(kx):
+def test_context_loadfile(kx):
q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15001'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+    with kx.PyKXReimport():
+        proc = subprocess.Popen(
+            [q_exe_path, '-p', '15001'],
+        )
+    try:
+        time.sleep(2)  # wait before connecting to the freshly started q process
+        conn = kx.SyncQConnection(port=15001)
+        # csvutil must be available as a context on the connection
+        assert isinstance(conn.csvutil, kx.ctx.QContext)
+    finally:
+        # Stop the q process whether the check passes, fails or raises
+        proc.kill()
+
+
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Subprocess requiring tests not currently operating on Windows consistently'
+)
+@pytest.mark.unlicensed
+def test_SyncQConnection_reconnect(kx, capsys):
+ q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15001'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
- conn = kx.QConnection(port=15001, reconnection_attempts=1)
+ conn = kx.QConnection(port=15001, reconnection_attempts=3)
assert conn('til 20').py() == list(range(20))
proc.kill()
time.sleep(2)
+
with pytest.raises(BaseException):
conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15001'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
+ time.sleep(2)
+ assert conn('til 10').py() == list(range(10))
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15001'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ conn = kx.QConnection(port=15001, reconnection_attempts=3, reconnection_delay=1.0)
+ assert conn('til 10').py() == list(range(10))
+
+ proc.kill()
time.sleep(2)
+
+ with pytest.raises(BaseException):
+ conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15001'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
+ time.sleep(2)
+ conn = kx.QConnection(port=15001,
+ reconnection_attempts=3,
+ reconnection_delay=1.0,
+ reconnection_function=lambda x: x)
assert conn('til 10').py() == list(range(10))
+
+ proc.kill()
+ time.sleep(2)
+
+ with pytest.raises(BaseException):
+ conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' not in captured.err
+
proc.kill()
time.sleep(2)
@pytest.mark.unlicensed
-def test_context_loadfile(kx):
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Subprocess requiring tests not currently operating on Windows consistently'
+)
+@pytest.mark.xfail(reason='Flaky on several platforms')
+def test_SecureQConnection_reconnect(kx, capsys):
q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
proc = subprocess.Popen(
- [q_exe_path, '-p', '15001'],
+ [q_exe_path, '-p', '15002'],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT
)
time.sleep(2)
- conn = kx.SyncQConnection(port=15001)
- try:
- assert isinstance(conn.csvutil, kx.ctx.QContext)
- except BaseException:
- proc.kill()
- assert 1==0 # Force failure in the case csvutil not available as a ctx
- proc.kill()
+ conn = kx.SecureQConnection(port=15002, reconnection_attempts=3)
+ assert conn('til 20').py() == list(range(20))
+ proc.kill()
+ time.sleep(2)
+ with pytest.raises(BaseException):
+ conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
-@pytest.mark.unlicensed
-@pytest.mark.xfail(reason='Flaky on several platforms')
-def test_SecureQConnection_reconnect(kx):
- q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
proc = subprocess.Popen(
[q_exe_path, '-p', '15002'],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT
)
time.sleep(2)
+ assert conn('til 10').py() == list(range(10))
- conn = kx.SecureQConnection(port=15002, reconnection_attempts=1)
+ conn = kx.SecureQConnection(port=15002, reconnection_attempts=3, reconnection_delay=1.0)
+ assert conn('til 10').py() == list(range(10))
- assert conn('til 20').py() == list(range(20))
proc.kill()
time.sleep(2)
+
with pytest.raises(BaseException):
conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' in captured.err
proc = subprocess.Popen(
[q_exe_path, '-p', '15002'],
@@ -968,25 +1056,45 @@ def test_SecureQConnection_reconnect(kx):
stderr=subprocess.STDOUT
)
time.sleep(2)
+ conn = kx.SecureQConnection(port=15002,
+ reconnection_attempts=3,
+ reconnection_delay=1.0,
+ reconnection_function=lambda x: x)
assert conn('til 10').py() == list(range(10))
+
+ proc.kill()
+ time.sleep(2)
+
+ with pytest.raises(BaseException):
+ conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' not in captured.err
+
proc.kill()
time.sleep(2)
@pytest.mark.asyncio
@pytest.mark.unlicensed
-async def test_AsyncQConnection_reconnect(kx):
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Subprocess requiring tests not currently operating on Windows consistently'
+)
+async def test_AsyncQConnection_reconnect(kx, capsys):
q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15003'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15003'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
conn = await kx.AsyncQConnection(
port=15003,
- reconnection_attempts=1,
+ reconnection_attempts=3,
event_loop=asyncio.get_event_loop()
)
@@ -995,12 +1103,16 @@ async def test_AsyncQConnection_reconnect(kx):
time.sleep(2)
with pytest.raises(BaseException):
await conn('til 5')
-
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15003'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15003'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
assert (await conn('10?`a`b`c`d')).py() is None
assert (await conn('til 10')).py() == list(range(10))
@@ -1010,47 +1122,80 @@ async def test_AsyncQConnection_reconnect(kx):
time.sleep(2)
with pytest.raises(BaseException):
await fut
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15003'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15003'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
assert (await fut2) is None
assert (await conn('10?`a`b`c`d')).py() is None
assert (await conn('til 10')).py() == list(range(10))
+
+ conn = await kx.AsyncQConnection(
+ port=15003,
+ reconnection_attempts=3,
+ reconnection_delay=1.0,
+ reconnection_function=lambda x: x,
+ event_loop=asyncio.get_event_loop()
+ )
+
+ assert (await conn('til 20')).py() == list(range(20))
proc.kill()
time.sleep(2)
+ with pytest.raises(BaseException):
+ await conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' not in captured.err
+
@pytest.mark.asyncio
@pytest.mark.unlicensed
-async def test_AsyncQConnection_reconnect_with_event_loop(kx, event_loop):
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Subprocess requiring tests not currently operating on Windows consistently'
+)
+async def test_AsyncQConnection_reconnect_with_event_loop(kx, event_loop, capsys):
q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip()
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15004'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15004'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
conn = await kx.AsyncQConnection(
port=15004,
- reconnection_attempts=1,
+ reconnection_attempts=3,
event_loop=event_loop
)
assert (await conn('til 20')).py() == list(range(20))
proc.kill()
time.sleep(2)
+
with pytest.raises(BaseException):
await conn('til 5')
-
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15004'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15004'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
assert (await conn('10?`a`b`c`d')).py() is None
assert (await conn('til 10')).py() == list(range(10))
@@ -1060,14 +1205,35 @@ async def test_AsyncQConnection_reconnect_with_event_loop(kx, event_loop):
time.sleep(2)
with pytest.raises(BaseException):
await fut
- proc = subprocess.Popen(
- [q_exe_path, '-p', '15004'],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT
- )
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+
+ with kx.PyKXReimport():
+ proc = subprocess.Popen(
+ [q_exe_path, '-p', '15004'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT
+ )
time.sleep(2)
assert (await fut2) is None
assert (await conn('10?`a`b`c`d')).py() is None
assert (await conn('til 10')).py() == list(range(10))
+
+ conn = await kx.AsyncQConnection(
+ port=15004,
+ reconnection_attempts=3,
+ reconnection_delay=1.0,
+ reconnection_function=lambda x: x,
+ event_loop=event_loop
+ )
+ assert (await conn('til 20')).py() == list(range(20))
proc.kill()
time.sleep(2)
+
+ with pytest.raises(BaseException):
+ await conn('til 5')
+ captured = capsys.readouterr()
+ assert 'trying again in 0.5 seconds' not in captured.err
+ assert 'trying again in 1.0 seconds' in captured.err
+ assert 'trying again in 2.0 seconds' not in captured.err
diff --git a/tests/test_license.py b/tests/test_license.py
index 7399b33..fef7472 100644
--- a/tests/test_license.py
+++ b/tests/test_license.py
@@ -34,7 +34,7 @@ def test_fallback_to_unlicensed_mode_error(tmp_path):
)
def test_unlicensed_signup(tmp_path, monkeypatch):
os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute())
- inputs = iter(['N'])
+ inputs = iter(['N', 'N'])
monkeypatch.setattr('builtins.input', lambda _: next(inputs))
import pykx as kx
assert 1 == kx.toq(1).py()
@@ -75,7 +75,7 @@ def test_invalid_commercial_input(tmp_path, monkeypatch):
)
def test_licensed_signup_no_file(tmp_path, monkeypatch):
os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute())
- inputs = iter(['Y', '1', 'n', '1', '/test/test.blah'])
+ inputs = iter(['Y', 'n', '1', 'n', '1', '/test/test.blah'])
monkeypatch.setattr('builtins.input', lambda _: next(inputs))
try:
import pykx as kx # noqa: F401
@@ -89,7 +89,7 @@ def test_licensed_signup_no_file(tmp_path, monkeypatch):
)
def test_licensed_signup_invalid_b64(tmp_path, monkeypatch):
os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute())
- inputs = iter(['Y', '1', 'n', '2', 'data:image/png;test'])
+ inputs = iter(['Y', 'n', '1', 'n', '2', 'data:image/png;test'])
monkeypatch.setattr('builtins.input', lambda _: next(inputs))
try:
import pykx as kx # noqa: F401
@@ -107,7 +107,7 @@ def test_licensed_success_file(monkeypatch):
qhome_path = os.environ['QHOME']
os.unsetenv('QLIC')
os.unsetenv('QHOME')
- inputs = iter(['Y', '1', 'n', '1', qhome_path + '/kc.lic'])
+ inputs = iter(['Y', 'n', '1', 'n', '1', qhome_path + '/kc.lic'])
monkeypatch.setattr('builtins.input', lambda _: next(inputs))
import pykx as kx
@@ -125,7 +125,23 @@ def test_licensed_success_b64(monkeypatch):
os.unsetenv('QHOME')
with open(qhome_path + '/kc.lic', 'rb') as f:
license_content = base64.encodebytes(f.read())
- inputs = iter(['Y', '1', 'n', '2', str(license_content)])
+ inputs = iter(['Y', 'n', '1', 'n', '2', str(license_content)])
+ monkeypatch.setattr('builtins.input', lambda _: next(inputs))
+
+ import pykx as kx
+ assert kx.licensed
+ assert [0, 1, 2, 3, 4] == kx.q.til(5).py()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_licensed_available(monkeypatch):
+ qhome_path = os.environ['QHOME']
+ os.unsetenv('QLIC')
+ os.unsetenv('QHOME')
+ inputs = iter(['Y', 'Y', qhome_path + '/kc.lic'])
monkeypatch.setattr('builtins.input', lambda _: next(inputs))
import pykx as kx
diff --git a/tests/test_nbextension.py b/tests/test_nbextension.py
new file mode 100644
index 0000000..0f51b83
--- /dev/null
+++ b/tests/test_nbextension.py
@@ -0,0 +1,85 @@
+import shutil
+
+import pytest
+
+
+def save_helper(location, code, instructions):
+    """Call `nbextension.q(instructions, code)`, then check `location` holds `code`.
+
+    The first path component of `location` is removed afterwards, also when
+    the call or the content check fails, so the folder is not left behind.
+    """
+    from pykx import nbextension
+    try:
+        nbextension.q(instructions, code)
+        with open(location, "r") as f:
+            assert f.read() == code
+    finally:
+        shutil.rmtree(location.split('/')[0], ignore_errors=True)
+
+
+def _assert_locked_save(q, location, code, instructions):
+    """Call `nbextension.q(instructions, code)`, load `location` into `q` and check it.
+
+    `secret_func secret` must match `t:3 6 9;t % 3` element-wise and the string
+    form of `secret_func` must be `locked`. The first path component of
+    `location` is always removed afterwards.
+    """
+    from pykx import nbextension
+    try:
+        nbextension.q(instructions, code)
+        q('\\l ' + location)
+        res = q('secret_func secret')
+        assert (res == q('t:3 6 9;t % 3')).all()
+        assert str(q('secret_func')) == 'locked'
+    finally:
+        shutil.rmtree(location.split('/')[0], ignore_errors=True)
+
+
+@pytest.mark.unlicensed
+def test_save_unexecuted(kx):
+    location = 'testfolder/file.q'
+    code = 't:1 2 3\nt * 19\njunk junk junk\n'
+    instructions = f'--save {location} --execute False'
+    save_helper(location, code, instructions)
+
+
+def test_save_executed(kx):
+    location = 'testfolder/file.q'
+    code = 't:1 2 3\nt * 19\n'
+    instructions = f'--save {location}'
+    save_helper(location, code, instructions)
+
+
+@pytest.mark.unlicensed
+def test_save_unexecuted_unlicensed(kx):
+    location = 'testfolder/file.q'
+    code = 't:1 2 3\nt * 19\njunk junk junk\n'
+    instructions = f'--save {location} --execute False'
+    save_helper(location, code, instructions)
+
+
+def test_save_locked(q, kx):
+    location = 'secretfolder/file.q_'
+    code = 'secret:3 6 9\nsecret_func:{x % 3}\n'
+    # Same check with `--execute False` and with the default execution setting
+    _assert_locked_save(q, location, code, f'--save {location} --execute False')
+    _assert_locked_save(q, location, code, f'--save {location}')
+
+
+@pytest.mark.unlicensed
+def test_save_ipc(kx, q_port):
+    port = str(q_port)
+    location = 'testfolder/file.q'
+    code = 't:1 2 3\nt * 19\n'
+    instructions = f'--port {port} --save {location}'
+    save_helper(location, code, instructions)
+
+
+@pytest.mark.unlicensed
+def test_save_ipc_locked(q, q_port):
+    port = str(q_port)
+    location = 'secretfolder/file.q_'
+    code = 'secret:3 6 9\nsecret_func:{x % 3}\n'
+    instructions = f'--save {location} --execute False --port {port}'
+    _assert_locked_save(q, location, code, instructions)
diff --git a/tests/test_pandas_agg.py b/tests/test_pandas_agg.py
index 7077f8a..6150d9e 100644
--- a/tests/test_pandas_agg.py
+++ b/tests/test_pandas_agg.py
@@ -72,6 +72,10 @@ def test_dict_funcs(q, kx):
assert isinstance(dict_str, kx.KeyedTable)
assert q('{x~y}', dict_str_max, max_ret)
assert q('{x~y}', dict_str_min, min_ret)
+ dict_str_max = kx.q.qsql.select(dict_str['x1'], where=kx.Column('function') == 'max')
+ dict_str_min = kx.q.qsql.select(dict_str['x'], where=kx.Column('function') == 'min')
+ assert q('{x~y}', dict_str_max, max_ret)
+ assert q('{x~y}', dict_str_min, min_ret)
def mode(x):
return statistics.mode(x)
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index 78b0d73..28ad96a 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -8,11 +8,14 @@
import pytest
-def check_result_and_type(kx, tab, result):
- if ((isinstance(tab, kx.Table) or isinstance(tab, kx.KeyedTable))
- and tab.py() == result.py() if isinstance(result, kx.K) else result):
- return True
- return False
+def check_result_and_type(kx, tab, result, type=None):
+    """Return True only if `tab` is a K object (and a `type` instance when given)
+    whose `.py()` value equals `result`, converted via `.py()` if it is a K object."""
+    if type is not None and not isinstance(tab, type):
+        return False
+    if not isinstance(tab, kx.K):
+        return False
+    return bool(tab.py() == (result.py() if isinstance(result, kx.K) else result))
def test_api_meta_error(kx):
@@ -101,34 +104,42 @@ def test_df_pop(kx, q):
def test_df_get(kx, q):
df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)')
- assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
- assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
+ assert check_result_and_type(kx, df.get('x'), [x for x in range(10)], kx.LongVector)
+ assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), [10 - x for x in range(10)], kx.LongVector) # noqa E501
assert check_result_and_type(kx, df.get(['x', 'y']), {
'x': [x for x in range(10)],
'y': [10 - x for x in range(10)]
- })
+ }, kx.Table)
assert df.get(['y', 'z']).py() == df[['y', 'z']].py()
assert df.get(['x', 'y']).py() == df[['x', 'y']].py()
- assert df.get('r') is None
+    with pytest.raises(kx.QError) as err:  # no default supplied, so a missing column raises
+        df.get('r')
+    assert "inaccessible column: r" in str(err.value)
+    with pytest.raises(kx.QError) as err:
+        df.get(['x', 'r'])
+    assert "inaccessible column: r" in str(err.value)
assert df.get('r', default=5) == 5
- assert df.get(['x', 'r']) is None
assert df.get(['x', 'r'], default=5) == 5
def test_df_get_keyed(kx, q):
df = q('([x: til 10] y: 10 - til 10; z: 10?`a`b`c)')
- assert check_result_and_type(kx, df.get('x'), {'x': [x for x in range(10)]})
- assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), {'y': [10 - x for x in range(10)]})
+ assert check_result_and_type(kx, df.get('x'), [x for x in range(10)], kx.LongVector)
+ assert check_result_and_type(kx, df.get(kx.SymbolAtom('y')), [10 - x for x in range(10)], kx.LongVector) # noqa E501
assert check_result_and_type(kx, df.get(['x', 'y']), {
'x': [x for x in range(10)],
'y': [10 - x for x in range(10)]
})
assert df.get(['y', 'z']).py() == q.value(df[['y', 'z']]).py()
- assert df.get('r') is None
- assert df.get('r', default=5) == 5
- assert df.get(['x', 'r']) is None
assert df.get(['x', 'r'], default=5) == 5
+ assert df.get('r', default=5) == 5
+    with pytest.raises(kx.QError) as err:  # no default supplied, so a missing column raises
+        df.get('r')
+    assert "inaccessible column: r" in str(err.value)
+    with pytest.raises(kx.QError) as err:
+        df.get(['x', 'r'])
+    assert "inaccessible column: r" in str(err.value)
def test_df_at(q):
@@ -174,32 +185,32 @@ def test_df_replace_self(q):
def test_df_loc(kx, q):
df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
- assert check_result_and_type(kx, df.loc[0, :], {'y': [10, 9], 'z': ['a', 'a']})
+ assert check_result_and_type(kx, df.loc[0], {'x': [0], 'y': [10], 'z': ['a']})
+ assert check_result_and_type(kx, df.loc[[1]], [{'x': [1], 'y': [9], 'z': ['a']}])
+ assert check_result_and_type(kx, df.loc[[0, 1]], [{'x': [0], 'y': [10], 'z': ['a']}, {'x': [1], 'y': [9], 'z': ['a']}]) # noqa E501
+ assert check_result_and_type(kx, df.loc[0, :], {'x': [0], 'y': [10], 'z': ['a']})
def test_df_loc_keyed(kx, q):
df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
assert check_result_and_type(kx, df.loc[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.loc[[1]], {'y': 9, 'z': 'a'})
+ assert check_result_and_type(kx, df.loc[[1]], {'y': [9], 'z': ['a']})
assert check_result_and_type(kx, df.loc[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
assert check_result_and_type(kx, df.loc[df['y'] < 100], df.py())
def test_df_loc_cols(kx, q):
df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.loc[[0, 1], 'z':], {'z': ['a', 'a']})
+ assert check_result_and_type(kx, df.loc[[0, 1], 'z':], [{'z': ['a']}, {'z': ['a']}])
assert check_result_and_type(kx, df[[0, 1], :'y'], {'y': [10, 9]})
- assert check_result_and_type(kx, df[[0, 1], 'y':'y'], {'y': [10, 9]})
+ assert check_result_and_type(kx, df[[0, 1], 'y':'y'], [{'y': [10]}, {'y': [9]}])
assert check_result_and_type(kx, df[[0, 1], :2], {'y': [10, 9]})
def test_df_getitem(kx, q):
df = q('([x: til 10] y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
assert check_result_and_type(kx, df[0], {'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df[[1]], {'y': 9, 'z': 'a'})
+ assert check_result_and_type(kx, df[[1]], {'y': [9], 'z': ['a']})
assert check_result_and_type(kx, df[[0, 1]], {'y': [10, 9], 'z': ['a', 'a']})
assert check_result_and_type(kx, df[:], df.py())
assert check_result_and_type(kx, df[:, ['x', 'y']], q('([x: til 10] y: 10 - til 10)').py())
@@ -231,7 +242,7 @@ def test_df_loc_set(kx, q):
assert check_result_and_type(
kx,
df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ q('([] x: til 10; y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
)
with pytest.raises(ValueError):
df.loc[df['z'] == 'a'] = 99
@@ -296,14 +307,14 @@ def test_df_iloc_set(kx, q):
assert check_result_and_type(
kx,
df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)')
)
df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
df.iloc[df['z'] == 'a', 'y'] = 99
assert check_result_and_type(
kx,
df,
- q('([x: til 10] y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)').py()
+ q('([] x: til 10; y: (99 99),8 - til 8; z: `a`a`b`b`c`c`d`d`e`e)')
)
with pytest.raises(ValueError):
df.iloc[df['z'] == 'a'] = 99
@@ -320,14 +331,14 @@ def test_df_iloc(kx, q):
assert check_result_and_type(kx, df.iloc[df['y'] < 100], df.py())
df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
assert check_result_and_type(kx, df.iloc[:-2], df.head(8).py())
- assert check_result_and_type(kx, df.iloc[0], {'x': 0, 'y': 10, 'z': 'a'})
- assert check_result_and_type(kx, df.iloc[[0]], {'x': 0, 'y': 10, 'z': 'a'})
+ assert check_result_and_type(kx, df.iloc[0], {'x': [0], 'y': [10], 'z': ['a']})
+ assert check_result_and_type(kx, df.iloc[[0]], {'x': [0], 'y': [10], 'z': ['a']})
assert check_result_and_type(
kx,
df.iloc[::-1],
{
- 'x': [10 - x for x in range(10)],
- 'y': [x for x in range(10)],
+ 'x': [9 - x for x in range(10)],
+ 'y': [x + 1 for x in range(10)],
'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a']
}
)
@@ -361,21 +372,22 @@ def test_df_iloc(kx, q):
def test_df_iloc_with_cols(kx, q):
df = q('([] x: til 10; y: 10 - til 10; z: `a`a`b`b`c`c`d`d`e`e)')
- assert check_result_and_type(kx, df.iloc[0, 0], {'x': 0, 'z': 'a'})
- assert check_result_and_type(kx, df.iloc[[0], [2]], {'z': 'a'})
+ assert check_result_and_type(kx, df.iloc[0, 0], {'x': [0]})
+ assert check_result_and_type(kx, df.iloc[[0], [2]], {'z': ['a']})
assert check_result_and_type(
kx,
df.iloc[::-1, ::-1],
{
'z': ['e', 'e', 'd', 'd', 'c', 'c', 'b', 'b', 'a', 'a'],
- 'y': [x for x in range(10)],
- 'x': [10 - x for x in range(10)]
+ 'y': [1 + x for x in range(10)],
+ 'x': [9 - x for x in range(10)]
}
)
assert check_result_and_type(
kx,
df.head(4).iloc[[True, False, True, False], [False, True, False]],
{
+ 'x': [0, 2],
'y': [10, 8]
}
)
@@ -415,7 +427,7 @@ def test_df_iloc_with_cols(kx, q):
assert check_result_and_type(
kx,
df.iloc[:, :-2],
- q('([] x: til 10; y: 10 - til 10)').py()
+ q('([] x: til 10)').py()
)
assert check_result_and_type(kx, df.loc[df['z']=='a', ['x', 'y']], {'x': [0, 1], 'y': [10, 9]})
@@ -440,8 +452,8 @@ def test_table_merge_copy(kx, q):
df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
tab1 = kx.toq(df1)
tab2 = kx.toq(df2)
- tab1.merge(tab2, left_on='lkey', right_on='rkey', copy=False)
- assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', copy=False, sort=True)
+ assert df1.merge(df2, left_on='lkey', right_on='rkey', sort=True).equals(tab1.pd())
# Replace_self property
df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
@@ -449,8 +461,8 @@ def test_table_merge_copy(kx, q):
tab1 = kx.toq(df1)
tab1.replace_self = True
tab2 = kx.toq(df2)
- tab1.merge(tab2, left_on='lkey', right_on='rkey')
- assert df1.merge(df2, left_on='lkey', right_on='rkey').equals(tab1.pd())
+ tab1.merge(tab2, left_on='lkey', right_on='rkey', sort=True)
+ assert df1.merge(df2, left_on='lkey', right_on='rkey', sort=True).equals(tab1.pd())
def test_table_inner_merge(kx, q):
@@ -462,12 +474,14 @@ def test_table_inner_merge(kx, q):
assert df1.merge(
df2,
left_on='lkey',
- right_on='rkey'
+ right_on='rkey',
+ sort=True
).equals(
tab1.merge(
tab2,
left_on='lkey',
- right_on='rkey'
+ right_on='rkey',
+ sort=True
).pd()
)
@@ -479,12 +493,14 @@ def test_table_inner_merge(kx, q):
assert df1.merge(
df2,
left_on='lkey',
- right_on='rkey'
+ right_on='rkey',
+ sort=True
).equals(
q('{0!x}', tab1.merge(
tab2,
left_on='lkey',
- right_on='rkey'
+ right_on='rkey',
+ sort=True
)).pd()
)
@@ -657,7 +673,7 @@ def test_table_left_merge(kx, q):
res = tab1.merge(tab2, on='key', how='left').pd()
assert str(res.at[6, 'value_y']) == '--'
res.at[6, 'value_y'] = np.NaN
- assert res.equals(df_res)
+ assert df_res.equals(res)
def test_table_right_merge(kx, q):
@@ -773,7 +789,7 @@ def test_table_right_merge(kx, q):
res = tab1.merge(tab2, on='key', how='right').pd()
assert str(res.at[6, 'key']) == ''
res.at[6, 'key'] = None
- assert res.equals(df_res)
+ assert df_res.equals(res)
def test_table_outer_merge(kx, q):
@@ -783,19 +799,6 @@ def test_table_outer_merge(kx, q):
df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})
tab1 = kx.toq(df1)
tab2 = kx.toq(df2)
- assert df1.merge(
- df2,
- left_on='lkey',
- right_on='rkey',
- how='outer'
- ).equals(
- tab1.merge(
- tab2,
- left_on='lkey',
- right_on='rkey',
- how='outer'
- ).pd()
- )
assert df1.merge(
df2,
left_on='lkey',
@@ -838,12 +841,12 @@ def test_table_outer_merge(kx, q):
df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
tab1 = kx.toq(df1)
tab2 = kx.toq(df2)
- tab_res = tab1.merge(tab2, on='a', how='outer').pd()
- assert str(tab_res.at[1, 'c']) == '--'
- tab_res.at[1, 'c'] = np.NaN
- assert str(tab_res.at[2, 'b']) == '--'
- tab_res.at[2, 'b'] = np.NaN
- assert df1.merge(df2, on='a', how='outer').equals(tab_res)
+ tab_res = tab1.merge(tab2, on='a', how='outer', sort=True).pd()
+ assert str(tab_res.at[0, 'c']) == '--'
+ tab_res.at[0, 'c'] = np.NaN
+ assert str(tab_res.at[1, 'b']) == '--'
+ tab_res.at[1, 'b'] = np.NaN
+ assert df1.merge(df2, on='a', how='outer', sort=True).equals(tab_res)
# Merge on same indexes
df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})
@@ -929,6 +932,8 @@ def test_table_outer_merge(kx, q):
res = tab1.merge(tab2, on='key', how='outer').pd()
assert res.at[7, 'key'] == ''
res.at[7, 'key'] = None
+ res.sort_values(['key'], inplace=True, ignore_index=True)
+ df_res.sort_values(['key'], inplace=True, ignore_index=True)
assert df_res.equals(res)
@@ -1021,18 +1026,18 @@ def test_df_astype_vanilla_checks(kx, q):
df = q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')
assert check_result_and_type(
kx,
- df.astype(kx.LongVector).py(),
- q('([] c1:1 2 3j; c2:1 2 3j; c3:1 2 3j; c4:1 2 3j)').py()
+ df.astype(kx.LongVector),
+ q('([] c1:1 2 3j; c2:1 2 3j; c3:1 2 3j; c4:1 2 3j)')
)
assert check_result_and_type(
kx,
- df.astype({'c1': kx.LongVector, 'c2': 'kx.ShortVector'}).py(),
- q('([] c1:1 2 3j; c2:1 2 3h; c3:1 2 3h; c4:1 2 3i)').py()
+ df.astype({'c1': kx.LongVector, 'c2': 'kx.ShortVector'}),
+ q('([] c1:1 2 3j; c2:1 2 3h; c3:1 2 3h; c4:1 2 3i)')
)
def test_df_astype_string_to_sym(kx, q):
- df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ df = q('''([] c1:3#.z.d; c2:`abc`def`ghi; c3:1 2 3j;
c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
assert check_result_and_type(
kx,
@@ -1048,26 +1053,26 @@ def test_df_astype_string_to_sym(kx, q):
df)
assert check_result_and_type(
kx,
- df.astype({'c4': kx.SymbolVector, 'c5': kx.SymbolVector}).py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:`abc`def`ghi;c5:`a`b`c;c6:(1 2 3;4 5 6;7 8 9))''').py()
+ df.astype({'c4': kx.SymbolVector, 'c5': kx.SymbolVector}),
+ q('''([] c1:3#.z.d; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:`abc`def`ghi;c5:`a`b`c;c6:(1 2 3;4 5 6;7 8 9))''')
)
assert check_result_and_type(
kx,
- df.astype({'c4': kx.SymbolVector}).py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
- c4:`abc`def`ghi;c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
+ df.astype({'c4': kx.SymbolVector}),
+ q('''([] c1:3#.z.d; c2:`abc`def`ghi; c3:1 2 3j;
+ c4:`abc`def`ghi;c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
)
def test_df_astype_value_errors(kx, q):
- df = q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ df = q('''([] c1:3#.z.d; c2:`abc`def`ghi; c3:1 2 3j;
c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''')
# Check errors parameter set to 'ignore'
assert check_result_and_type(
kx,
- df.astype({'c6': kx.CharVector}, errors='ignore').py(),
- q('''([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j;
+ df.astype({'c6': kx.CharVector}, errors='ignore'),
+ q('''([] c1:3#.z.d; c2:`abc`def`ghi; c3:1 2 3j;
c4:("abc";"def";"ghi");c5:"abc";c6:(1 2 3;4 5 6;7 8 9))''').py()
)
with pytest.raises(ValueError,
@@ -1116,41 +1121,51 @@ def test_df_select_dtypes(kx, q):
df = q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')
assert check_result_and_type(
kx,
- df.select_dtypes(include=[kx.ShortVector, kx.LongVector]).py(),
+ df.select_dtypes(include=[kx.ShortVector, kx.LongVector]),
q('([] c2:1 2 3h; c3:1 2 3j)').py()
)
+ assert q('~', df.select_dtypes(include=[kx.FloatVector]), q('(::)')).py()
assert check_result_and_type(
kx,
- df.select_dtypes(exclude='kx.LongVector').py(),
+ df.select_dtypes(exclude='kx.LongVector'),
q('([] c1:`a`b`c; c2:1 2 3h; c4:1 2 3i)').py()
)
assert check_result_and_type(
kx,
df.select_dtypes(include=['ShortVector', kx.LongVector],
- exclude=[kx.SymbolVector]).py(),
- q('([] c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)').py()
+ exclude=[kx.SymbolVector]),
+ q('([] c2:1 2 3h; c3:1 2 3j)').py()
)
+ assert q('~', df.select_dtypes(exclude=[kx.SymbolAtom, kx.ShortAtom, kx.LongAtom, kx.IntAtom]
+ ), q('(::)')).py()
assert check_result_and_type(
kx,
- df.select_dtypes(include=[kx.ShortAtom, kx.LongAtom]).py(),
+ df.select_dtypes(include=[kx.ShortAtom, kx.LongAtom]),
q('([] c2:1 2 3h; c3:1 2 3j)').py()
)
assert check_result_and_type(
kx,
- df.select_dtypes(exclude='kx.LongAtom').py(),
+ df.select_dtypes(exclude='kx.LongAtom'),
q('([] c1:`a`b`c; c2:1 2 3h; c4:1 2 3i)').py()
)
+ dfk = df.set_index('c1')
+ assert q('~', dfk.select_dtypes(include=[kx.ShortAtom, kx.LongAtom, kx.IntAtom]), dfk).py()
+ assert q('~', dfk.select_dtypes(include=[kx.ShortAtom]), q('([c1:`a`b`c] c2:1 2 3h)')).py()
df = q('([] c1:"abc";c2:(1 2 3;4 5 6;7 8 9);c3:("abc";"abc";"abc"))')
assert check_result_and_type(
kx,
- df.select_dtypes(exclude='kx.List').py(),
+ df.select_dtypes(exclude='kx.List'),
q('([] c1:"abc")').py()
)
assert check_result_and_type(
kx,
- df.select_dtypes(include='kx.List').py(),
+ df.select_dtypes(include='kx.List'),
q('([] c2:(1 2 3;4 5 6;7 8 9);c3:("abc";"abc";"abc"))').py()
)
+ dfk = df.set_index('c1')
+ assert q('~', dfk.select_dtypes(exclude='kx.List'), q('(::)')).py()
+ assert q('~', dfk.select_dtypes(include='kx.SymbolAtom'), q('(::)')).py()
+ assert q('~', dfk.select_dtypes(include='kx.List'), dfk).py()
def test_df_select_dtypes_errors(kx, q):
@@ -2182,7 +2197,7 @@ def test_pandas_prod(q):
qprod = tab.prod(numeric_only=True, skipna=True, axis=1, min_count=5).py()
pprod = df.prod(numeric_only=True, skipna=True, axis=1, min_count=5)
for i in range(10):
- assert qprod[i] == q('0N')
+ assert pd.isna(qprod[i])
assert str(pprod[i]) == 'nan'
@@ -2204,7 +2219,7 @@ def test_pandas_sum(q):
qsum = tab.sum(numeric_only=True, skipna=True, axis=1, min_count=5).py()
psum = df.sum(numeric_only=True, skipna=True, axis=1, min_count=5)
for i in range(10):
- assert qsum[i] == q('0N')
+ assert pd.isna(qsum[i])  # q-side result; psum is checked below
assert str(psum[i]) == 'nan'
@@ -2242,8 +2257,8 @@ def test_pandas_groupby(kx, q):
df = pd.DataFrame(
{
'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- 'Max Speed': [380., 370., 24., 26.],
- 'Max Altitude': [570., 555., 275., 300.]
+ 'Speed': [380., 370., 24., 26.],
+ 'Altitude': [570., 555., 275., 300.]
}
)
@@ -2252,18 +2267,22 @@ def test_pandas_groupby(kx, q):
assert all(
df.groupby(['Animal']).mean() == tab.groupby(kx.SymbolVector(['Animal'])).mean().pd()
)
- assert df.groupby(['Animal']).ndim == tab.groupby(kx.SymbolVector(['Animal'])).ndim
assert all(
- df.groupby(['Animal'], as_index=False).mean()
- == tab.groupby(kx.SymbolVector(['Animal']), as_index=False).mean().pd()
+ pd.DataFrame(df.groupby('Animal')['Speed'].max())
+ == tab.groupby('Animal')['Speed'].max().pd()
+ )
+ assert df.groupby(['Animal']).ndim == tab.groupby(['Animal']).ndim
+ assert all(
+ df.groupby('Animal', as_index=False).mean()
+ == tab.groupby('Animal', as_index=False).mean().pd()
)
assert all(
df.groupby(['Animal']).tail(1).reset_index(drop=True)
- == tab.groupby(kx.SymbolVector(['Animal'])).tail(1).pd()
+ == tab.groupby('Animal').tail(1).pd()
)
assert all(
- df.groupby(['Animal']).tail(2)
- == tab.groupby(kx.SymbolVector(['Animal'])).tail(2).pd()
+ df.groupby('Animal').tail(2)
+ == tab.groupby('Animal').tail(2).pd()
)
df = pd.DataFrame(
diff --git a/tests/test_pandas_map.py b/tests/test_pandas_map.py
new file mode 100644
index 0000000..0f89f72
--- /dev/null
+++ b/tests/test_pandas_map.py
@@ -0,0 +1,70 @@
+"""Tests for the Pandas API apply functionality"""
+
+import os
+
+import pytest
+
+
+def _count(x):
+ try:
+ return len(x)
+ except TypeError:
+ return 1  # len() raised TypeError: count x as a single item
+
+
+def _multi_arg_count(x, y=0):
+ try:
+ count = len(x)
+ except TypeError:
+ count = 1  # len() raised TypeError: count x as a single item
+ return count + y  # y is added to the count (defaults to 0)
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_map_applymap(kx):
+ tables = [kx.q('([]100?(1 2 3;"123";1 2;0n);100?1f;100?(0n;1f))'),  # unkeyed
+ kx.q('([til 100]x1:100?(1 2 3;"123";1 2;0n);x2:100?1f;x3:100?(0n;1f))')]
+ for tab in tables:  # same checks for the unkeyed and keyed table
+ fn = kx.q('count')
+ assert kx.q('~', tab.map(fn), tab.applymap(fn))  # q function: both must match
+
+ fn = lambda x: len(str(x)) # noqa: E731
+ assert kx.q('~', tab.map(fn), tab.applymap(fn))
+
+ assert kx.q('~', tab.map(_count), tab.applymap(_count))
+ assert kx.q('~', tab.map(_multi_arg_count), tab.applymap(_multi_arg_count))
+ assert kx.q('~', tab.map(_multi_arg_count, y=2), tab.applymap(_multi_arg_count, y=2))
+ assert not kx.q('~', tab.map(_multi_arg_count), tab.applymap(_multi_arg_count, y=2))
+
+ assert not tab.map(fn).has_nulls
+ assert tab.map(fn, na_action='ignore').has_nulls  # nulls remain under 'ignore'
+ assert kx.q('~', tab.map(fn, na_action='ignore'), tab.applymap(fn, na_action='ignore'))
+
+ fn = lambda x, y: y + len(str(x)) # noqa: E731
+ assert kx.q('~', tab.map(fn, y=1), tab.applymap(fn, y=1))
+ assert not kx.q('~', tab.map(fn, y=1), tab.applymap(fn, y=2))
+
+ assert kx.q('~', tab.map(_count), tab.pd().applymap(_count))  # vs. pandas applymap
+ assert kx.q('~', tab.map(_multi_arg_count), tab.pd().applymap(_multi_arg_count))
+ assert kx.q('~', tab.map(_multi_arg_count, y=1), tab.pd().applymap(_multi_arg_count, y=1))
+ ignore_check = kx.q('=',
+ tab.map(_multi_arg_count, na_action='ignore', y=1),
+ tab.pd().applymap(_multi_arg_count, na_action='ignore', y=1))
+ if isinstance(tab, kx.KeyedTable):
+ ignore_check = ignore_check._values  # presumably the non-key part - TODO confirm
+ assert ignore_check.all().all()
+
+ with pytest.raises(TypeError) as err:
+ tab.map(_count, na_action=False)
+ assert "na_action must be None or 'ignore'" in str(err.value)
+
+ with pytest.raises(TypeError) as err:
+ tab.map(1)
+ assert "Provided value 'func' is not callable" in str(err.value)
+
+ with pytest.raises(kx.QError) as err:
+ tab.map(kx.q('{[x;y]x+y}'), y=2)
+ assert "ERROR: Passing key" in str(err.value)
diff --git a/tests/test_pykx.py b/tests/test_pykx.py
index 0a6326e..ed83fce 100644
--- a/tests/test_pykx.py
+++ b/tests/test_pykx.py
@@ -116,7 +116,8 @@ def test_QHOME_symlinks():
'Windows': 'w64',
}[system()]
(QHOME/q_lib_dir_name).mkdir(exist_ok=True)
- fake_q_lib_path = Path(site.getsitepackages()[0])/'pykx'/'lib'/q_lib_dir_name/'fake_q_lib.so'
+ lib = Path('lib')/'4-1-libs' if os.getenv('PYKX_4_1_ENABLED') is not None else Path('lib')
+ fake_q_lib_path = Path(site.getsitepackages()[0])/'pykx'/lib/q_lib_dir_name/'fake_q_lib.so'
fake_q_lib_path.touch()
# Convert first argument of `shutil.move` to `str` to work around Python bug bpo-32689
shutil.move(str(fake_q_lib_path), QHOME/q_lib_dir_name)
@@ -138,7 +139,7 @@ def try_clean(path):
@disposable_env_only
@pytest.mark.isolate
def test_QHOME_symlinks_skip():
- os.environ['IGNORE_QHOME'] = "1"
+ os.environ['PYKX_IGNORE_QHOME'] = "1"
# This logic to get QHOME is copied from `pykx.config`, since we can't use `pykx.qhome` until
# after PyKX has been imported, but that would ruin the test.
try:
@@ -288,6 +289,10 @@ def test_pykx_star():
os.getenv('PYKX_THREADING') is not None,
reason='Not supported with PYKX_THREADING'
)
+@pytest.mark.skipif(
+ (sys.version_info.major == 3) and (sys.version_info.minor == 8),
+ reason="python3.8 subprocess behavior inconsistent with newest versions"
+)
def test_pykx_stdout_stderr():
output = subprocess.run(
(str(Path(sys.executable).as_posix()), '-c',
@@ -306,10 +311,14 @@ def test_pykx_stdout_stderr():
os.getenv('PYKX_THREADING') is not None,
reason='Not supported with PYKX_THREADING'
)
+@pytest.mark.skipif(
+ (sys.version_info.major == 3) and (sys.version_info.minor == 8),
+ reason="python3.8 subprocess behavior inconsistent with newest versions"
+)
def test_pykx_stdout_stderr_under_q():
subprocess.run(
(str(Path(sys.executable).as_posix()), '-c',
- 'import pykx;pykx.install_into_QHOME()'),
+ 'import pykx;pykx.install_into_QHOME(cloud_libraries=True)'),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
@@ -360,6 +369,10 @@ def test_PYKX_Q_LIB_LOCATION():
@pytest.mark.unlicensed
+@pytest.mark.skipif(
+ system() == 'Windows',
+ reason='Subnormal updates not presently implemented on Windows'
+)
def test_subnormals(kx):
import numpy as np
assert '5e-324' == str(np.finfo(np.float64).smallest_subnormal + 0.)
diff --git a/tests/test_query.py b/tests/test_query.py
index fef706e..89f879b 100644
--- a/tests/test_query.py
+++ b/tests/test_query.py
@@ -1,4 +1,7 @@
-from tempfile import TemporaryDirectory
+import os
+from pathlib import Path
+import shutil
+import uuid
# Do not import pykx here - use the `kx` fixture instead!
import pytest
@@ -17,25 +20,67 @@ def test_select(kx, q):
assert q('select from qtab').py() == q.qsql.select(qtab).py()
assert q('select from qtab').py() == q.qsql.select(qtab.pd()).py()
assert q('select from qtab where col1=`a').py() == q.qsql.select(qtab, where='col1=`a').py()
+ if kx.licensed:
+ assert q('select from qtab where col1=`a').py() ==\
+ q.qsql.select(qtab, where=kx.Column('col1') == 'a').py()
assert q('select from qtab where bool').py() == q.qsql.select(qtab, where=qbool).py()
assert q('select from qtab where col1=`a,col2>0.5').py() ==\
q.qsql.select(qtab, where=['col1=`a', 'col2>0.5']).py()
+ if kx.licensed:
+ assert q('select from qtab where col1=`a,col2>0.5').py() ==\
+ q.qsql.select(qtab, where=(kx.Column('col1') == 'a') & (kx.Column('col2') > 0.5)).py()
+ assert q('select from qtab where col1=`a,col2>0.5').py() ==\
+ q.qsql.select(qtab, where=[kx.Column('col1') == 'a', kx.Column('col2') > 0.5]).py()
assert q('select col1 from qtab').py() == q.qsql.select(qtab, columns={'col1': 'col1'}).py()
assert q('select col1 from qtab').py() == q.qsql.select(qtab, columns='col1').py()
assert q('select col1,col2 from qtab').py() == \
q.qsql.select(qtab, columns=['col1', 'col2']).py()
+ if kx.licensed:
+ assert q('select col1,col2 from qtab').py() == \
+ q.qsql.select(qtab, columns=[kx.Column('col1'), kx.Column('col2')]).py()
+ assert q('select col1,col2 from qtab').py() == \
+ q.qsql.select(qtab, columns=kx.Column('col1') & kx.Column('col2')).py()
assert q('select maxCol2:max col2 from qtab').py() ==\
q.qsql.select(qtab, columns={'maxCol2': 'max col2'}).py()
+ # assert q('select maxCol2:max col2 from qtab').py() ==\
+ # q.qsql.select(qtab, columns={'maxCol2': kx.Column('col2').max()}).py()
+ # assert q('select maxCol2:max col2 from qtab').py() ==\
+ # q.qsql.select(qtab, columns=kx.Column('col2', name='maxCol2').max()).py()
+ # assert q('select maxCol2:max col2 from qtab').py() ==\
+ # q.qsql.select(qtab, columns=kx.QueryPhrase(kx.Column('col2', name='maxCol2').max())).py()
assert q('select sumcols:col2+col3 from qtab').py() ==\
q.qsql.select(qtab, columns={'sumcols': 'col2+col3'}).py()
+ if kx.licensed:
+ assert q('select sumcols:col2+col3 from qtab').py() ==\
+ q.qsql.select(qtab, columns={'sumcols': kx.Column('col2')+kx.Column('col3')}).py()
+ assert q('select sumcols:col2+col3 from qtab').py() ==\
+ q.qsql.select(qtab, columns=kx.Column('col2', name='sumcols')+kx.Column('col3')).py()
assert q('select maxCol2:max col2 by groupcol:col1 from qtab').py() ==\
q.qsql.select(qtab, columns={'maxCol2': 'max col2'}, by={'groupcol': 'col1'}).py()
+ if kx.licensed:
+ assert q('select maxCol2:max col2 by groupcol:col1 from qtab').py() ==\
+ q.qsql.select(qtab, columns=kx.Column('col2', name='maxCol2').max(),
+ by={'groupcol': kx.Column('col1')}).py()
+ assert q('select maxCol2:max col2 by groupcol:col1 from qtab').py() ==\
+ q.qsql.select(qtab, columns=kx.Column('col2', name='maxCol2').max(),
+ by=kx.Column('col1', name='groupcol')).py()
+ assert q('select maxCol2:max col2 by groupcol:col1 from qtab').py() ==\
+ q.qsql.select(qtab, columns=kx.Column('col2', name='maxCol2').max(),
+ by=kx.QueryPhrase(kx.Column('col1', name='groupcol'))).py()
assert q('select minCol2:min col2,max col3 by col1 from qtab where col3<0.5,col2>0.7').py() ==\
q.qsql.select(qtab,
columns={'minCol2': 'min col2', 'col3': 'max col3'},
by={'col1': 'col1'},
where=['col3<0.5', 'col2>0.7']
).py()
+ if kx.licensed:
+ assert q('select minCol2:min col2,max col3 by col1 from qtab where col3<0.5,col2>0.7').py()\
+ == q.qsql.select(qtab,
+ columns=kx.Column('col2', name='minCol2').min()
+ & kx.Column('col3').max(),
+ by=kx.Column('col1'),
+ where=(kx.Column('col3') < 0.5) & (kx.Column('col2') > 0.7)
+ ).py()
with pytest.raises(TypeError):
q.qsql.select([1, 2, 3]).py()
with pytest.raises(kx.QError) as err:
@@ -56,8 +101,10 @@ def test_select(kx, q):
def test_partitioned_query(kx, q):
- with TemporaryDirectory() as tmp_dir:
- db = kx.DB(path=tmp_dir)
+ cwd = Path(os.getcwd())
+ tmpdir = cwd / str(uuid.uuid4().hex)[:7]
+ try:
+ db = kx.DB(path=tmpdir)
N = 1000
qtab = kx.Table(data={
'date': kx.q.asc(kx.random.random(N, kx.q('2020.01.01 2020.01.02 2020.01.03'))),
@@ -77,6 +124,9 @@ def test_partitioned_query(kx, q):
with pytest.raises(kx.QError) as err:
kx.q.qsql.update(db.qtable, where=['sym=`AAPL'], inplace=True)
assert "Application of 'inplace' updates not supported" in str(err)
+ finally:
+ os.chdir(cwd)
+ shutil.rmtree(tmpdir, ignore_errors=True)
@pytest.mark.asyncio
@@ -219,11 +269,6 @@ def test_update(q):
q.qsql.update(byqtab, columns={'weight': 'max weight'}, by={'city': 'city'}, inplace=True)
assert q['byqtab'].py() == byqtab.py()
- with pytest.raises(RuntimeError) as err:
- q.qsql.update(qtab, columns={'newcol': 'weight'}, modify=True, inplace=True)
- assert 'Attempting to use both' in str(err)
- assert 0 == q('count .pykx.i.updateCache').py()
-
def test_update_sym_leak(q):
qtab = q('([]name:`tom`dick`harry;age:28 29 35;hair:`fair`dark`fair;eye:`green`brown`gray)')
@@ -372,9 +417,6 @@ def test_table_insert_method(q):
qtab = q('([] a: 1 2 3 4 5; b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)')
q_inserted_tab = q('([] a: 1 2 3 4 5 6; b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)')
- with pytest.warns(DeprecationWarning,
- match=r"Keyword 'replace_self' is deprecated please use 'inplace'"):
- assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py()
assert qtab.insert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py()
assert qtab.py() != q_inserted_tab.py()
@@ -387,9 +429,6 @@ def test_table_upsert_method(q):
qtab = q('([] a: 1 2 3 4 5; b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)')
q_inserted_tab = q('([] a: 1 2 3 4 5 6; b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)')
- with pytest.warns(DeprecationWarning,
- match=r"Keyword 'replace_self' is deprecated please use 'inplace'"):
- assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py()
assert qtab.upsert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py()
assert qtab.py() != q_inserted_tab.py()
@@ -402,9 +441,6 @@ def test_keyed_table_insert_method(q):
qtab = q('([a: 1 2 3 4 5] b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)')
q_inserted_tab = q('([a: 1 2 3 4 5 6] b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)')
- with pytest.warns(DeprecationWarning,
- match=r"Keyword 'replace_self' is deprecated please use 'inplace'"):
- assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py()
assert qtab.insert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py()
assert qtab.py() != q_inserted_tab.py()
@@ -417,9 +453,6 @@ def test_keyed_table_upsert_method(q):
qtab = q('([a: 1 2 3 4 5] b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)')
q_inserted_tab = q('([a: 1 2 3 4 5 6] b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)')
- with pytest.warns(DeprecationWarning,
- match=r"Keyword 'replace_self' is deprecated please use 'inplace'"):
- assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py()
assert qtab.upsert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py()
assert qtab.py() != q_inserted_tab.py()
@@ -479,3 +512,351 @@ def test_upsert_match_schema_embedded(kx, q):
def test_dir(kx):
assert isinstance(dir(kx.query), list)
assert sorted(dir(kx.query)) == dir(kx.query)
+
+
+def test_pythonic_query(kx):
+ table = kx.q('([] x:`a`b`c;x1:1 2 3;x2:`a`e`g;x11:0 3 3;b:011b)')
+ c='c'
+ kx.q['cvar'] = c
+
+ assert kx.q('~', table[0], table.select(where=kx.Column('x') == 'a'))
+ assert kx.q('~', table[0], table.delete(where=kx.Column('x').isin(['b', 'c'])))
+ assert kx.q('~', table[0], table.delete(where=kx.Column('b')))
+ assert kx.q('~', table[1:3], table.select(where=kx.Column('b')))
+ assert kx.q('~', table[2], table.select(where=kx.Column('x') == c))
+ assert kx.q('~', table[0], table.select(where=kx.Column('x') == kx.Column('x2')))
+ assert kx.q('~', table[2], table.select(where=kx.Column('x') == kx.Variable('cvar')))
+ assert kx.q('~', table[1:3], table.select(where=kx.Column('x1') > 1))
+ assert kx.q('~', table[1:3], table.select(where=kx.Column('x1') >= 2))
+ assert kx.q('~', table[0:2], table.select(where=kx.Column('x').isin(['a', 'b'])))
+ assert kx.q('~', table[1], table.select(where=kx.Column('x').isin('b')))
+ assert kx.q('~', table[2], table.select(where=kx.Column('x').isin(kx.Variable('cvar'))))
+ assert kx.q('~', table[0], table.select(where=kx.Column('x') == 'a'))
+ assert kx.q('~', table[0], table.select(where=kx.ParseTree(kx.q.parse(b'x=`a')).enlist()))
+ assert kx.q('~', table[0], table.select(where=kx.QueryPhrase([kx.q.parse(b'x=`a')])))
+ assert kx.q('~', table[0], table.select(where=kx.QueryPhrase(kx.Column('x') == 'a')))
+ assert kx.q('~', table[0:2], table.select(where=(kx.Column('x') == 'a')
+ | (kx.Column('x') == 'b')))
+ assert kx.q('~', table[0:2], table.select(where=(kx.Column('b')
+ == (kx.Column('x11') > kx.Column('x1')))))
+ assert kx.q('~', table[2], table.select(where=kx.QueryPhrase(kx.Column('x1')
+ == kx.Column('x1').max())))
+ assert kx.q('~', table[2], table.select(where=kx.Column('x11').msum(2) > 4))
+ assert all(kx.q('{update x11msum2:2 msum x11 from x}', table)
+ == table.update({'x11msum2': kx.Column('x11').msum(2)}))
+ assert all(kx.q('{select by neg b from x}', table)
+ == table.select(by={'b': kx.Column('b').call('neg')}))
+ kx.q('myneg:{neg x}')
+ assert all(kx.q('{select by neg b from x}', table)
+ == table.select(by={'b': kx.Column('b').call(kx.Variable('myneg'))}))
+ assert all(kx.q('{select neg b from x}', table)
+ == table.select(columns=kx.Column('b').call('neg')))
+ assert all(kx.q('{select negb:neg b from x}', table)
+ == table.select(columns=kx.Column('b', name='negb').call('neg')))
+ assert all(kx.q('{select negb:neg b from x}', table)
+ == table.select(columns=kx.Column(name='negb', value=[kx.q('neg'), 'b'])))
+ assert all(kx.q('{exec neg b from x}', table)
+ == table.exec(columns=kx.Column('b').call('neg')))
+ assert ({'asA': 'a', 'negB': [kx.q('neg'), 'b']}
+ == (kx.Column('a', name='asA') & kx.Column('b', name='negB').call('neg')).to_dict())
+ assert kx.q('~', kx.q('{select x, negx1:neg x1 by x11, notB:not b from x}', table),
+ table.select(columns=['x', kx.Column('x1', name='negx1').call('neg')],
+ by=['x11', kx.Column('b', name='notB').call('not')]))
+ assert all(kx.q('{select max b from x}', table)
+ == table.select(columns=kx.Column('b').max()))
+ assert kx.q('~', kx.q('{select max b, x from x}', table),
+ table.select(columns=kx.Column('b').max() & kx.Column('x')))
+ assert kx.q('~', kx.q('{select maxB:max b from x}', table),
+ table.select(columns=kx.Column('b', name='maxB').max()))
+ assert kx.q('~', kx.q('{select maxB:max b from x}', table),
+ table.select(columns={'maxB': kx.Column('b').max()}))
+ t= kx.q('([] c1:30?`a`b`c;c2:30?`d`e`f;c3:30?4;c4:30?4)')
+ a = kx.q('{select from x where c3=(max;c3) fby ([] c1;c4)}', t)
+ b = t.select(where=kx.Column('c3') == [kx.q.fby, [kx.q.enlist, kx.q.max, 'c3'],
+ kx.ParseTree.table(['c1', 'c4'])])
+ assert kx.q('~', a, b)
+ c = t.select(where=kx.Column('c3') == kx.Column.fby(['c1', 'c4'], 'max', 'c3', by_table=True))
+ assert kx.q('~', a, c)
+ d = t.select(where=kx.Column('c3') == kx.Column.fby({'c1': 'c1', 'c4': 'c4'}, 'max', 'c3'))
+ assert kx.q('~', a, d)
+ a = table.select(where=(kx.Column('x') == 'a') &(
+ kx.Column('x1') == 1) & (kx.Column('x11') == 0))
+ b = table.select(where=((kx.Column('x') == 'a') &(
+ kx.Column('x1') == 1)) & (kx.Column('x11') == 0))
+ c = table.select(where=(kx.Column('x') == 'a') &(
+ (kx.Column('x1') == 1) & (kx.Column('x11') == 0)))
+ assert kx.q('~', table[0], a)
+ assert kx.q('~', table[0], b)
+ assert kx.q('~', table[0], c)
+
+ with pytest.raises(TypeError) as err:
+ kx.Column('x') & 1
+ assert 'cannot `&` off a `pykx.Column`.' in str(err)
+
+ with pytest.raises(TypeError) as err:
+ (kx.Column('x') & kx.Column('x')) & 1
+ assert 'cannot `&` off a `pykx.QueryPhrase`.' in str(err)
+
+ t= kx.q('''
+ ([] Primary:`1a`1a`1a`1a`2e`2e`2e`2e;
+ Name:`AXA`FLO`FLO`AXA`AXA`ROT`ROT`ROT; Count: 11 1 60 14 1 1 6 4)''')
+ a = kx.q('''{select from x where
+ i=({exec first ind from x where Count=max Count};([]Count;ind:i)) fby Primary}''', t)
+ b = t.select(
+ where=kx.Column('i')
+ == [kx.q.fby, [kx.q.enlist, kx.q('{exec first ind from x where Count=max Count}'),
+ kx.ParseTree.table({'Count': 'Count', 'ind': 'i'})], kx.Column('Primary')])
+ c = t.select(where=kx.Column('i')
+ == kx.ParseTree.fby('Primary', '{exec first ind from x where Count=max Count}',
+ {'Count': 'Count', 'ind': 'i'}))
+ assert kx.q('~', a, b)
+ assert kx.q('~', a, c)
+
+ t=kx.q('([] a:1 2 3;b:4 5 6;c:7 8 9;d:10 11 12)')
+ kx.q['t'] = t
+ a = kx.q('select a:a(,)/:b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="/:"))
+ c = t.select(kx.Column('a').call(b',', kx.Column('b'), iterator="/:"))
+ d = t.select(kx.Column('a').call(kx.CharVector(','), kx.Column('b'), iterator="/:"))
+ assert kx.q('~', a, b)
+ assert kx.q('~', a, c)
+ assert kx.q('~', a, d)
+ a = kx.q('select a:a(,)\\:b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="\\:"))
+ assert kx.q('~', a, b)
+ a = kx.q('select a:a(,)\\:/:b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="\\:/:"))
+ assert kx.q('~', a, b)
+ a = kx.q('select a:a(,)/:\\:b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="/:\\:"))
+ assert kx.q('~', a, b)
+ a = kx.q('select {99,x} each a from t')
+ b = t.select(kx.Column('a').call('{99,x}', iterator="each"))
+ assert kx.q('~', a, b)
+ a = kx.q('select {99,x} peach a from t')
+ b = t.select(kx.Column('a').call('{99,x}', iterator="peach"))
+ assert kx.q('~', a, b)
+ a = kx.q('select a:(,[a]) each b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="each", project_args=[0]))
+ assert kx.q('~', a, b)
+ a = kx.q('select a:(,[b]) each a from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="each",
+ col_arg_ind=1, project_args=[0]))
+ assert kx.q('~', a, b)
+ a = kx.q('select (,[;b]) each a from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="each", project_args=[1]))
+ assert kx.q('~', a, b)
+ a = kx.q('select (+) scan a from t')
+ b = t.select(kx.Column('a').call('+', iterator="scan"))
+ assert kx.q('~', a, b)
+ a = kx.q('select (+\\)a from t')
+ b = t.select(kx.Column('a').call('+', iterator='\\'))
+ assert kx.q('~', a, b)
+ a = kx.q('select max a,(+) over b from t')
+ b = t.select(kx.Column('a').max() & kx.Column('b').call('+', iterator="over"))
+ assert kx.q('~', a, b)
+ a = kx.q('select max a,(+/)b from t')
+ b = t.select(kx.Column('a').max() & kx.Column('b').call('+', iterator="/"))
+ assert kx.q('~', a, b)
+ a = kx.q('select a:a(,)\'b from t')
+ b = t.select(kx.Column('a').call(',', kx.Column('b'), iterator="'"))
+ assert kx.q('~', a, b)
+ a = kx.q('select (-) prior a from t')
+ b = t.select(kx.Column('a').call('-', iterator="prior"))
+ assert kx.q('~', a, b)
+
+ assert 'x' in table.columns.py()
+ assert 'x' not in table.delete(kx.Column('x')).columns.py()
+
+ assert all([i in table.columns.py() for i in ['x', 'x1']])  # both present before delete
+ assert not {'x', 'x1'} & set(table.delete(kx.Column('x') & kx.Column('x1')).columns.py())
+
+
+def test_pythonic_query_ipc(kx, q_port):
+ q = kx.SyncQConnection(port=q_port)
+ q('table:([] x:`a`b`c;x1:1 2 3;x2:`a`e`g;x11:0 3 3;b:011b)')
+ table = q['table']
+ c='c'
+ q['cvar'] = c
+ assert q('{table[enlist 0]~x}', q.qsql.select('table', where=kx.Column('x') == 'a'))
+ assert q('{table[1 2]~x}', q.qsql.select('table', where=kx.Column('b')))
+ assert q('{table[enlist 2]~x}', q.qsql.select('table', where=kx.Column('x') == c))
+ assert q('{table[enlist 0]~x}', q.qsql.select('table', where=kx.Column('x') == kx.Column('x2')))
+ assert q('{table[enlist 2]~x}', q.qsql.select('table', where=kx.Column('x')
+ == kx.Variable('cvar')))
+ assert q('{table[1 2]~x}', q.qsql.select('table', where=kx.Column('x1') > 1))
+ assert q('{table[1 2]~x}', q.qsql.select('table', where=kx.Column('x1') >= 2))
+ assert q('{table[0 1]~x}', q.qsql.select('table', where=kx.Column('x').isin(['a', 'b'])))
+ assert q('{table[enlist 1]~x}', q.qsql.select('table', where=kx.Column('x').isin('b')))
+ assert q('{table[enlist 2]~x}', q.qsql.select('table',
+ where=kx.Column('x').isin(kx.Variable('cvar'))))
+ assert q('{table[enlist 0]~x}', q.qsql.select('table', where=kx.Column('x') == 'a'))
+ assert q('{table[enlist 0]~x}', q.qsql.select('table',
+ where=kx.ParseTree(q.parse(b'x=`a')).enlist()))
+ assert q('{table[enlist 0]~x}', q.qsql.select('table',
+ where=kx.QueryPhrase([q.parse(b'x=`a')])))
+ assert q('{table[enlist 0]~x}', q.qsql.select('table',
+ where=kx.QueryPhrase(kx.Column('x') == 'a')))
+ assert q('{table[0 1]~x}', q.qsql.select('table', where=(kx.Column('x') == 'a')
+ | (kx.Column('x') == 'b')))
+ assert q('{table[0 1]~x}',
+ q.qsql.select('table', where=(kx.Column('b') == (kx.Column('x11') > kx.Column('x1')))))
+ assert q('{table[enlist 2]~x}',
+ q.qsql.select('table', where=kx.QueryPhrase(kx.Column('x1') == kx.Column('x1').max())))
+ assert q('{table[enlist 2]~x}', q.qsql.select('table', where=kx.Column('x11').msum(2) > 4))
+ assert q('{x~update x11msum2:2 msum x11 from table}',  # x: the q.qsql result passed in
+ q.qsql.update('table', ({'x11msum2': kx.Column('x11').msum(2)})))
+ assert q('{x~select by neg b from table}',
+ q.qsql.select('table', by={'b': kx.Column('b').call('neg')}))
+ q('myneg:{neg x}')
+ assert q('{x~select by neg b from table}',
+ q.qsql.select('table', by={'b': kx.Column('b').call(kx.Variable('myneg'))}))
+ assert q('{x~select neg b from table}', q.qsql.select('table',
+ columns=kx.Column('b').call('neg')))
+ assert q('{x~select negb:neg b from table}',
+ q.qsql.select('table', columns=kx.Column('b', name='negb').call('neg')))
+ assert q('{x~select negb:neg b from table}',
+ q.qsql.select('table', columns=kx.Column(name='negb', value=[q('neg'), 'b'])))
+ assert q('{x~exec neg b from table}', q.qsql.exec('table', columns=kx.Column('b').call('neg')))
+ assert ({'asA': 'a', 'negB': [q('neg'), 'b']}
+ == (kx.Column('a', name='asA')& kx.Column('b', name='negB').call('neg')).to_dict())
+ assert q('~', q('select x, negx1:neg x1 by x11, notB:not b from table'),  # expected (q side)
+ q.qsql.select('table',
+ columns=['x', kx.Column('x1', name='negx1').call('neg')],
+ by=['x11', kx.Column('b', name='notB').call('not')]))
+ assert q('~', q('{select max b from x}', table),
+ q.qsql.select('table', columns=kx.Column('b').max()))
+ assert q('~', q('{select max b, x from x}', table),
+ q.qsql.select('table', columns=kx.Column('b').max() & kx.Column('x')))
+ assert q('~', q('{select maxB:max b from x}', table),
+ q.qsql.select('table', columns=kx.Column('b', name='maxB').max()))
+ assert q('~', q('{select maxB:max b from x}', table),
+ q.qsql.select('table', columns={'maxB': kx.Column('b').max()}))
+
+ a = q.qsql.select('table', where=(kx.Column('x') == 'a') &(
+ kx.Column('x1') == 1) & (kx.Column('x11') == 0))
+ b = q.qsql.select('table', where=((kx.Column('x') == 'a') &(
+ kx.Column('x1') == 1)) & (kx.Column('x11') == 0))
+ c = q.qsql.select('table', where=(kx.Column('x') == 'a') &(
+ (kx.Column('x1') == 1) & (kx.Column('x11') == 0)))
+ assert q('{x[enlist 0]~y}', table, a)
+ assert q('{x[enlist 0]~y}', table, b)
+ assert q('{x[enlist 0]~y}', table, c)
+
+ q('t:([] c1:30?`a`b`c;c2:30?`d`e`f;c3:30?4;c4:30?4)')
+ a = q('select from t where c3=(max;c3) fby ([] c1;c4)')
+ b = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby(['c1', 'c4'], 'max', 'c3',
+ by_table=True))
+ assert q('~', a, b).py()
+ c = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby({'c1': 'c1', 'c4': 'c4'}, 'max',
+ 'c3'))
+ assert q('~', a, c).py()
+ d = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby(['c1', 'c4'], 'max',
+ kx.Column('c3'), by_table=True))
+ assert q('~', a, d).py()
+ e = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby(kx.Column('c1') & kx.Column('c4'),
+ 'max', kx.Column('c3')))
+ assert q('~', a, e).py()
+ f = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby(kx.toq({'c1': 'c1', 'c4': 'c4'}),
+ 'max', 'c3'))
+ assert q('~', a, f).py()
+ a = q('select from t where c3=(max;c3) fby c1')
+ b = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby(kx.Column('c1'), 'max',
+ kx.Column('c3')))
+ assert q('~', a, b).py()
+
+ a = q('select from t where c3=({max x`c3};([] c3;c4)) fby c1')
+ b = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby('c1', '{max x`c3}', ['c3', 'c4'],
+ data_table=True))
+ assert q('~', a, b).py()
+ c = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby('c1', '{max x`c3}',
+ {'c3': 'c3', 'c4': 'c4'}))
+ assert q('~', a, c).py()
+ d = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby('c1', '{max x`c3}', ['c3', 'c4'],
+ data_table=True))
+ assert q('~', a, d).py()
+ e = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby('c1', '{max x`c3}',
+ kx.Column('c3')
+ & kx.Column('c4')))
+ assert q('~', a, e).py()
+ f = q.qsql.select('t', where=kx.Column('c3') == kx.Column.fby('c1', '{max x`c3}',
+ kx.toq({'c3': 'c3', 'c4': 'c4'})))
+ assert q('~', a, f).py()
+
+ a = q.qsql.select('t', columns=kx.Column('c3').min().name('min_c3')
+ & kx.Column('c4').mavg(4).max(), by=kx.Column('c1'))
+ b = q('select min_c3:min c3, max 4 mavg c4 by c1 from t')
+ assert q('~', a, b)
+
+ q('''t:([] Primary:`1a`1a`1a`1a`2e`2e`2e`2e;
+ Name:`AXA`FLO`FLO`AXA`AXA`ROT`ROT`ROT; Count: 11 1 60 14 1 1 6 4)''')
+ a = q('''select from t where
+ i=({exec first ind from x where Count=max Count};([]Count;ind:i)) fby Primary''')
+ b = q.qsql.select(
+ 't', where=kx.Column('i') == kx.ParseTree.fby(
+ 'Primary', '{exec first ind from x where Count=max Count}',
+ {'Count': 'Count', 'ind': 'i'}))
+ assert q('~', a, b).py()
+
+ t=q('([] a:1 2 3;b:4 5 6;c:7 8 9;d:10 11 12)')
+ q['t'] = t
+ a = q('select a:a(,)/:b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="/:"))
+ c = q.qsql.select('t', kx.Column('a').call(b',', kx.Column('b'), iterator="/:"))
+ d = q.qsql.select('t', kx.Column('a').call(kx.CharVector(','), kx.Column('b'), iterator="/:"))
+ assert q('~', a, b).py()
+ assert q('~', a, c).py()
+ assert q('~', a, d).py()
+ a = q('select a:a(,)\\:b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="\\:"))
+ assert q('~', a, b).py()
+ a = q('select a:a(,)\\:/:b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="\\:/:"))
+ assert q('~', a, b).py()
+ a = q('select a:a(,)/:\\:b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="/:\\:"))
+ assert q('~', a, b).py()
+ a = q('select {99,x} each a from t')
+ b = q.qsql.select('t', kx.Column('a').call('{99,x}', iterator="each"))
+ assert q('~', a, b).py()
+ a = q('select {99,x} peach a from t')
+ b = q.qsql.select('t', kx.Column('a').call('{99,x}', iterator="peach"))
+ assert q('~', a, b).py()
+ a = q('select a:(,[a]) each b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',',
+ kx.Column('b'), iterator="each", project_args=[0]))
+ assert q('~', a, b).py()
+ a = q('select a:(,[b]) each a from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="each",
+ col_arg_ind=1, project_args=[0]))
+ assert q('~', a, b).py()
+ a = q('select (,[;b]) each a from t')
+ b = q.qsql.select('t', kx.Column('a').call(',',
+ kx.Column('b'), iterator="each", project_args=[1]))
+ assert q('~', a, b).py()
+ a = q('select (+) scan a from t')
+ b = q.qsql.select('t', kx.Column('a').call('+', iterator="scan"))
+ assert q('~', a, b).py()
+ a = q('select (+\\)a from t')
+ b = q.qsql.select('t', kx.Column('a').call('+', iterator='\\'))
+ assert q('~', a, b).py()
+ a = q('select max a,(+) over b from t')
+ b = q.qsql.select('t', kx.Column('a').max() & kx.Column('b').call('+', iterator="over"))
+ assert q('~', a, b).py()
+ a = q('select max a,(+/)b from t')
+ b = q.qsql.select('t', kx.Column('a').max() & kx.Column('b').call('+', iterator="/"))
+ assert q('~', a, b).py()
+ a = q('select a:a(,)\'b from t')
+ b = q.qsql.select('t', kx.Column('a').call(',', kx.Column('b'), iterator="'"))
+ assert q('~', a, b).py()
+ a = q('select (-) prior a from t')
+ b = q.qsql.select('t', kx.Column('a').call('-', iterator="prior"))
+ assert q('~', a, b).py()
+
+
+@pytest.mark.unlicensed
+def test_column_licensed(kx):
+ if not kx.licensed:  # nothing is checked when a license is present
+ with pytest.raises(kx.LicenseException) as err:
+ kx.Column('s')
+ assert "kx.Column" in str(err)
diff --git a/tests/test_read.py b/tests/test_read.py
index a623818..71deaad 100644
--- a/tests/test_read.py
+++ b/tests/test_read.py
@@ -1,29 +1,15 @@
from contextlib import nullcontext
+from platform import system
+from pathlib import Path
import pytest
-@pytest.fixture
-def tmp_csv_path_1(tmp_path):
- p = tmp_path/'tmp1.csv'
- p.write_text('\n'.join((
- 'col,a,b,c',
- *(f'{"abc"[i % 3]},{i},{10 - i},{3 * i}' for i in range(10)),
- )))
- yield p
- p.unlink()
-
-
-@pytest.fixture
-def tmp_csv_path_2(tmp_path):
- p = tmp_path/'tmp2.csv'
- p.write_text('\n'.join(('a', *(str(x) for x in range(10)))))
- yield p
- p.unlink()
-
-
@pytest.mark.ipc
-def test_read_csv(kx, q, tmp_csv_path_1, tmp_csv_path_2):
+def test_read_csv(kx, q):
+ basePath = Path(__file__).parent.parent.parent
+ tmp_csv_path_1 = (basePath/'data\\tmp1.csv') if system() == 'Windows' else 'tests/data/tmp1.csv'
+ tmp_csv_path_2 = (basePath/'data\\tmp2.csv') if system() == 'Windows' else 'tests/data/tmp2.csv'
assert isinstance(
q.read.csv(str(tmp_csv_path_1), kx.CharVector('JJJ'), kx.CharAtom(','), True),
kx.Table,
@@ -56,7 +42,10 @@ def test_read_csv(kx, q, tmp_csv_path_1, tmp_csv_path_2):
@pytest.mark.ipc
-def test_read_csv_with_type_guessing(kx, q, tmp_csv_path_1, tmp_csv_path_2):
+def test_read_csv_with_type_guessing(kx, q):
+ basePath = Path(__file__).parent.parent.parent
+ tmp_csv_path_1 = (basePath/'data\\tmp1.csv') if system() == 'Windows' else 'tests/data/tmp1.csv'
+ tmp_csv_path_2 = (basePath/'data\\tmp2.csv') if system() == 'Windows' else 'tests/data/tmp2.csv'
reader = kx.QReader(q)
if not kx.licensed:
ctx = pytest.raises(kx.LicenseException)
diff --git a/tests/test_register.py b/tests/test_register.py
index 2920dc1..1dfcbd6 100644
--- a/tests/test_register.py
+++ b/tests/test_register.py
@@ -3,6 +3,10 @@
import pytest
+def test_dir(kx):
+ assert ['column_function', 'py_toq'] == dir(kx.register)  # exact, ordered match
+
+
def test_register_py_toq(q, kx):
with pytest.raises(TypeError) as err_info:
kx.toq(complex(1, 2))
@@ -24,3 +28,28 @@ def complex_toq_upd(data):
kx.register.py_toq(complex, complex_toq_upd, overwrite=True)
assert all(q('`real`imag!1 2f') == q('{x}', complex(1, 2)))
+
+
+def test_register_column_function(q, kx):
+ tab = kx.Table(data={  # fixture data; 'pred' is the column scaled below
+ 'sym': kx.random.random(100, ['a', 'b', 'c']),
+ 'true': kx.random.random(100, 100.0),
+ 'pred': kx.random.random(100, 100.0)
+ })
+ with pytest.raises(AttributeError) as err_info:  # 'minmax' must not exist yet
+ getattr(kx.Column, 'minmax') # noqa: B009
+ assert str(err_info.value) == "type object 'Column' has no attribute 'minmax'"
+
+ def min_max_scaler(self):  # first implementation registered as 'minmax'
+ return self.call('{(x-minData)%max[x]-minData:min x}')
+ kx.register.column_function('minmax', min_max_scaler)
+ assert 1.0 == tab.exec(kx.Column('pred').minmax().max()).py()
+
+ with pytest.raises(Exception) as err_info:  # registering again without overwrite
+ kx.register.column_function('minmax', min_max_scaler)
+ assert str(err_info.value) == "Attribute minmax already defined, please use 'overwrite' keyword"
+
+ def min_max_scaler(self):  # replacement whose q lambda multiplies the result by 2
+ return self.call('{2*(x-minData)%max[x]-minData:min x}')
+ kx.register.column_function('minmax', min_max_scaler, overwrite=True)
+ assert 2.0 == tab.exec(kx.Column('pred').minmax().max()).py()
diff --git a/tests/test_remote.py b/tests/test_remote.py
index b825d06..4763481 100644
--- a/tests/test_remote.py
+++ b/tests/test_remote.py
@@ -1,42 +1,33 @@
+import os
+
# Do not import pykx here - use the `kx` fixture instead!
import pytest
-def test_session_create_clear(kx, q_port):
- session = kx.remote.session()
- assert session._session is None
- session.create(port=q_port)
+@pytest.mark.isolate
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_session_create_clear(q_port):
+ import pykx as kx  # imported in the test body instead of using the kx fixture
+ session = kx.remote.session(port=q_port)
assert isinstance(session._session, kx.SyncQConnection)
- session.clear()
- assert session._session is None
-
-
-def test_library_add_clear(kx, q_port):
- session = kx.remote.session()
- session.create(port=q_port)
- assert session._libraries == []
- session.add_library('numpy', 'pandas')
- assert session._libraries == ['numpy', 'pandas']
- session.clear()
- assert session._libraries == []
-
-
-def test_session_errors(kx, q_port):
- session = kx.remote.session()
- with pytest.raises(Exception) as err:
- session.add_library('numpy')
- assert 'Unable to add packages in the absence' in str(err.value)
- session.create(port=q_port)
- with pytest.raises(Exception) as err:
- session.create(port=q_port)
- assert 'Active session in progress' in str(err.value)
-
-
-@pytest.mark.xfail(reason="KXI-36200", strict=False)
-@pytest.mark.unlicensed
-def test_remote_exec(kx, q_port):
- session = kx.remote.session()
- session.create(port=q_port)
+ assert session._session('1b')  # the underlying connection answers a trivial query
+ session.close()
+ with pytest.raises(RuntimeError) as err:  # querying after close() must fail
+ session._session('1b')
+ assert 'Attempted to use a closed IPC connection' in str(err.value)
+
+
+@pytest.mark.isolate
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_remote_exec(q_port):
+ import pykx as kx
+ session = kx.remote.session(port=q_port)
@kx.remote.function(session)
def func(x):
@@ -44,22 +35,34 @@ def func(x):
assert kx.q('2') == func(1)
-@pytest.mark.xfail(reason="KXI-36200", strict=False)
-@pytest.mark.unlicensed
-def test_remote_library_exec(kx, q_port):
- session = kx.remote.session()
- session.create(port=q_port)
- session.add_library('pykx')
+@pytest.mark.isolate
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_remote_library_exec(q_port):
+ import pykx as kx
+ session = kx.remote.session(port=q_port, libraries={'kx': 'pykx'})  # alias: library
@kx.remote.function(session)
def pykx_func(x, y):
- return pykx.q.til(x) + y # noqa: F821
- assert kx.q('5+til 5') == pykx_func(5, 5)
+ return kx.q.til(x) + y # noqa: F821
+ assert all(kx.q('5+til 5') == pykx_func(5, 5))  # comparison reduced with all()
+
+ @kx.remote.function(session)
+ def zero_arg():  # remote function that takes no arguments
+ return 10
+ assert kx.q('10') == zero_arg()
+
+@pytest.mark.isolate
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_exec_failures(q_port):
+ import pykx as kx
-@pytest.mark.xfail(reason="KXI-36200", strict=False)
-@pytest.mark.unlicensed
-def test_exec_failures(kx, q_port):
@kx.remote.function(10)
def test_func(x):
return x+1
@@ -67,30 +70,15 @@ def test_func(x):
test_func(1)
assert 'Supplied remote_session instance must be' in str(err.value)
- session = kx.remote.session()
-
- @kx.remote.function(session)
- def test_func(x):
- return x+1
- with pytest.raises(Exception) as err:
- test_func(2)
- assert "User session must be generated using the 'create_session'" in str(err.value)
-
- session = kx.remote.session()
- session.create(port=q_port)
+ session = kx.remote.session(port=q_port)
@kx.remote.function(session)
def test_func(x):
return numpy.array([x.py()]) # noqa: F821
- with pytest.raises(kx.exceptions.QError) as err:
+ with pytest.raises(kx.QError) as err:
test_func(10)
assert "name 'numpy' is not defined" in str(err.value)
- session.add_library('undefined')
-
- @kx.remote.function(session)
- def test_func(x):
- return x+1
- with pytest.raises(kx.exception.QError) as err:
- test_func(1)
- assert "Failed to load package: undefined" in str(err.value)
+ with pytest.raises(kx.QError) as err:
+ session.libraries({'un': 'undefined'})
+ assert "Failed to load library 'undefined' with alias 'un'" in str(err.value)
diff --git a/tests/test_streamlit.py b/tests/test_streamlit.py
index b4f9448..41970cd 100644
--- a/tests/test_streamlit.py
+++ b/tests/test_streamlit.py
@@ -1,4 +1,3 @@
-import os
import sys
# Do not import pykx here - use the `kx` fixture instead!
@@ -24,18 +23,3 @@ def test_streamlit(kx, q_port):
with pytest.raises(kx.QError) as err:
conn.query('tab', format='unsupported')
assert 'Unsupported format provided for query' in str(err.value)
-
-
-@pytest.mark.isolate
-@pytest.mark.skipif(
- os.getenv('PYKX_THREADING') is not None,
- reason='Threading only works when beta features enabled so this will pass in threading tests'
-)
-@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher")
-def test_beta():
- import pykx as kx
-
- with pytest.raises(kx.QError) as err:
- st.connection('pykx', type=kx.streamlit.PyKXConnection,
- host='localhost', port=5050)
- assert 'Attempting to use a beta feature "Streamlit' in str(err.value)
diff --git a/tests/test_system.py b/tests/test_system.py
index 549d1a0..4fc3c8b 100644
--- a/tests/test_system.py
+++ b/tests/test_system.py
@@ -304,9 +304,11 @@ def test_system_console_size():
' "table"))')
assert str(kx.q('tab')) != ('idx| x ..\n---| --..\n0 | ba..\n1 | qu..\n2 | ba..\n3 |'
' ba..\n4 | fo..\n..')
+ console = kx.q.system.console_size.py()
kx.q.system.console_size = [10, 10]
assert len(str(kx.q('tab'))) == len('idx| x ..\n---| --..\n0 | ba..\n1 | qu..\n2 | ba..\n3 '
'| ba..\n4 | fo..\n..')
+ kx.q.system.console_size = console
@pytest.mark.isolate
diff --git a/tests/test_tick.py b/tests/test_tick.py
new file mode 100644
index 0000000..5950f3e
--- /dev/null
+++ b/tests/test_tick.py
@@ -0,0 +1,372 @@
+import os
+import shutil
+
+# Do not import pykx here - use the `kx` fixture instead!
+import pytest
+
+
+def custom_api(x, y):
+ """
+ Return `kx.q.til(x) + y`; `kx` is not defined in this module (see the noqa),
+ so the name must be available wherever the function is eventually executed.
+ """
+ return kx.q.til(x) + y # noqa: F821
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_tick_init(kx):
+ trade_schema = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'px': kx.FloatAtom})
+ tick = kx.tick.TICK(port=5030)  # no tables supplied at construction
+ assert tick('1b')
+ assert tick('system"p"').py() == 5030  # q reports the requested port
+ assert tick('.tick.tabs').py() == []
+ tick.set_tables({'trade': trade_schema})
+ assert tick('.tick.tabs').py() == ['trade']
+ tick.stop()
+
+ tick = kx.tick.TICK(port=5030, tables={'trades': trade_schema})  # tables given up front
+ assert tick('1b')
+ assert tick('.tick.tabs').py() == ['trades']
+ tick.stop()
+
+ tick.restart()  # the same table list is expected after a restart
+ assert tick('1b')
+ assert tick('.tick.tabs').py() == ['trades']
+ tick.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_tick_start(kx):
+ trade_schema = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'px': kx.FloatAtom})
+ tick = kx.tick.TICK(port=5030, tables={'trades': trade_schema})
+ assert tick('.tick.tabs').py() == ['trades']
+ with pytest.raises(kx.QError) as err:  # .u.t is not available before start()
+ tick('.u.t')
+ assert '.u.t' in str(err.value)  # check the message, as the other assertions do
+ tick.start()
+ assert tick('.u.t').py() == ['trades']
+ tick.stop()
+
+ tick = kx.tick.TICK(port=5030)
+ with pytest.raises(kx.QError) as err:  # start() without any tables must fail
+ tick.start()
+ assert 'Unable to initialise TICKERPLANT without tables' in str(err.value)
+ tick.stop()
+
+ tick.restart()
+ assert tick('1b')
+ assert tick('.tick.tabs').py() == []
+ tick.stop()
+
+ tick = kx.tick.TICK(port=5030, tables={'trade': trade_schema}, log_directory='tick_logs')
+ tick.start()
+ assert 'tick_logs' in os.listdir()  # directory appears in the current working directory
+ assert f'log{kx.DateAtom("today")}' in os.listdir('tick_logs')
+ tick.stop()
+ shutil.rmtree('tick_logs')  # remove the log directory created above
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_tick_chained(kx):
+ trade_schema = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'px': kx.FloatAtom})
+ tick = kx.tick.TICK(port=5030, tables={'trades': trade_schema})
+ tick.start()
+ tick_chained = kx.tick.TICK(port=5031, chained=True)  # created without tables
+ tick_chained.start({'tickerplant': 'localhost:5030'})  # port of `tick` above
+ assert isinstance(tick_chained('trades'), kx.Table)  # 'trades' resolves on the chained process
+ tick.stop()
+ tick_chained.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_tick_apis(kx):
+ tick = kx.tick.TICK(port=5030)
+ tick.register_api('custom_func', custom_api)
+ with pytest.raises(kx.QError) as err:  # custom_api refers to an undefined `kx`
+ tick('custom_func', 5, 2)
+ assert "name 'kx' is not defined" in str(err.value)
+ tick.libraries({'kx': 'pykx'})  # after this call the same API call succeeds
+ assert tick('custom_func', 5, 2).py() == [2, 3, 4, 5, 6]  # til(5) + 2
+ tick.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_tick_timer(kx):
+ tick = kx.tick.TICK(port=5030)
+ assert tick('system"t"').py() == 100  # timer value before set_timer is called
+ tick.set_timer(500)
+ assert tick('system"t"').py() == 500  # the new value is visible on the process
+ tick.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_rtp_vanilla(kx):
+ import time
+ trade_schema = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'px': kx.FloatAtom})
+ quote_schema = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'sz': kx.FloatAtom})
+ tick = kx.tick.TICK(port=5030, tables={'trades': trade_schema, 'quotes': quote_schema})
+ tick.start()
+
+ rdb = kx.tick.RTP(port=5031)  # no subscriptions argument: both tables are expected below
+ rdb.start({'tickerplant': 'localhost:5030'})
+ assert isinstance(rdb('trades'), kx.Table)
+ assert isinstance(rdb('quotes'), kx.Table)
+ assert len(rdb('trades')) == 0
+ assert len(rdb('quotes')) == 0
+ # Publish data to tickerplant
+ with kx.SyncQConnection(port=5030) as q:
+ q.upd('trades', [kx.q.z.N, 'AAPL', 1.0])
+ time.sleep(1)  # presumably lets the published row reach the RTP before checking
+ assert len(rdb('trades')) == 1
+ assert len(rdb('quotes')) == 0
+ rdb.stop()
+
+ rdb = kx.tick.RTP(port=5031, subscriptions=['trades'])  # only 'trades' requested
+ rdb.start({'tickerplant': 'localhost:5030'})
+ assert isinstance(rdb('trades'), kx.Table)
+ assert len(rdb('trades')) == 0
+ with pytest.raises(kx.QError) as err:  # 'quotes' was not in the subscription list
+ rdb('quotes')
+ assert 'quotes' in str(err.value)
+ tick.stop()
+ rdb.stop()
+
+ rdb = kx.tick.RTP(port=5031)
+ with pytest.raises(kx.QError) as err:  # pre_processor is rejected on this RTP
+ rdb.pre_processor(lambda x, y: y)
+ assert 'Pre-processing of incoming' in str(err.value)
+ with pytest.raises(kx.QError) as err:  # post_processor is rejected as well
+ rdb.post_processor(lambda x, y: y)
+ assert 'Post-processing of incoming' in str(err.value)
+ rdb.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_rtp_custom_api(kx):
+ rdb = kx.tick.RTP(port=5031, apis={'custom_api': custom_api})  # API without libraries
+ with pytest.raises(kx.QError) as err:  # custom_api refers to an undefined `kx`
+ rdb('custom_api', 5, 2)
+ assert "name 'kx' is not defined" in str(err.value)
+ rdb.libraries({'kx': 'pykx'})
+ assert rdb('custom_api', 5, 2).py() == [2, 3, 4, 5, 6]  # til(5) + 2
+ rdb.stop()
+
+ rdb = kx.tick.RTP(  # API and libraries both supplied at construction
+ port=5031,
+ apis={'custom_api': custom_api},
+ libraries={'kx': 'pykx'})
+ assert rdb('custom_api', 5, 2).py() == [2, 3, 4, 5, 6]
+ rdb.stop()
+
+
+def _pre_process(table, message):  # pre-processor used by test_rtp_pre_proc
+ return message + 1  # `table` is ignored
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_rtp_pre_proc(kx):
+ rte = kx.tick.RTP(port=5031, libraries={'kx': 'pykx'}, vanilla=False)
+ assert rte('.tick.RTPPreProc', 'test', 1) == 1  # message returned unchanged
+ rte.pre_processor(_pre_process)
+ assert rte('.tick.RTPPreProc', 'test', 1) == 2  # _pre_process adds 1
+ rte.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_rtp_timer(kx):
+ rtp = kx.tick.RTP(port=5031)
+ assert rtp('system"t"').py() == 0  # timer value before set_timer is called
+ rtp.set_timer(500)
+ assert rtp('system"t"').py() == 500  # the new value is visible on the process
+ rtp.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_hdb_vanilla(kx):
+ hdb = kx.tick.HDB(port=5032)
+ assert hdb('1b')
+ with pytest.raises(kx.QError) as err:  # nothing named custom_api registered yet
+ hdb('custom_api', 5, 2)
+ assert "custom_api" in str(err.value)
+ hdb.register_api('custom_api', custom_api)
+ with pytest.raises(kx.QError) as err:  # registered, but `kx` is still undefined
+ hdb('custom_api', 5, 2)
+ assert "name 'kx' is not defined" in str(err.value)
+ hdb.libraries({'kx': 'pykx'})
+ assert hdb('custom_api', 5, 2).py() == [2, 3, 4, 5, 6]  # til(5) + 2
+ hdb.stop()
+
+ hdb = kx.tick.HDB(  # API and libraries both supplied at construction
+ port=5032,
+ apis={'custom_api': custom_api},
+ libraries={'kx': 'pykx'})
+ assert hdb('custom_api', 5, 2).py() == [2, 3, 4, 5, 6]
+ hdb.stop()
+
+
+def _gateway_func(x):  # `gateway` is not defined in this module (hence the noqa markers)
+ rdb_data = gateway.call_port('rdb', b'{x+1}', x) # noqa: F821
+ hdb_data = gateway.call_port('hdb', b'{x+2}', x) # noqa: F821
+ return rdb_data + hdb_data  # combine the two replies
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_gateway_vanilla(kx):
+ trade = kx.schema.builder({
+ 'time': kx.TimespanAtom, 'sym': kx.SymbolAtom,
+ 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom,
+ 'px': kx.FloatAtom})
+ tick = kx.tick.TICK(port=5030, tables={'trade': trade})
+ tick.start()
+ hdb = kx.tick.HDB(port=5031)
+ hdb.start(database='/tmp/db')  # NOTE(review): hard-coded POSIX-style path
+ rdb = kx.tick.RTP(port=5032)
+ rdb.start({'tickerplant': 'localhost:5030'})
+ gw = kx.tick.GATEWAY(  # connection names match those used in _gateway_func
+ port=5033,
+ connections={'rdb': 'localhost:5032', 'hdb': 'localhost:5031'},
+ apis={'custom_api': _gateway_func})
+ gw.start()
+ with kx.SyncQConnection(port=5033) as q:  # call the API through the gateway port
+ data = q('custom_api', 2)
+ assert isinstance(data, kx.LongAtom)
+ gw.stop()
+ hdb.stop()
+ rdb.stop()
+ tick.stop()
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_basic_infra(kx):
+ trade = kx.schema.builder({
+ 'time': kx.TimespanAtom,
+ 'sym': kx.SymbolAtom,
+ 'px': kx.FloatAtom})
+ basic = kx.tick.BASIC(tables={'trade': trade}, log_directory='basic_logs')
+ basic.start()
+ assert 'basic_logs' in os.listdir()  # directory appears in the current working directory
+ assert f'log{kx.DateAtom("today")}' in os.listdir('basic_logs')
+ assert basic.hdb is None
+
+ with kx.SyncQConnection(port=5011) as q:  # presumably the default RTP port - confirm
+ tab = q('trade')
+ assert isinstance(tab, kx.Table)
+ assert len(tab) == 0
+ with kx.SyncQConnection(port=5010) as q:  # presumably the default tickerplant port - confirm
+ q.upd('trade', [kx.q.z.N, 'AAPL', 1.0])
+ with kx.SyncQConnection(port=5011) as q:
+ tab = q('trade')
+ assert isinstance(tab, kx.Table)
+ assert len(tab) == 1
+ basic.stop()
+
+ # Test restart will replay messages
+ basic = kx.tick.BASIC(tables={'trade': trade}, log_directory='basic_logs')
+ basic.start()
+ with kx.SyncQConnection(port=5011) as q:
+ tab = q('trade')
+ assert isinstance(tab, kx.Table)
+ assert len(tab) == 1  # the row published before the restart is present again
+ basic.stop()
+
+ # Test restart with hard_reset set will reset
+ basic = kx.tick.BASIC(
+ tables={'trade': trade},
+ log_directory='basic_logs',
+ hard_reset=True)
+ basic.start()
+ with kx.SyncQConnection(port=5011) as q:
+ tab = q('trade')
+ assert isinstance(tab, kx.Table)
+ assert len(tab) == 0  # no rows are replayed this time
+ basic.stop()
+ shutil.rmtree('basic_logs')  # remove the log directory created above
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_process_kill(kx):
+ hdb = kx.tick.HDB(port=5012)
+ hdb.start(database='db')
+ with kx.SyncQConnection(port=5012) as q:  # the process is reachable before the kill
+ ret = q('1b')
+ assert ret
+ kx.util.kill_q_process(port=5012)  # the process is selected by port, not via `hdb`
+ with pytest.raises(kx.QError) as err:  # a new connection to 5012 must now fail
+ kx.SyncQConnection(port=5012)
+ assert any([x in str(err.value) for x in ['Connection refused', 'Connection reset']])
+
+
+@pytest.mark.skipif(
+ os.getenv('PYKX_THREADING') is not None,
+ reason='Not supported with PYKX_THREADING'
+)
+def test_init_args(kx):
+ hdb0 = kx.tick.HDB(port=5012)
+ assert 0 == hdb0('\\g')  # \g value when no init_args are given
+ hdb0.stop()
+
+ hdb1 = kx.tick.HDB(port=5012, init_args=['-g', '1'])  # presumably passed to q's command line
+ assert 1 == hdb1('\\g')
+ hdb1.stop()
+
+ with pytest.raises(TypeError) as err:  # init_args that is not a list
+ kx.tick.HDB(port=5012, init_args=10)
+ assert 'must be a list' in str(err.value)
+
+ with pytest.raises(TypeError) as err:  # list containing a non-str item
+ kx.tick.HDB(port=5012, init_args=['test', 1])
+ assert 'str type objects' in str(err.value)
diff --git a/tests/test_toq.py b/tests/test_toq.py
index 37fa0bd..8527667 100644
--- a/tests/test_toq.py
+++ b/tests/test_toq.py
@@ -269,6 +269,25 @@ def test_from_bytes(kx):
kx.GUIDAtom(b'x')
+@pytest.mark.unlicensed
+def test_from_bytes_np(kx):
+ b = kx.toq(np.bytes_(''))  # empty input -> CharVector
+ assert isinstance(b, kx.CharVector)
+ assert b.py() == b''
+ b = kx.toq(np.bytes_('a'))  # length 1 -> CharAtom
+ assert isinstance(b, kx.CharAtom)
+ assert b.py() == b'a'
+ b = kx.toq(np.bytes_('aa'))  # length 2 -> CharVector
+ assert isinstance(b, kx.CharVector)
+ assert b.py() == b'aa'
+ assert kx.toq(np.bytes_('abcdefghijklmnopqrstuvwxyz'), -11).py() == 'abcdefghijklmnopqrstuvwxyz'
+ for x in (np.bytes_(''), np.bytes_('12')):  # lengths 0 and 2 are rejected by CharAtom
+ with pytest.raises(ValueError):
+ kx.CharAtom(x)
+ with pytest.raises(TypeError):  # GUIDAtom does not accept np.bytes_ input
+ kx.GUIDAtom(np.bytes_('x'))
+
+
@pytest.mark.unlicensed
def test_from_datetime_date(kx):
d = date(2020, 9, 8)
@@ -495,6 +514,10 @@ def test_from_timedelta64(kx):
assert isinstance(kd, kx.TimeAtom)
assert kd.np() == np.timedelta64(60312222, 'ms')
+ kd = kx.TimespanAtom(d)
+ assert isinstance(kd, kx.TimespanAtom)
+ assert kd.np() == np.timedelta64(60312222971000, 'ns')
+
@pytest.mark.unlicensed
def test_from_UUID(kx):
@@ -562,6 +585,28 @@ def test_from_UUID_np_array(kx):
assert str(kx.K(u[0])) == str(u[0])
+@pytest.mark.unlicensed
+def test_from_UUID_pandas(kx):
+ values = [  # two complex numbers used only as a source of bytes
+ (1.3942713164545354e+64 - 7.26060294431316e-266j),
+ (3.638224669629338e+199 - 7.695044086357459e-212j)
+ ]
+
+ def complex_to_guid(c):  # build a 16-byte UUID from the hex text of both parts
+ real_part = c.real
+ imag_part = c.imag
+ real_bytes = real_part.hex().encode('utf-8')[:8]  # first 8 bytes of float.hex()
+ imag_bytes = imag_part.hex().encode('utf-8')[:8]
+ guid_bytes = real_bytes.ljust(8, b'\x00') + imag_bytes.ljust(8, b'\x00')
+ return UUID(bytes=guid_bytes)
+
+ guid_values = [complex_to_guid(c) for c in values]
+ guid_vector = kx.GUIDVector(guid_values)
+ u = {'g': guid_vector}
+ res = kx.toq(u['g'].pd(raw=True))  # convert the raw pandas form back with toq
+ assert isinstance(res, kx.K)
+
+
@pytest.mark.unlicensed
def test_to_UUID_np_array(kx):
u = np.array([UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')], dtype=object)
@@ -596,6 +641,7 @@ def test_from_tuple(kx):
@pytest.mark.unlicensed
@pytest.mark.nep49
+@pytest.mark.xfail(reason='Flaky NEP-49 testing with datetime', strict=False)
def test_from_list(kx):
assert kx.K([]).py() == []
assert kx.K([1, 2]).py() == [1, 2]
@@ -624,6 +670,7 @@ def test_from_list(kx):
assert isinstance(kx.TimestampVector(np.datetime64(0, 'ns')), kx.TimestampVector)
assert isinstance(kx.MonthVector(np.datetime64(0, 'M')), kx.MonthVector)
assert isinstance(kx.DateVector(np.datetime64(0, 'D')), kx.DateVector)
+ assert isinstance(kx.TimespanVector(np.timedelta64(0, 'us')), kx.TimespanVector)
assert isinstance(kx.TimespanVector(np.timedelta64(0, 'ns')), kx.TimespanVector)
assert isinstance(kx.MinuteVector(np.timedelta64(0, 'W')), kx.MinuteVector)
assert isinstance(kx.SecondVector(np.timedelta64(0, 's')), kx.SecondVector)
@@ -867,8 +914,42 @@ def test_from_numpy_ndarray_1(kx):
== [1.2, 1.3, [1.4, 1.5]]
assert kx.K(np.array([b'a', b'ab', 1.3, [1.3, 1.2], 'x'],
dtype=object)).py() == [b'a', b'ab', 1.3, [1.3, 1.2], 'x']
- with pytest.raises(TypeError):
- kx.LongVector(np.array([1, 2, 3], dtype=np.int32))
+
+ ar = np.array([1, 2], np.dtype('uint16'))
+ ark = kx.K(ar)
+ at = ar[0]
+ atk = kx.K(at)
+ assert isinstance(ark, kx.IntVector)
+ assert ark.py() == [1, 2]
+ assert isinstance(atk, kx.LongAtom) # ToDo
+ assert atk.py() == 1
+
+ ar = np.array([1, 2], np.dtype('uint32'))
+ ark = kx.K(ar)
+ at = ar[0]
+ atk = kx.K(at)
+ assert isinstance(ark, kx.LongVector)
+ assert ark.py() == [1, 2]
+ assert isinstance(atk, kx.LongAtom)
+ assert atk.py() == 1
+
+ ar = np.array([1, 2], np.dtype('int8'))
+ ark = kx.K(ar)
+ at = ar[0]
+ atk = kx.K(at)
+ assert isinstance(ark, kx.ShortVector)
+ assert ark.py() == [1, 2]
+ assert isinstance(atk, kx.LongAtom) # ToDo
+ assert atk.py() == 1
+
+ ar = np.array([1, 2], np.dtype('float16'))
+ ark = kx.K(ar)
+ at = ar[0]
+ atk = kx.K(at)
+ assert isinstance(ark, kx.RealVector)
+ assert ark.py() == [1, 2]
+ assert isinstance(atk, kx.RealAtom)
+ assert atk.py() == 1
@pytest.mark.unlicensed
@@ -929,14 +1010,9 @@ def test_from_numpy_ndarray_3(kx):
@pytest.mark.unlicensed
@pytest.mark.nep49
def test_from_numpy_incompatible_types(kx):
- for ty in (np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32):
- with pytest.raises(TypeError):
- kx.LongVector(np.arange(10).astype(ty))
for ty in (np.int64, np.uint64):
with pytest.raises(TypeError):
kx.ShortVector(np.arange(10).astype(ty))
- with pytest.raises(TypeError):
- kx.FloatVector(np.random.rand(10).astype('float32'))
with pytest.raises(TypeError):
kx.RealVector(np.random.rand(10).astype('float64'))
with pytest.raises(TypeError):
@@ -973,6 +1049,12 @@ def test_from_pandas_dataframe(kx, pd):
kx.List(df)
+@pytest.mark.unlicensed
+@pytest.mark.nep49
+def test_from_pandas_NA(kx, pd):
+ assert kx.toq(pd.NA).py() is None  # pd.NA converts to an object whose .py() is None
+
+
@pytest.mark.nep49
def test_from_pandas_dataframe_licensed(q, kx):
q.system.console_size = [25, 80]
@@ -1064,6 +1146,8 @@ def test_from_pandas_series(kx, pd):
assert all(symbol_vector == kx.K(symbol_vector).pd())
time_vector = pd.Series([1000000, 2000000, 3000000, 4000000, 5000000], dtype='timedelta64[ns]') # noqa
assert all(time_vector == kx.K(time_vector).pd())
+ time_vector = pd.Series([1000000, 2000000, 3000000, 4000000, 5000000], dtype='timedelta64[us]') # noqa
+ assert all(time_vector == kx.K(time_vector).pd())
timestamp_vector = pd.Series([0, 1, 2, 3, 4], dtype='datetime64[ns]')
assert all(timestamp_vector == kx.K(timestamp_vector).pd())
@@ -1349,3 +1433,271 @@ def test_dir(kx):
def test_Float64Index(kx):
pdFloat64Index = pd.DataFrame(data={'a': [1.0, 2.0, 3.0], 'b': [3, 4, 5]}).set_index('a')
assert all(kx.q('([a:1 2 3.0] b:3 4 5)') == kx.toq(pdFloat64Index))
+
+
+def test_str_as_char(kx):
+ string = 'qstring'
+ str_list = ['qstring0', 'qstring1']
+ str_dict = {'a': {'b': 'qstring0'}, 'b': 'qstring1'}  # includes a nested dict
+ np_list = np.array(str_list)
+ np_list_2d = np.array([str_list, str_list])
+ str_tab = pd.DataFrame(data={'x': np_list})
+
+ assert isinstance(kx.toq(string), kx.SymbolAtom)  # without the flag: symbol
+ qchar_string = kx.toq(string, strings_as_char=True)  # with the flag: char vector
+ assert isinstance(qchar_string, kx.CharVector)
+ assert all(qchar_string == b'qstring')
+
+ assert isinstance(kx.toq(str_list), kx.SymbolVector)
+ qchar_list = kx.toq(str_list, strings_as_char=True)  # list of char vectors
+ assert isinstance(qchar_list, kx.List)
+ assert isinstance(qchar_list[0], kx.CharVector)
+ assert all(qchar_list[0] == b'qstring0')
+
+ qsym_dict = kx.toq(str_dict)
+ assert isinstance(qsym_dict['a']['b'], kx.SymbolAtom)
+ assert qsym_dict['a']['b'] == 'qstring0'
+ assert isinstance(qsym_dict['b'], kx.SymbolAtom)
+ assert qsym_dict['b'] == 'qstring1'
+
+ qchar_dict = kx.toq(str_dict, strings_as_char=True)  # flag also reaches nested values
+ assert isinstance(qchar_dict['a']['b'], kx.CharVector)
+ assert all(qchar_dict['a']['b'] == b'qstring0')
+ assert isinstance(qchar_dict['b'], kx.CharVector)
+ assert all(qchar_dict['b'] == b'qstring1')
+
+ qsym_np_list = kx.toq(np_list)
+ assert isinstance(qsym_np_list, kx.SymbolVector)
+ qchar_np_list = kx.toq(np_list, strings_as_char=True)
+ assert isinstance(qchar_np_list, kx.List)
+ assert isinstance(qchar_np_list[0], kx.CharVector)
+ assert all(qchar_np_list[0] == b'qstring0')
+
+ qsym_np_list_2d = kx.toq(np_list_2d)  # 2-D array: list of symbol vectors
+ assert isinstance(qsym_np_list_2d, kx.List)
+ assert isinstance(qsym_np_list_2d[0], kx.SymbolVector)
+ qchar_np_list_2d = kx.toq(np_list_2d, strings_as_char=True)  # one more level of nesting
+ assert isinstance(qchar_np_list_2d, kx.List)
+ assert isinstance(qchar_np_list_2d[0], kx.List)
+ assert isinstance(qchar_np_list_2d[0][0], kx.CharVector)
+ assert all(qchar_np_list_2d[0][0] == b'qstring0')
+
+ qsym_tab = kx.toq(str_tab)
+ assert isinstance(qsym_tab['x'], kx.SymbolVector)
+ qchar_tab = kx.toq(str_tab, strings_as_char=True)  # flag applies to DataFrame columns too
+ assert isinstance(qchar_tab['x'], kx.List)
+ assert isinstance(qchar_tab['x'][0], kx.CharVector)
+
+
+def test_column_variable_tree_phrase(kx):
+ col = kx.Column('x')
+ assert kx.toq(col).py() == 'x'  # a bare column converts to its name
+
+ col_gt = 1 < kx.Column('x')  # column is the right-hand operand here
+ assert kx.toq(col_gt).py() == [kx.Operator('>'), 'x', 1]  # expected as x > 1
+
+ col_max = kx.Column('x').max()
+ assert kx.toq(col_max).py() == [kx.q('max'), 'x']
+
+ var = kx.Variable('nvar')
+ assert kx.toq(var) == 'nvar'
+
+ tree = kx.ParseTree(kx.q.parse(b'x=`a'))  # parse tree built from q source text
+ assert kx.toq(tree).py() == [kx.Operator('='), 'x', ['a']]
+
+ phrase_0 = kx.QueryPhrase(kx.Column('x') == 'a')  # expected result has one more list level
+ assert kx.toq(phrase_0).py() == [[kx.Operator('='), 'x', ['a']]]
+
+ phrase_1 = kx.QueryPhrase({'asA': 'a', 'negB': [kx.q('neg'), 'b']})
+ assert {'asA': 'a', 'negB': [kx.q('neg'), 'b']} == phrase_1.to_dict()  # dict round-trips
+
+
+def test_pyarrow(kx):
+ import pyarrow as pa
+
+ def test_pa(arr, karr, kat):  # check toq() of an array and of its first element
+ at = arr[0]
+ arr_toq = kx.toq(arr)
+ at_toq = kx.toq(at)
+ assert kx.q('~', karr, arr_toq).py()  # q match (~) against the expected array value
+ assert kx.q('~', kat, at_toq).py()  # and against the expected element value
+
+ # pyarrow.int8
+ test_pa(pa.array([1], pa.int8()), kx.q('1'), kx.q('1')) # ToDo
+ test_pa(pa.array([1, 2], pa.int8()), kx.q('1 2h'), kx.q('1'))
+
+ # pyarrow.int16
+ test_pa(pa.array([1], pa.int16()), kx.q('1'), kx.q('1')) # ToDo
+ test_pa(pa.array([1, 2], pa.int16()), kx.q('1 2h'), kx.q('1'))
+
+ # pyarrow.int32
+ test_pa(pa.array([1], pa.int32()), kx.q('1'), kx.q('1')) # ToDo
+ test_pa(pa.array([1, 2], pa.int32()), kx.q('1 2i'), kx.q('1'))
+
+ # pyarrow.int64
+ test_pa(pa.array([1], pa.int64()), kx.q('1'), kx.q('1'))
+ test_pa(pa.array([1, 2], pa.int64()), kx.q('1 2'), kx.q('1'))
+
+ # pyarrow.uint8
+ test_pa(pa.array([0], type=pa.uint8()), kx.q('0'), kx.q('0')) # ToDo
+ test_pa(pa.array([0, 1, 2], type=pa.uint8()), kx.q('0x000102'), kx.q('0')) # ToDo
+
+ # pyarrow.uint16
+ test_pa(pa.array([0], type=pa.uint16()), kx.q('0'), kx.q('0')) # ToDo
+ test_pa(pa.array([0, 1, 2], type=pa.uint16()), kx.q('0 1 2i'), kx.q('0')) # ToDo
+
+ # pyarrow.uint32
+ test_pa(pa.array([0], type=pa.uint32()), kx.q('0'), kx.q('0'))
+ test_pa(pa.array([0, 1, 2], type=pa.uint32()), kx.q('0 1 2'), kx.q('0'))
+
+ # pyarrow.uint64
+ # test_pa(pa.array([0, 1, 2], type=pa.uint64()), kx.q('(),1'), kx.q('1'))
+
+ # pyarrow.float16
+ test_pa(pa.array([np.float16(1.0)], pa.float16()), kx.q('1e'), kx.q('1e')) # ToDo
+ test_pa(pa.array([np.float16(1.0), np.float16(2.0)], pa.float16()), kx.q('1 2e'),
+ kx.q('1e'))
+
+ # pyarrow.float32
+ test_pa(pa.array([np.float32(1.0)], pa.float32()), kx.q('1e'), kx.q('1f')) # ToDo
+ test_pa(pa.array([np.float32(1.0), np.float32(2.0)], pa.float32()), kx.q('1 2e'),
+ kx.q('1f'))
+
+ # pyarrow.float64
+ test_pa(pa.array([np.float64(1.0)], pa.float64()), kx.q('1f'), kx.q('1f')) # ToDo
+ test_pa(pa.array([np.float64(1.0), np.float64(2.0)], pa.float64()), kx.q('1 2f'),
+ kx.q('1f'))
+
+ # pyarrow.time32
+ test_pa(pa.array([1], pa.time32('s')), kx.q('0D00:00:01'), kx.q('0D00:00:01')) # ToDo
+ test_pa(pa.array([1, 2], pa.time32('s')), kx.q('0D00:00:01 0D00:00:02'), kx.q('0D00:00:01'))
+
+ # ToDo
+ test_pa(pa.array([1], pa.time32('ms')), kx.q('0D00:00:00.001'), kx.q('0D00:00:00.001'))
+ test_pa(pa.array([1, 2], pa.time32('ms')), kx.q('0D00:00:00.001 0D00:00:00.002'),
+ kx.q('0D00:00:00.001'))
+
+ # pyarrow.time64
+ test_pa(pa.array([1], pa.time64('us')), kx.q('0D00:00:00.000001'),
+ kx.q('0D00:00:00.000001')) # ToDo
+ test_pa(pa.array([1, 2], pa.time64('us')), kx.q('0D00:00:00.000001 0D00:00:00.000002'),
+ kx.q('0D00:00:00.000001'))
+
+ # ToDo: pyarrow.lib.ArrowInvalid: Value 1 has non-zero nanoseconds
+ # test_pa(pa.array([1], pa.time64('ns')), kx.q('(),1'), kx.q('1'))
+
+ test_pa(pa.array([0], pa.time64('ns')), kx.q('0D00'), kx.q('0D00')) # ToDo
+ test_pa(pa.array([0, 1000], pa.time64('ns')), kx.q('0D00 0D00:00:00.000001'), kx.q('0D00'))
+
+ # pyarrow.timestamp
+ test_pa(pa.array([1], pa.timestamp('ms')), kx.q('1970.01.01D00:00:00.001'),
+ kx.q('1970.01.01D00:00:00.001'))
+ test_pa(pa.array([1, 2], pa.timestamp('ms')),
+ kx.q('1970.01.01D00:00:00.001 1970.01.01D00:00:00.002'),
+ kx.q('1970.01.01D00:00:00.001'))
+
+ test_pa(pa.array([1], pa.timestamp('ns')), kx.q('1970.01.01D00:00:00.000000001'),
+ kx.q('1970.01.01D00:00:00.000000001'))
+ test_pa(pa.array([1, 2], pa.timestamp('ns')),
+ kx.q('1970.01.01D00:00:00.000000001 1970.01.01D00:00:00.000000002'),
+ kx.q('1970.01.01D00:00:00.000000001'))
+
+ test_pa(pa.array([1], pa.timestamp('us')), kx.q('1970.01.01D00:00:00.000001000'),
+ kx.q('1970.01.01D00:00:00.000001000'))
+ test_pa(pa.array([1, 2], pa.timestamp('us')),
+ kx.q('1970.01.01D00:00:00.000001000 1970.01.01D00:00:00.000002000'),
+ kx.q('1970.01.01D00:00:00.000001000'))
+
+ test_pa(pa.array([1], pa.timestamp('s')), kx.q('1970.01.01D00:00:01.000000000'),
+ kx.q('1970.01.01D00:00:01.000000000'))
+ test_pa(pa.array([1, 2], pa.timestamp('s')),
+ kx.q('1970.01.01D00:00:01.000000000 1970.01.01D00:00:02.000000000'),
+ kx.q('1970.01.01D00:00:01.000000000'))
+
+ # pyarrow.date32
+ test_pa(pa.array([pa.scalar(date(2012, 1, 1), type=pa.date32())]), kx.q('2012.01.01'),
+ kx.q('2012.01.01'))
+ test_pa(pa.array([pa.scalar(date(2012, 1, 1), type=pa.date32())]*2),
+ kx.q('2#(),2012.01.01'), kx.q('2012.01.01'))
+
+ # pyarrow.date64
+ test_pa(pa.array([pa.scalar(date(2012, 1, 1), type=pa.date64())]), kx.q('2012.01.01'),
+ kx.q('2012.01.01'))
+ test_pa(pa.array([pa.scalar(date(2012, 1, 1), type=pa.date64())]*2),
+ kx.q('2#(),2012.01.01'), kx.q('2012.01.01'))
+
+ # pyarrow.duration
+ test_pa(pa.array([1], pa.duration('ns')), kx.q('0D00:00:00.000000001'),
+ kx.q('0D00:00:00.000000001'))
+ test_pa(pa.array([1, 2], pa.duration('ns')),
+ kx.q('(),0D00:00:00.000000001 0D00:00:00.000000002'), kx.q('0D00:00:00.000000001'))
+
+ test_pa(pa.array([1], pa.duration('ms')), kx.q('0D00:00:00.001000000'),
+ kx.q('0D00:00:00.001000000'))
+ if kx.config.pandas_2:
+ test_pa(pa.array([1, 2], pa.duration('ms')), kx.q('00:00:00.001 00:00:00.002'),
+ kx.q('0D00:00:00.001000000'))
+ else:
+ test_pa(pa.array([1, 2], pa.duration('ms')), kx.q('0D00:00:00.001 0D00:00:00.002'),
+ kx.q('0D00:00:00.001000000'))
+
+ test_pa(pa.array([1], pa.duration('us')), kx.q('0D00:00:00.000001000'),
+ kx.q('0D00:00:00.000001000'))
+ test_pa(pa.array([1, 2], pa.duration('us')),
+ kx.q('0D00:00:00.000001000 0D00:00:00.000002000'), kx.q('0D00:00:00.000001000'))
+
+ test_pa(pa.array([1], pa.duration('s')), kx.q('0D00:00:01.000000000'),
+ kx.q('0D00:00:01.000000000'))
+ if kx.config.pandas_2:
+ test_pa(pa.array([1, 2], pa.duration('s')), kx.q('00:00:01 00:00:02'),
+ kx.q('0D00:00:01.000000000'))
+ else:
+ test_pa(pa.array([1, 2], pa.duration('s')), kx.q('0D00:00:01 0D00:00:02'),
+ kx.q('0D00:00:01.000000000'))
+
+ # pyarrow.binary
+ test_pa(pa.array(['foo'], type=pa.binary()), kx.q('"foo"'),
+ kx.q('"foo"'))
+ test_pa(pa.array(['foo', 'bar', 'baz'], type=pa.binary()), kx.q('("foo";"bar";"baz")'),
+ kx.q('"foo"'))
+
+ # pyarrow.string
+ test_pa(pa.array(['foo'], type=pa.string()), kx.q('`foo'),
+ kx.q('`foo'))
+ test_pa(pa.array(['foo', 'bar', 'baz'], type=pa.string()), kx.q('`foo`bar`baz'),
+ kx.q('`foo'))
+
+ # pyarrow.utf8
+ test_pa(pa.array(['foo'], type=pa.utf8()), kx.q('`foo'),
+ kx.q('`foo'))
+ test_pa(pa.array(['foo', 'bar', 'baz'], type=pa.utf8()), kx.q('`foo`bar`baz'),
+ kx.q('`foo'))
+
+ # pyarrow.large_binary
+ test_pa(pa.array(['foo'], type=pa.large_binary()), kx.q('"foo"'),
+ kx.q('"foo"'))
+ test_pa(pa.array(['foo', 'bar', 'baz'], type=pa.large_binary()),
+ kx.q('("foo";"bar";"baz")'), kx.q('"foo"'))
+
+ # pyarrow.large_string
+ test_pa(pa.array(['foo'], type=pa.large_string()), kx.q('`foo'), kx.q('`foo'))
+ test_pa(pa.array(['foo', 'bar'], type=pa.large_string()), kx.q('`foo`bar'), kx.q('`foo'))
+
+ # pyarrow.large_utf8
+ test_pa(pa.array(['foo'], type=pa.large_utf8()), kx.q('`foo'), kx.q('`foo'))
+ test_pa(pa.array(['foo', 'bar'], type=pa.large_utf8()), kx.q('`foo`bar'), kx.q('`foo'))
+
+
+def test_pandas_timedelta(kx):
+    """Round-trip q temporal atoms through pandas and compare with the expected q value.
+
+    The expected literals for the minute, second and millisecond inputs depend on
+    `kx.config.pandas_2`; the sub-millisecond inputs expect timespan literals either way.
+    """
+    if kx.config.pandas_2:
+        coarse = ['16:36:00', '16:36:29', '16:36:29.214']
+    else:
+        coarse = ['0D16:36:00.000000000', '0D16:36:29.000000000', '0D16:36:29.214000000']
+    expected = coarse + ['0D16:36:29.214344000', '0D16:36:29.214344678']
+    inputs = ['16:36', '16:36:29', '16:36:29.214', '16:36:29.214344', '16:36:29.214344678']
+    for source, result in zip(inputs, expected):
+        assert kx.toq(kx.q(source).pd()) == kx.q(result)
diff --git a/tests/test_util.py b/tests/test_util.py
index def26ab..bbc7aad 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -1,8 +1,12 @@
+from pathlib import Path
+import os
import pickle
+import shutil
from time import sleep
from uuid import uuid4
import pytest
+import toml
@pytest.mark.unlicensed
@@ -128,3 +132,87 @@ def test_debug_environment(kx):
@pytest.mark.unlicensed
def test_debug_environment_ret(kx):
assert isinstance(kx.util.debug_environment(return_info=True), str)
+
+
+@pytest.mark.unlicensed
+def test_install_q(kx):
+    """Install q into `~/qfolder` and check the folder and `~/.pykx-config` that result.
+
+    The test writes into the real home directory, so cleanup runs in a `finally`
+    block: a failed assertion must not leave `~/qfolder` behind (which would trip
+    the precondition on the next run) or a stray `~/.pykx-config`.
+    """
+    base_path = Path(os.path.expanduser('~'))
+    folder = base_path / 'qfolder'
+    config_path = base_path / '.pykx-config'
+    # Precondition: never install over a folder that already exists.
+    assert not os.path.isdir(folder)
+    try:
+        kx.util.install_q(folder)
+        assert os.path.isfile(config_path)
+        with open(config_path, 'r') as file:
+            data = toml.load(file)
+        assert ['PYKX_Q_EXECUTABLE', 'QHOME'] == list(data['default'].keys())
+        assert os.path.isdir(folder)
+        assert os.path.isfile(folder / 'q.k')
+    finally:
+        # Best-effort removal: either artefact may be missing if the install failed early.
+        # NOTE(review): this also removes a pre-existing ~/.pykx-config, as the
+        # original success path did - confirm the suite only runs in disposable homes.
+        shutil.rmtree(str(folder), ignore_errors=True)
+        if os.path.isfile(config_path):
+            os.remove(str(config_path))
+
+
+def test_detect_bad_columns(kx):
+    """Check `kx.util.detect_bad_columns` and `_repr_html_` on tables with bad column names.
+
+    Covers duplicate names, a name containing a space, both at once, keyed
+    versions of those tables, a splayed table and a partitioned table. The splay
+    (`multiColSplay`) and the partitioned database (`HDB`) are written relative
+    to the current working directory.
+    """
+    # Duplicate column names only.
+    dup_col = kx.q('flip `a`a`a`b!4 4#16?1f')
+    with pytest.warns(RuntimeWarning) as w:
+        assert kx.util.detect_bad_columns(dup_col)
+    assert "Duplicate columns: ['a']" in w[0].message.args[0]
+    assert "Invalid columns" not in w[0].message.args[0]
+    assert ['a'] == kx.util.detect_bad_columns(dup_col, return_cols=True)
+    html_repr = dup_col._repr_html_()
+    assert isinstance(html_repr, str)
+    assert "pykx.Table" in html_repr
+
+    # A column name containing a space only.
+    invalid_col = kx.q('flip (`a;`b;`c;`$"a b")!4 4#16?1f')
+    with pytest.warns(RuntimeWarning) as w:
+        assert kx.util.detect_bad_columns(invalid_col)
+    assert "Duplicate columns:" not in w[0].message.args[0]
+    assert "Invalid columns: ['a b']" in w[0].message.args[0]
+    assert ['a b'] == kx.util.detect_bad_columns(invalid_col, return_cols=True)
+    html_repr = invalid_col._repr_html_()
+    assert isinstance(html_repr, str)
+    assert "pykx.Table" in html_repr
+
+    # Both problems in one table.
+    dup_invalid_cols = kx.q('flip (`a;`a;`a;`b;`$"a b")!5 5#25?1f')
+    with pytest.warns(RuntimeWarning) as w:
+        assert kx.util.detect_bad_columns(dup_invalid_cols)
+    assert "Duplicate columns: ['a']" in w[0].message.args[0]
+    assert "Invalid columns: ['a b']" in w[0].message.args[0]
+    assert ['a', 'a b'] == kx.util.detect_bad_columns(dup_invalid_cols, return_cols=True)
+    html_repr = dup_invalid_cols._repr_html_()
+    assert isinstance(html_repr, str)
+    assert "pykx.Table" in html_repr
+
+    # Keyed versions of the three tables above share one combined warning message.
+    for i in [dup_col, invalid_col, dup_invalid_cols]:
+        t = i.set_index(1)
+        with pytest.warns(RuntimeWarning) as w:
+            assert kx.util.detect_bad_columns(t)
+        assert "Duplicate columns or columns with" in w[0].message.args[0]
+        html_repr = t._repr_html_()
+        assert isinstance(html_repr, str)
+        assert "pykx.KeyedTable" in html_repr
+
+    # Splayed table written to ./multiColSplay and read back.
+    tab = kx.q('{x set flip (`a;`$"a b")!2 10#20?1f;get x}`:multiColSplay/')
+    with pytest.warns(RuntimeWarning) as w:
+        assert kx.util.detect_bad_columns(tab)
+    assert "Duplicate columns:" not in w[0].message.args[0]
+    assert "Invalid columns: ['a b']" in w[0].message.args[0]
+    assert ['a b'] == kx.util.detect_bad_columns(tab, return_cols=True)
+    html_repr = tab._repr_html_()
+    assert isinstance(html_repr, str)
+    assert "pykx.Splay" in html_repr
+
+    # Partitioned table: built and loaded from inside ./HDB. The working directory
+    # is restored in `finally` so a failing assertion cannot leave later tests
+    # running from inside HDB.
+    original_cwd = os.getcwd()
+    os.makedirs('HDB', exist_ok=True)
+    os.chdir('HDB')
+    try:
+        kx.q('(`$":2001.01.01/partTab/") set flip(`a;`$"a b")!2 10#20?1f')
+        kx.q('(`$":2001.01.02/partTab/") set flip(`a;`$"a b")!2 10#20?1f')
+        kx.q('system"l ."')
+        ptab = kx.q['partTab']
+        with pytest.warns(RuntimeWarning) as w:
+            assert kx.util.detect_bad_columns(ptab)
+        assert "Duplicate columns:" not in w[0].message.args[0]
+        assert "Invalid columns: ['a b']" in w[0].message.args[0]
+        assert ['a b'] == kx.util.detect_bad_columns(ptab, return_cols=True)
+        html_repr = ptab._repr_html_()
+        assert isinstance(html_repr, str)
+        assert "pykx.Part" in html_repr
+    finally:
+        os.chdir(original_cwd)
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index d0361cb..e7122b6 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -10,6 +10,7 @@
import os
import pickle
from platform import python_implementation
+import shutil
from textwrap import dedent
from uuid import UUID
import itertools
@@ -244,51 +245,51 @@ def test_equality(self, q):
assert not q('5') == None # noqa: E711
def test_slicing(self, q, kx):
- test_vector = q('1 2 3')
- assert test_vector[0].py() == test_vector.py()[0]
- assert test_vector[-1].py() == test_vector.py()[-1]
- assert test_vector[1:].py() == test_vector.py()[1:]
- assert test_vector[-1:].py() == test_vector.py()[-1:]
- assert test_vector[-2:-1].py() == test_vector.py()[-2:-1]
- assert test_vector[-5:].py() == test_vector.py()[-5:]
- assert test_vector[:-1].py() == test_vector.py()[:-1]
- assert test_vector[:-3].py() == test_vector.py()[:-3]
- assert test_vector[::1].py() == test_vector.py()[::1]
- assert test_vector[::2].py() == test_vector.py()[::2]
- assert test_vector[-1:5:2].py() == test_vector.py()[-1:5:2]
- assert test_vector[::-1].py() == test_vector.py()[::-1]
+ vector = q('1 2 3')
+ assert vector[0].py() == vector.py()[0]
+ assert vector[-1].py() == vector.py()[-1]
+ assert vector[1:].py() == vector.py()[1:]
+ assert vector[-1:].py() == vector.py()[-1:]
+ assert vector[-2:-1].py() == vector.py()[-2:-1]
+ assert vector[-5:].py() == vector.py()[-5:]
+ assert vector[:-1].py() == vector.py()[:-1]
+ assert vector[:-3].py() == vector.py()[:-3]
+ assert vector[::1].py() == vector.py()[::1]
+ assert vector[::2].py() == vector.py()[::2]
+ assert vector[-1:5:2].py() == vector.py()[-1:5:2]
+ assert vector[::-1].py() == vector.py()[::-1]
with pytest.raises(ValueError) as err:
- test_vector[::0]
+ vector[::0]
assert 'slice step cannot be zero' in str(err)
- test_list = q('(1 2 3; 4 5 6)')
- assert test_list[0].py() == test_list.py()[0]
- assert test_list[-1].py() == test_list.py()[-1]
- assert test_list[:6].py() == test_list.py()[:6]
+ qlist = q('(1 2 3; 4 5 6)')
+ assert qlist[0].py() == qlist.py()[0]
+ assert qlist[-1].py() == qlist.py()[-1]
+ assert qlist[:6].py() == qlist.py()[:6]
with pytest.raises(ValueError) as err:
- test_list[::0]
+ qlist[::0]
assert 'slice step cannot be zero' in str(err)
- test_table = q('([] a:1 2 3)')
- assert all(test_table[1:].pd() == test_table.pd()[1:].reset_index(drop=True))
- assert all(test_table[-1:].pd() == test_table.pd()[-1:].reset_index(drop=True))
+ qtable = q('([] a:1 2 3)')
+ assert all(qtable[1:].pd() == qtable.pd()[1:].reset_index(drop=True))
+ assert all(qtable[-1:].pd() == qtable.pd()[-1:].reset_index(drop=True))
with pytest.raises(ValueError) as err:
- test_table[::0]
+ qtable[::0]
assert 'slice step cannot be zero' in str(err)
- test_table2 = q('([] a:1 2 3; b:4 5 6; c:7 8 9)')
- assert all(test_table2[2:].pd() == test_table2.pd()[2:].reset_index(drop=True))
- assert all(test_table2[-2:].pd() == test_table2.pd()[-2:].reset_index(drop=True))
- assert all(test_table2[-3:].pd() == test_table2.pd()[-3:].reset_index(drop=True))
- assert test_table2[6:] == test_table2[10:]
- assert all(test_table2[-4:].pd() == test_table2.pd()[-4:].reset_index(drop=True))
- assert all(test_table2[:4].pd() == test_table2.pd()[:4].reset_index(drop=True))
- assert all(test_table2[::1].pd() == test_table2.pd()[::1].reset_index(drop=True))
- assert all(test_table2[::2].pd() == test_table2.pd()[::2].reset_index(drop=True))
- assert all(test_table2[-1:5:2].pd() == test_table2.pd()[-1:5:2].reset_index(drop=True))
- assert all(test_table2[::-1].pd() == test_table2.pd()[::-1].reset_index(drop=True))
- assert test_table2[:-9] == q('sublist', 0, test_table2)
- assert all(test_table2[:-1].pd() == test_table2.pd()[:-1])
+ qtable2 = q('([] a:1 2 3; b:4 5 6; c:7 8 9)')
+ assert all(qtable2[2:].pd() == qtable2.pd()[2:].reset_index(drop=True))
+ assert all(qtable2[-2:].pd() == qtable2.pd()[-2:].reset_index(drop=True))
+ assert all(qtable2[-3:].pd() == qtable2.pd()[-3:].reset_index(drop=True))
+ assert qtable2[6:] == qtable2[10:]
+ assert all(qtable2[-4:].pd() == qtable2.pd()[-4:].reset_index(drop=True))
+ assert all(qtable2[:4].pd() == qtable2.pd()[:4].reset_index(drop=True))
+ assert all(qtable2[::1].pd() == qtable2.pd()[::1].reset_index(drop=True))
+ assert all(qtable2[::2].pd() == qtable2.pd()[::2].reset_index(drop=True))
+ assert all(qtable2[-1:5:2].pd() == qtable2.pd()[-1:5:2].reset_index(drop=True))
+ assert all(qtable2[::-1].pd() == qtable2.pd()[::-1].reset_index(drop=True))
+ assert qtable2[:-9] == q('sublist', 0, qtable2)
+ assert all(qtable2[:-1].pd() == qtable2.pd()[:-1])
empty_vector = q('`long$()')
assert empty_vector[1:] == empty_vector
@@ -320,7 +321,7 @@ def test_slicing(self, q, kx):
assert (list_of_two[-1:] == q('enlist 2f')).all()
assert (list_of_two[:-1] == q('enlist 1')).all()
- def test_vector_indexing(self, q): # noqa: C901
+ def test_vector_indexing(self, q, kx):
vector = q('til 3')
vectorpy = vector.py()
indexList = [-3, -2, -1, 0, 1, 2, 3, None]
@@ -330,8 +331,8 @@ def test_vector_indexing(self, q): # noqa: C901
for i in comboList:
s = slice(*i)
try:
- q = vector[s]
- qNoqNulls = [None if i.is_null else i.py() for i in q]
+ qpd = vector[s]
+ qNoqNulls = [None if i.is_null else i.py() for i in qpd]
qErr = False
except Exception as ex:
qEx = ex
@@ -350,7 +351,7 @@ def test_vector_indexing(self, q): # noqa: C901
print(s, qEx, p)
raise AssertionError
elif not qErr and pErr:
- print(s, q, pEx)
+ print(s, qpd, pEx)
raise AssertionError
elif qErr and pErr:
if not qErr == pErr:
@@ -360,7 +361,7 @@ def test_vector_indexing(self, q): # noqa: C901
print(s)
raise AssertionError
- def test_list_indexing(self, q): # noqa: C901
+ def test_list_indexing(self, q, kx):
vector = q('(1i;2f;3j)')
vectorpy = vector.py()
indexList = [-3, -2, -1, 0, 1, 2, 3, None]
@@ -370,8 +371,8 @@ def test_list_indexing(self, q): # noqa: C901
for i in comboList:
s = slice(*i)
try:
- q = vector[s]
- qNoqNulls = [None if i.is_null else i.py() for i in q]
+ qpd = vector[s]
+ qNoqNulls = [None if i.is_null else i.py() for i in qpd]
qErr = False
except Exception as ex:
qEx = ex
@@ -390,7 +391,7 @@ def test_list_indexing(self, q): # noqa: C901
print(s, qEx, p)
raise AssertionError
elif not qErr and pErr:
- print(s, q, pEx)
+ print(s, qpd, pEx)
raise AssertionError
elif qErr and pErr:
if not qErr == pErr:
@@ -400,7 +401,7 @@ def test_list_indexing(self, q): # noqa: C901
print(s)
raise AssertionError
- def test_table_indexing(self, q): # noqa: C901
+ def test_table_indexing(self, q, kx):
tab = q('([] a:1 2 3; b:4 5 6; c:7 8 9)')
tabpd = tab.pd()
indexList = [-3, -2, -1, 0, 1, 2, 3, None]
@@ -410,7 +411,7 @@ def test_table_indexing(self, q): # noqa: C901
for i in comboList:
s = slice(*i)
try:
- q = tab[s].pd()
+ qpd = tab[s].pd()
qErr = False
except Exception as ex:
qEx = ex
@@ -422,14 +423,14 @@ def test_table_indexing(self, q): # noqa: C901
pEx = ex
pErr = True
if not qErr and not pErr:
- if len(q) != len(p) or not all(q == p):
+                # Compare the sliced q result (`qpd`), not the `q` fixture, with the pandas slice.
+                # NOTE(review): all() over a DataFrame iterates its column labels, so this
+                # does not check values - consider (qpd == p).all().all().
+                if len(qpd) != len(p) or not all(qpd == p):
print(s, q, p)
raise AssertionError
elif qErr and not pErr:
print(s, qEx, p)
raise AssertionError
elif not qErr and pErr:
- print(s, q, pEx)
+ print(s, qpd, pEx)
raise AssertionError
elif qErr and pErr:
if not qErr == pErr:
@@ -454,7 +455,8 @@ def test_boolean_atom(self, q):
assert bool(t) is True
assert bool(f) is False
- def test_null_gen_lic(self, kx):
+ @pytest.mark.unlicensed()
+ def test_null_gen(self, kx):
qtypes = [kx.GUIDAtom, kx.ShortAtom, kx.IntAtom,
kx.LongAtom, kx.RealAtom, kx.FloatAtom,
kx.CharAtom, kx.SymbolAtom, kx.TimestampAtom,
@@ -466,7 +468,8 @@ def test_null_gen_lic(self, kx):
assert type(null_val) == i
assert null_val.is_null
- def test_inf_pos_lic(self, kx):
+ @pytest.mark.unlicensed()
+ def test_inf_pos(self, kx):
qtypes = [kx.ShortAtom, kx.IntAtom,
kx.LongAtom, kx.RealAtom, kx.FloatAtom,
kx.TimestampAtom, kx.MonthAtom, kx.DateAtom,
@@ -475,21 +478,34 @@ def test_inf_pos_lic(self, kx):
for i in qtypes:
inf_val = getattr(i, 'inf') # noqa: B009
assert type(inf_val) == i
- assert inf_val>0
+ assert inf_val>0 if kx.licensed else True
assert inf_val.is_inf
+ assert inf_val.is_pos_inf
+ assert not inf_val.is_neg_inf
- def test_inf_neg_lic(self, kx):
+ @pytest.mark.unlicensed()
+ def test_inf_neg(self, kx):
qtypes = [kx.ShortAtom, kx.IntAtom,
kx.LongAtom, kx.RealAtom, kx.FloatAtom,
kx.TimestampAtom, kx.MonthAtom, kx.DateAtom,
kx.DatetimeAtom, kx.TimespanAtom, kx.MinuteAtom,
kx.SecondAtom, kx.TimeAtom]
for i in qtypes:
- inf_val = -getattr(i, 'inf') # noqa: B009
- assert type(inf_val) == i
- assert inf_val<0
- assert inf_val.is_inf
-
+ if kx.licensed:
+ inf_val = -getattr(i, 'inf') # noqa: B009
+ assert type(inf_val) == i
+ assert inf_val<0
+ assert inf_val.is_inf
+ assert not inf_val.is_pos_inf
+ assert inf_val.is_neg_inf
+ inf_neg_val = getattr(i, 'inf_neg') # noqa: B009
+ assert type(inf_neg_val) == i
+ assert inf_neg_val<0 if kx.licensed else True
+ assert inf_neg_val.is_inf
+ assert not inf_neg_val.is_pos_inf
+ assert inf_neg_val.is_neg_inf
+
+ @pytest.mark.unlicensed()
def test_null_fail(self, kx):
qtypes = [kx.BooleanAtom, kx.ByteAtom]
for i in qtypes:
@@ -497,6 +513,7 @@ def test_null_fail(self, kx):
getattr(i, 'null') # noqa: B009
assert 'Retrieval of null values' in str(err)
+ @pytest.mark.unlicensed()
def test_inf_fail(self, kx):
qtypes = [kx.BooleanAtom, kx.ByteAtom, kx.GUIDAtom,
kx.CharAtom, kx.SymbolAtom]
@@ -504,24 +521,35 @@ def test_inf_fail(self, kx):
with pytest.raises(NotImplementedError) as err:
getattr(i, 'inf') # noqa: B009
assert 'Retrieval of infinite values' in str(err)
+ with pytest.raises(NotImplementedError) as err:
+ getattr(i, 'inf_neg') # noqa: B009
+ assert 'Retrieval of infinite values' in str(err)
- @pytest.mark.unlicensed(unlicensed_only=True)
+ @pytest.mark.unlicensed()
@pytest.mark.skipif(
os.getenv('PYKX_THREADING') is not None,
reason='Not supported with PYKX_THREADING'
)
- def test_null_inf_unlic(self, kx):
- qtypes = [kx.ByteAtom, kx.GUIDAtom, kx.ShortAtom,
- kx.IntAtom, kx.LongAtom, kx.RealAtom,
- kx.FloatAtom, kx.CharAtom, kx.SymbolAtom,
- kx.TimestampAtom, kx.MonthAtom, kx.DateAtom,
- kx.DatetimeAtom, kx.TimespanAtom, kx.MinuteAtom,
- kx.SecondAtom, kx.TimeAtom]
+ def test_null_inf(self, kx):
+ qtypes = [kx.GUIDAtom, kx.ShortAtom, kx.IntAtom, kx.LongAtom, kx.RealAtom, kx.FloatAtom,
+ kx.CharAtom, kx.SymbolAtom, kx.TimestampAtom, kx.MonthAtom, kx.DateAtom,
+ kx.DatetimeAtom, kx.TimespanAtom, kx.MinuteAtom, kx.SecondAtom, kx.TimeAtom]
+ for i in qtypes:
+ assert isinstance(i.null, i)
+ assert i.null.is_null
+
+ qtypes = [kx.ShortAtom, kx.IntAtom, kx.LongAtom, kx.RealAtom, kx.FloatAtom,
+ kx.TimestampAtom, kx.MonthAtom, kx.DateAtom, kx.DatetimeAtom, kx.TimespanAtom,
+ kx.MinuteAtom, kx.SecondAtom, kx.TimeAtom]
for i in qtypes:
- for j in ['null', 'inf']:
- with pytest.raises(kx.QError) as err:
- getattr(i, j)()
- assert 'not supported in unlicensed mode' in str(err)
+ assert isinstance(i.inf, i)
+ assert i.inf.is_inf
+ assert i.inf.is_pos_inf
+ assert not i.inf.is_neg_inf
+ assert isinstance(i.inf_neg, i)
+ assert i.inf_neg.is_inf
+ assert i.inf_neg.is_neg_inf
+ assert not i.inf_neg.is_pos_inf
def test_is_null_and_is_inf(self, q):
assert q('0Ng').is_null
@@ -531,73 +559,145 @@ def test_is_null_and_is_inf(self, q):
assert not q('first 1?0h').is_null
assert q('0Wh').is_inf
assert q('-0Wh').is_inf
+ assert q('0Wh').is_pos_inf
+ assert not q('-0Wh').is_pos_inf
+ assert q('-0Wh').is_neg_inf
+ assert not q('0Wh').is_neg_inf
assert not q('first 1?0h').is_inf
+ assert not q('first 1?0h').is_pos_inf
+ assert not q('first 1?0h').is_neg_inf
assert q('0Ni').is_null
assert not q('first 1?0i').is_null
assert q('0Wi').is_inf
assert q('-0Wi').is_inf
+ assert q('0Wi').is_pos_inf
+ assert not q('-0Wi').is_pos_inf
+ assert q('-0Wi').is_neg_inf
+ assert not q('0Wi').is_neg_inf
assert not q('first 1?0i').is_inf
+ assert not q('first 1?0i').is_pos_inf
+ assert not q('first 1?0i').is_neg_inf
assert q('0Nj').is_null
assert not q('first 1?0j').is_null
assert q('0Wj').is_inf
assert q('-0Wj').is_inf
+ assert q('0Wj').is_pos_inf
+ assert not q('-0Wj').is_pos_inf
+ assert q('-0Wj').is_neg_inf
+ assert not q('0Wj').is_neg_inf
assert not q('first 1?0j').is_inf
+ assert not q('first 1?0j').is_pos_inf
+ assert not q('first 1?0j').is_neg_inf
assert q('0Ne').is_null
assert not q('first 1?1e').is_null
assert q('0We').is_inf
assert q('-0We').is_inf
+ assert q('0We').is_pos_inf
+ assert not q('-0We').is_pos_inf
+ assert q('-0We').is_neg_inf
+ assert not q('0We').is_neg_inf
assert not q('first 1?1e').is_inf
+ assert not q('first 1?1e').is_pos_inf
+ assert not q('first 1?1e').is_neg_inf
assert q('0Nf').is_null
assert not q('first 1?1f').is_null
assert q('0Wf').is_inf
assert q('-0Wf').is_inf
+ assert q('0Wf').is_pos_inf
+ assert not q('-0Wf').is_pos_inf
+ assert q('-0Wf').is_neg_inf
+ assert not q('0Wf').is_neg_inf
assert not q('first 1?1f').is_inf
+ assert not q('first 1?1f').is_pos_inf
+ assert not q('first 1?1f').is_neg_inf
assert q('0Np').is_null
assert not q('first 1?1f').is_null
assert q('0Wp').is_inf
assert q('-0Wp').is_inf
+ assert q('0Wp').is_pos_inf
+ assert not q('-0Wp').is_pos_inf
+ assert q('-0Wp').is_neg_inf
+ assert not q('0Wp').is_neg_inf
assert not q('first 1?0p').is_inf
+ assert not q('first 1?0p').is_pos_inf
+ assert not q('first 1?0p').is_neg_inf
assert q('0Nm').is_null
assert not q('first 1?2000.01m').is_null
assert q('0Wm').is_inf
assert q('-0Wm').is_inf
+ assert q('0Wm').is_pos_inf
+ assert not q('-0Wm').is_pos_inf
+ assert q('-0Wm').is_neg_inf
+ assert not q('0Wm').is_neg_inf
assert not q('first 1?2000.01m').is_inf
+ assert not q('first 1?2000.01m').is_pos_inf
+ assert not q('first 1?2000.01m').is_neg_inf
assert q('0Nd').is_null
assert not q('first 1?2000.01.01').is_null
assert q('0Wd').is_inf
assert q('-0Wd').is_inf
+ assert q('0Wd').is_pos_inf
+ assert not q('-0Wd').is_pos_inf
+ assert q('-0Wd').is_neg_inf
+ assert not q('0Wd').is_neg_inf
assert not q('first 1?2000.01.01').is_inf
+ assert not q('first 1?2000.01.01').is_pos_inf
+ assert not q('first 1?2000.01.01').is_neg_inf
assert q('0Nn').is_null
assert not q('first "n"$1?0').is_null
assert q('0Wn').is_inf
assert q('-0Wn').is_inf
+ assert q('0Wn').is_pos_inf
+ assert not q('-0Wn').is_pos_inf
+ assert q('-0Wn').is_neg_inf
+ assert not q('0Wn').is_neg_inf
assert not q('first "n"$1?0').is_inf
+ assert not q('first "n"$1?0').is_pos_inf
+ assert not q('first "n"$1?0').is_neg_inf
assert q('0Nu').is_null
assert not q('first 1?0u').is_null
assert q('0Wu').is_inf
assert q('-0wu').is_inf
+        assert q('0Wu').is_pos_inf
+        assert not q('-0Wu').is_pos_inf
+        assert q('-0Wu').is_neg_inf
+        assert not q('0Wu').is_neg_inf
assert not q('first 1?0u').is_inf
+ assert not q('first 1?0u').is_pos_inf
+ assert not q('first 1?0u').is_neg_inf
assert q('0Nv').is_null
assert not q('first 1?0v').is_null
assert q('0Wv').is_inf
assert q('-0Wv').is_inf
+ assert q('0Wv').is_pos_inf
+ assert not q('-0Wv').is_pos_inf
+ assert q('-0Wv').is_neg_inf
+ assert not q('0Wv').is_neg_inf
assert not q('first 1?0v').is_inf
+ assert not q('first 1?0v').is_pos_inf
+ assert not q('first 1?0v').is_neg_inf
assert q('0Nt').is_null
assert not q('first 1?0t').is_null
assert q('0Wt').is_inf
assert q('-0Wt').is_inf
+        assert q('0Wt').is_pos_inf
+ assert not q('-0Wt').is_pos_inf
+ assert q('-0Wt').is_neg_inf
+ assert not q('0Wt').is_neg_inf
assert not q('first 1?0t').is_inf
+ assert not q('first 1?0t').is_pos_inf
+ assert not q('first 1?0t').is_neg_inf
assert not q('{x*y+z}').is_null
assert not q('{x*y+z}').is_inf
@@ -605,8 +705,7 @@ def test_is_null_and_is_inf(self, q):
@pytest.mark.nep49
def test_null_np(self, q, kx):
for type_char in 'hij':
- with pytest.raises(kx.PyKXException):
- q(f'0N{type_char}').np()
+ q(f'0N{type_char}').np()
for type_char in 'ef':
assert np.isnan(q(f'0N{type_char}').np())
@@ -617,8 +716,7 @@ def test_null_np(self, q, kx):
@pytest.mark.nep49
def test_null_pd(self, q, kx, pd):
for type_char in 'hij':
- with pytest.raises(kx.PyKXException):
- q(f'0N{type_char}').pd()
+ q(f'0N{type_char}').pd()
for type_char in 'ef':
assert pd.isna(q(f'0N{type_char}').pd())
@@ -743,21 +841,21 @@ def test_py(self, q, kx):
assert q('0xFF').py() == 2 ** 8 - 1
assert q('0xFF').t == -4
- assert(q('0Nh').py() == kx.ShortAtom(q('0Nh')))
+ assert pd.isna(q('0Nh').py())
try:
q('0Wh').py()
except kx.PyKXException:
pass
assert q('0Wh').t == -5
- assert (q('0Ni').py() == kx.IntAtom(q('0Ni')))
+ assert pd.isna(q('0Ni').py())
try:
q('0Wi').py()
except kx.PyKXException:
pass
assert q('0Wi').t == -6
- assert (q('0N').py() == kx.LongAtom(q('0N')))
+ assert pd.isna(q('0N').py())
try:
q('0Wj').py()
except kx.PyKXException:
@@ -945,6 +1043,32 @@ def test_py(self, q):
assert e.py() == 'xyz'
assert e.py(raw=True) == 1
+    def test_enum_init(self, q, kx):
+        """Build `kx.EnumAtom` values against the q list `tc`.
+
+        Three construction paths are checked: by value, by value with
+        `extend=True`, and by index.
+        """
+        q('tc:`a`b`c')
+        existing = 'c'
+
+        # By value, with extend=False.
+        atom = kx.EnumAtom('tc', value=existing, extend=False)
+        assert atom == q('`tc$`c')
+        assert atom.value() == existing
+        assert atom.domain() == 'tc'
+        assert atom.index() == 2
+
+        # By value with extend=True: `tc` is expected to gain `d` at index 3.
+        added = 'd'
+        atom = kx.EnumAtom('tc', value=added, extend=True)
+        assert atom == q('`tc$`d')
+        assert (kx.q('tc') == ('a', 'b', 'c', 'd')).all()
+        assert atom.value() == added
+        assert atom.domain() == 'tc'
+        assert atom.index() == 3
+
+        # By index into `tc`.
+        position = 2
+        atom = kx.EnumAtom('tc', index=position)
+        assert atom == q('`tc$`c')
+        assert atom.value() == existing
+        assert atom.domain() == kx.toq('tc')
+        assert atom.index() == position
+
+
class Test_TemporalSpanAtom:
@pytest.mark.nep49
@@ -1019,6 +1143,23 @@ def test_timestamp(self, q, kx):
assert timestamp.np(raw=True) == 4759072275070713856
assert timestamp.py(raw=True) == 4759072275070713856
+ assert timestamp == kx.TimestampAtom(2150, 10, 22, 20, 31, 15, 70713856)
+ with pytest.raises(TypeError) as err:
+ kx.TimestampAtom(2150, 10, 22, 20, 31, 70713856)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.TimestampAtom(2150, 10, 22, 20, 31, 15, 21, 70713856)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.TimestampAtom(2150, 10, "22", 20, 31, 15, 70713856)
+ assert "All values must be of type int" in str(err)
+
+    @pytest.mark.unlicensed(unlicensed_only=True)
+    def test_timestamp_unlicensed(self, q, kx):
+        """Expect `kx.LicenseException` when a timestamp is built from numeric components."""
+        with pytest.raises(kx.LicenseException) as raised:
+            kx.TimestampAtom(2150, 10, 22, 20, 31, 15, 70713856)
+        assert "numerical values" in str(raised)
+
@pytest.mark.nep49
def test_timestamp_timezone(self, kx):
kx.config._set_keep_local_times(False)
@@ -1049,6 +1190,23 @@ def test_date(self, q, kx):
assert q_date.np(raw=True) == -10076
assert q_date.py(raw=True) == -10076
+ assert kx.DateAtom(1972, 5, 31) == q_date
+ with pytest.raises(TypeError) as err:
+ kx.DateAtom(2021, 19)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.DateAtom(2021, 19, 8, 3)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.DateAtom(1972, '5', 31)
+ assert "All values must be of type int" in str(err)
+
+    @pytest.mark.unlicensed(unlicensed_only=True)
+    def test_date_unlicensed(self, q, kx):
+        """Expect `kx.LicenseException` when a date is built from numeric components."""
+        with pytest.raises(kx.LicenseException) as raised:
+            kx.DateAtom(1972, 5, 31)
+        assert "numerical values" in str(raised)
+
@pytest.mark.nep49
def test_datetime(self, q, kx):
with pytest.warns(DeprecationWarning):
@@ -1071,6 +1229,23 @@ def test_timespan(self, q, kx):
assert timespan.np(raw=True) == 3796312051664551936
assert timespan.py(raw=True) == 3796312051664551936
+ assert timespan == kx.TimespanAtom(43938, 19, 7, 31, 664551936)
+ with pytest.raises(TypeError) as err:
+ kx.TimespanAtom(43938, 19, 7, 664551936)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.TimespanAtom(43938, 19, 7, 31, 12, 664551936)
+ assert "Too few values" in str(err)
+ with pytest.raises(TypeError) as err:
+ kx.TimespanAtom(43938, 19, '7', 31, 664551936)
+ assert "All values must be of type int" in str(err)
+
+    @pytest.mark.unlicensed(unlicensed_only=True)
+    def test_timespan_unlicensed(self, q, kx):
+        """Expect `kx.LicenseException` when a timespan is built from numeric components."""
+        with pytest.raises(kx.LicenseException) as raised:
+            kx.TimespanAtom(43938, 19, 7, 31, 664551936)
+        assert "numerical values" in str(raised)
+
@pytest.mark.nep49
def test_minute(self, q, kx):
minute = q('03:36')
@@ -1186,10 +1361,12 @@ def test_append(self, q, kx):
q1 = kx.toq(p1)
p1.append('a')
p1.append([1, 2, 3])
+ p1.append(['a', 1])
q1.append('a')
q1.append([1, 2, 3])
+ q1.append(['a', 1])
assert q('{x~y}', p1, q1)
- assert 5 == len(q1)
+ assert 6 == len(q1)
def test_extend(self, q, kx):
p0 = [1, 2, 3]
@@ -1438,6 +1615,11 @@ def test_has_null_and_has_inf(self, q):
assert not q('(`o;7;{x*y+z})').has_nulls
assert q('(`;7;{x*y+z})').has_nulls
assert q('(`hmmm;0N;{x*y+z})').has_nulls
+ assert not q('([]1 2 3;3?1f)').has_nulls
+ assert q('([]1 2 0N;3?1f)').has_nulls
+ assert q('([x:1 2 3]x1:(1 2 3;"123";0n);x2:(1f;0n;2f))').has_nulls
+ assert not q('([x:1 0N 1] x1:1 2 3)').has_nulls
+ assert q('([x:1 0N 1] x1:1 0N 3)').has_nulls
assert not q('(`hmmm;0N;"not inf";123456789;{x*y+z})').has_infs
assert q('(`hmmm;0N;"not inf";0W;{x*y+z})').has_infs
@@ -1469,14 +1651,14 @@ def f(type_code, zero):
f(type_code, zero)
def test_np_timestampvector_nulls(self, kx):
- assert kx.q('0Np').py() is None
+ assert pd.isna(kx.q('0Np').py())
assert kx.q('enlist 0Np').py() == [kx.TimestampAtom(kx.q('0Np'))]
@pytest.mark.unlicensed
def test_np_timestampvector_nulls_IPC(self, kx, q_port):
with kx.QConnection(port=q_port) as conn:
r = conn('([] t:2#0Np)').py()
- assert r['t'][0].py() is None
+ assert pd.isna(r['t'][0])
class Test_List:
@@ -1519,8 +1701,8 @@ def test_getting(self, q, kx):
# assert all(a == 0 for a in x[4:7])
def test_py(self, q, kx):
- assert q('1 0N 3h').py() == [1, q('0Nh'), 3]
- assert isinstance(q('1 0N 3h').py()[1], kx.ShortAtom)
+ assert q('1 0N 3h').py() == [1, pd.NA, 3]
+ assert isinstance(q('1 0N 3h').py()[1], type(pd.NA))
@pytest.mark.nep49
def test_np(self, q, kx):
@@ -2272,6 +2454,32 @@ def test_pd(self, q, kx):
assert all(
q(self.q_vec_str).pd(as_arrow=True) == ['abc', 'xyz', 'hmm', 'abc', 'xyz', 'hmm'])
+    def test_enum_init(self, q, kx):
+        """Build `kx.EnumVector` values against the q list `tc`.
+
+        Three construction paths are checked: by values, by values with
+        `extend=True`, and by indices.
+        """
+        q('tc:`a`b`c')
+        existing = ('a', 'c')
+
+        # By values, with extend=False.
+        vec = kx.EnumVector('tc', values=existing, extend=False)
+        assert (vec == q('`tc$`a`c')).all()
+        assert (vec.values() == existing).all()
+        assert (vec.domain() == 'tc')
+        assert (vec.indices() == (0, 2)).all()
+
+        # By values with extend=True: `tc` is expected to gain `d` once, at index 3.
+        with_new = ('a', 'b', 'b', 'c', 'd', 'd')
+        vec = kx.EnumVector('tc', values=with_new, extend=True)
+        assert (vec == q('`tc$`a`b`b`c`d`d')).all()
+        assert (kx.q('tc') == ('a', 'b', 'c', 'd')).all()
+        assert (vec.values() == with_new).all()
+        assert (vec.domain() == 'tc')
+        assert (vec.indices() == (0, 1, 1, 2, 3, 3)).all()
+
+        # By indices into `tc`.
+        positions = (0, 2)
+        vec = kx.EnumVector('tc', indices=positions)
+        assert (vec == q('`tc$`a`c')).all()
+        assert (vec.values() == existing).all()
+        assert (vec.domain() == kx.toq('tc'))
+        assert (vec.indices() == positions).all()
+
+
class Test_Anymap:
def test_anymap(self, kx, q, tmp_path):
@@ -2605,6 +2813,129 @@ def test_window_join(self, kx, q):
{'ask_min_bid': [kx.q('{x - y}'), 'ask', 'bid']})
assert q('~', py_multi_join, q_multi_join)
+    def test_reorder(self, kx, q):
+        """Check `reorder_columns` ordering, its error messages and the `inplace` option."""
+        columns = {
+            'col1': kx.random.random(100, ['a', 'b', 'c']),
+            'col2': kx.random.random(100, 1.0),
+            'col3': kx.random.random(100, False),
+            'col4': kx.random.random(100, 10.0)}
+        tab = kx.Table(data=columns)
+        two_moved = ['col4', 'col3', 'col1', 'col2']
+
+        # Without `inplace` the source table keeps its original column order
+        assert tab.columns.py() == ['col1', 'col2', 'col3', 'col4']
+        assert tab.reorder_columns('col4').columns.py() == ['col4', 'col1', 'col2', 'col3']
+        assert tab.reorder_columns(['col4', 'col3']).columns.py() == two_moved
+
+        # (argument, fragment expected in the raised QError message)
+        failures = (
+            ('col5', 'Supplied column "col5" not in'),
+            (['col4', 'col5'], 'Supplied column "col5" not in'),
+            (1, 'Supplied column is not a string or list'),
+        )
+        for bad_columns, fragment in failures:
+            with pytest.raises(kx.QError) as err:
+                tab.reorder_columns(bad_columns)
+            assert fragment in str(err.value)
+
+        # With `inplace=True` both the returned table and `tab` carry the new order
+        reordered = tab.reorder_columns(['col4', 'col3'], inplace=True)
+        assert reordered.columns.py() == two_moved
+        assert tab.columns.py() == two_moved
+
+    def test_method_query(self, kx, q):
+        """Compare the table query methods with the equivalent `kx.q.sql`/`kx.q.qsql` calls.
+
+        `sql`, `select` and `exec` are run on an unkeyed table and on a copy keyed on `col1`;
+        `delete` and `update` are run on small unkeyed and keyed tables. Every pair of
+        results is compared with the q match operator `~`.
+        """
+        tab = kx.Table(data={
+            'col1': kx.random.random(100, ['a', 'b', 'c']),
+            'col2': kx.random.random(100, 1.0),
+            'col3': kx.random.random(100, False),
+            'col4': kx.random.random(100, 10.0)})
+        ktab = tab.set_index('col1')
+
+        for table in [tab, ktab]:
+            # The below exception is related to a bug currently not allowing keyed tables
+            # to be passed as positional arguments in `.s.sp`. The check has to inspect the
+            # table under test (`table`); testing `ktab` skipped this branch for every input.
+            if not isinstance(table, kx.KeyedTable):
+                noarg_sql_method = table.sql("select * from $1 where col2 > 0.5")
+                noarg_sql_basic = kx.q.sql("select * from $1 where col2 > 0.5", table)
+                assert q('~', noarg_sql_method, noarg_sql_basic)
+
+            multi_sql_method = table.sql("select * from $1 where col1 = $2 and col2 < $3", 'a', 0.5)  # noqa: E501
+            multi_sql_basic = kx.q.sql("select * from $1 where col1 = $2 and col2 < $3",
+                                       table,
+                                       'a',
+                                       0.5)
+            assert q('~', multi_sql_method, multi_sql_basic)
+
+            # A query without the `$1` placeholder is rejected
+            with pytest.raises(kx.QError) as err:
+                table.sql('select * from table')
+            assert 'Supplied query does not contain' in str(err.value)
+
+            # A non-string query is rejected
+            with pytest.raises(TypeError) as err:
+                table.sql(1)
+            assert 'Supplied query is not of type' in str(err.value)
+
+            select_basic = kx.q.qsql.select(table)
+            select_method = table.select()
+            assert q('~', select_basic, select_method)
+
+            select_basic_where = kx.q.qsql.select(table, where='col2<0.5')
+            select_method_where = table.select(where='col2<0.5')
+            assert q('~', select_basic_where, select_method_where)
+
+            select_basic_complex = kx.q.qsql.select(
+                table,
+                columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+                by={'col1': 'col1'},
+                where='col3=0b')
+            select_method_complex = table.select(
+                columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+                by={'col1': 'col1'},
+                where='col3=0b')
+            assert q('~', select_basic_complex, select_method_complex)
+
+            exec_basic = kx.q.qsql.exec(table)
+            exec_method = table.exec()
+            assert q('~', exec_basic, exec_method)
+
+            exec_basic_cols = kx.q.qsql.exec(table, {'symcol': 'col1', 'boolcol': 'col3'})
+            exec_method_cols = table.exec({'symcol': 'col1', 'boolcol': 'col3'})
+            assert q('~', exec_basic_cols, exec_method_cols)
+
+            exec_basic_complex = kx.q.qsql.exec(
+                table,
+                columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+                by={'col1': 'col1'},
+                where='col3=0b')
+            exec_method_complex = table.exec(
+                columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'},
+                by={'col1': 'col1'},
+                where='col3=0b')
+            assert q('~', exec_basic_complex, exec_method_complex)
+
+        delete_tab = kx.Table(data={
+            'name': ['tom', 'dick', 'harry'],
+            'age': [28, 29, 35],
+            'hair': ['fair', 'dark', 'fair'],
+            'eye': ['green', 'brown', 'gray']})
+        delete_ktab = delete_tab.set_index('name')
+
+        for table in [delete_tab, delete_ktab]:
+            delete_basic = kx.q.qsql.delete(table)
+            delete_method = table.delete()
+            assert q('~', delete_basic, delete_method)
+
+            delete_basic_columns = kx.q.qsql.delete(table, ['age', 'eye'])
+            delete_method_columns = table.delete(['age', 'eye'])
+            assert q('~', delete_basic_columns, delete_method_columns)
+
+            delete_basic_where = kx.q.qsql.delete(table, where=['hair=`fair', 'age=28'])
+            delete_method_where = table.delete(where=['hair=`fair', 'age=28'])
+            assert q('~', delete_basic_where, delete_method_where)
+
+        update_tab = kx.Table(data={
+            'name': ['tom', 'dick', 'harry'],
+            'age': [28, 29, 35],
+            'hair': ['fair', 'dark', 'fair'],
+            'eye': ['green', 'brown', 'gray']})
+        update_ktab = update_tab.set_index('hair')
+
+        for table in [update_tab, update_ktab]:
+            update_basic = kx.q.qsql.update(table, {'eye': '`blue`brown`green'})
+            update_method = table.update({'eye': '`blue`brown`green'})
+            assert q('~', update_basic, update_method)
+
+            update_basic_by = kx.q.qsql.update(table, {'age': 'avg age'}, by={'hair': 'hair'})
+            update_method_by = table.update({'age': 'avg age'}, by={'hair': 'hair'})
+            assert q('~', update_basic_by, update_method_by)
+
@pytest.mark.filterwarnings('ignore:Splayed tables are not yet implemented')
class Test_SplayedTable:
@@ -2622,7 +2953,7 @@ def test_key_related_methods(self, q, tmp_path, kx):
assert list(t) == ['a', 'b', 'c']
assert len(t) == 3
- def test_not_implemented_methods(self, q, tmp_path):
+ def test_not_implemented_methods(self, q, tmp_path, kx):
t = self.create_splayed_table(q, tmp_path)
assert t._values is None
with pytest.raises(NotImplementedError):
@@ -2641,6 +2972,42 @@ def test_not_implemented_methods(self, q, tmp_path):
t.pd()
with pytest.raises(NotImplementedError):
t.py()
+ with pytest.raises(AttributeError):
+ t.add_prefix(prefix='test')
+ with pytest.raises(AttributeError):
+ t.add_suffix(suffix='test')
+ with pytest.raises(AttributeError):
+ t.agg('sum')
+ with pytest.raises(AttributeError):
+ t.apply(q.sqrt)
+ with pytest.raises(AttributeError):
+ t.cast(kx.PartitionedTable)
+ with pytest.raises(AttributeError):
+ t.count()
+ with pytest.raises(AttributeError):
+ t.drop_duplicates()
+ with pytest.raises(AttributeError):
+ t.exec()
+ with pytest.raises(AttributeError):
+ t.groupby()
+ with pytest.raises(AttributeError):
+ t.grouped()
+ with pytest.raises(AttributeError):
+ t.has_infs()
+ with pytest.raises(AttributeError):
+ t.has_nulls()
+ with pytest.raises(AttributeError):
+ t.merge(t)
+ with pytest.raises(AttributeError):
+ t.merge_asof(t)
+ with pytest.raises(AttributeError):
+ t.prototype()
+ with pytest.raises(AttributeError):
+ t.ungroup()
+ with pytest.raises(AttributeError):
+ t.upsert(0)
+ with pytest.raises(AttributeError):
+ t.window_join(t, t, 'test', t)
@pytest.mark.filterwarnings('ignore:(Splayed|Partitioned) tables are not yet implemented')
@@ -2660,7 +3027,7 @@ def test_key_related_methods(self, q, tmp_path, kx):
assert list(t) == ['a', 'b', 'c']
assert len(t) == 9
- def test_not_implemented_methods(self, q, tmp_path):
+ def test_not_implemented_methods(self, q, tmp_path, kx):
t = self.create_partitioned_table(q, tmp_path)
assert t._values is None
with pytest.raises(NotImplementedError):
@@ -2679,6 +3046,82 @@ def test_not_implemented_methods(self, q, tmp_path):
t.pd()
with pytest.raises(NotImplementedError):
t.py()
+ with pytest.raises(AttributeError):
+ t.add_prefix(prefix='test')
+ with pytest.raises(AttributeError):
+ t.add_suffix(suffix='test')
+ with pytest.raises(AttributeError):
+ t.agg('sum')
+ with pytest.raises(AttributeError):
+ t.apply(q.sqrt)
+ with pytest.raises(AttributeError):
+ t.cast(kx.PartitionedTable)
+ with pytest.raises(AttributeError):
+ t.count()
+ with pytest.raises(AttributeError):
+ t.drop_duplicates()
+ with pytest.raises(AttributeError):
+ t.exec()
+ with pytest.raises(AttributeError):
+ t.groupby()
+ with pytest.raises(AttributeError):
+ t.grouped()
+ with pytest.raises(AttributeError):
+ t.has_infs()
+ with pytest.raises(AttributeError):
+ t.has_nulls()
+ with pytest.raises(AttributeError):
+ t.merge(t)
+ with pytest.raises(AttributeError):
+ t.merge_asof(t)
+ with pytest.raises(AttributeError):
+ t.prototype()
+ with pytest.raises(AttributeError):
+ t.ungroup()
+ with pytest.raises(AttributeError):
+ t.upsert(0)
+ with pytest.raises(AttributeError):
+ t.window_join(t, t, 'test', t)
+ with pytest.raises(AttributeError):
+ t.astype(kx.CharVector)
+ with pytest.raises(AttributeError):
+ t.delete()
+ with pytest.raises(AttributeError):
+ t.drop()
+ with pytest.raises(AttributeError):
+ t.get(0)
+ with pytest.raises(AttributeError):
+ t.head()
+ with pytest.raises(AttributeError):
+ t.iloc()
+ with pytest.raises(AttributeError):
+ t.loc()
+ with pytest.raises(AttributeError):
+ t.mode()
+ with pytest.raises(AttributeError):
+ t.nlargest(n=2)
+ with pytest.raises(AttributeError):
+ t.nsmallest(n=2)
+ with pytest.raises(AttributeError):
+ t.sort_values()
+ with pytest.raises(AttributeError):
+ t.prod()
+ with pytest.raises(AttributeError):
+ t.sample()
+ with pytest.raises(AttributeError):
+ t.select_dtypes()
+ with pytest.raises(AttributeError):
+ t.sorted()
+ with pytest.raises(AttributeError):
+ t.sum()
+ with pytest.raises(AttributeError):
+ t.std()
+ with pytest.raises(AttributeError):
+ t.tail()
+ with pytest.raises(AttributeError):
+ t.unique()
+ with pytest.raises(AttributeError):
+ t.xbar(5)
class Test_Dictionary:
@@ -2912,7 +3355,7 @@ def test_multi_keyed_py(self, q):
def test_getting(self, kx, q):
kt = q(self.kt)
- assert kt[q('404')].py() == {'x': q('0N'), 'y': ''}
+ assert kt[q('404')].py() == {'x': pd.NA, 'y': ''}
assert kt[q('100')].py() == {'x': 0, 'y': 'singly'}
assert kt[q('enlist 100')].py() == {'x': [0], 'y': ['singly']}
assert kt[(100,)].py() == {'x': [0], 'y': ['singly']}
@@ -2929,8 +3372,10 @@ def test_multi_keyed_getting(self, kx, q):
def test_attributes(self, q, kx):
mkt = q(self.mkt)
- assert list(mkt) == [('a', 100), ('b', 101), ('a', 102)]
- assert mkt.keys() == [('a', 100), ('b', 101), ('a', 102)]
+ assert list(mkt) == [[kx.LongAtom(kx.q('0')), kx.SymbolAtom(kx.q('`multi'))],
+ [kx.LongAtom(kx.q('1')), kx.SymbolAtom(kx.q('`keyed'))],
+ [kx.LongAtom(kx.q('2')), kx.SymbolAtom(kx.q('`table'))]]
+ assert all(mkt.keys() == kx.q('([] k1:`a`b`a; k2: 100 101 102)'))
v1 = mkt.values().py()
v2 = {
'x': [0, 1, 2],
@@ -3054,6 +3499,21 @@ def test_keyed_table_constructor(self, kx, q):
columns=['idx', 'index']
).py() == q('([idx1: til 10] idx: til 10; index: 10 - til 10)').py()
+    def test_keyed_table_iterator(self, kx, q):
+        """Iterate over one column and over a two-column selection of a keyed table."""
+        tab = q('([a:1 2 3 4] b:4#enlist 6 7 8 9; c:4#enlist til 4; d:til 4)')
+
+        # Iterating a single column is expected to yield one long atom per row
+        atoms = [item for item in tab['d']]
+        assert all(isinstance(item, kx.LongAtom) for item in atoms)
+        assert (atoms == kx.q('til 4')).all()
+
+        # Iterating a two-column selection is expected to yield 4 items of length 2
+        rows = [row for row in tab[['b', 'c']]]
+        assert len(rows) == 4 and len(rows[0]) == 2
+        first = rows[0]
+        assert (first[0] == kx.q('6 7 8 9')).all()
+        assert (first[1] == kx.q('til 4')).all()
+
class Test_Function:
def test_bool(self, q):
@@ -3069,8 +3529,8 @@ def test_bool(self, q):
assert q('{}\':')
assert q('{}/:')
assert q('{}\\:')
- assert q('.pykx.i.pyfunc')
- assert q.pykx.i.pyfunc
+ assert q('.pykx.util.isf')
+ assert q.pykx.util.isf
def test_call(self, q):
assert list(range(8)) == q('til')(8).py()
@@ -3188,7 +3648,7 @@ def test_all_function_types(self, q, kx):
assert isinstance(q('{}\':'), kx.EachPrior)
assert isinstance(q('{}/:'), kx.EachRight)
assert isinstance(q('{}\\:'), kx.EachLeft)
- assert isinstance(q('.pykx.i.pyfunc'), kx.Foreign)
+ assert isinstance(q('.pykx.util.isf'), kx.Foreign)
assert isinstance(q.Q.ajf0, kx.SymbolicFunction)
def test_args_property(self, q):
@@ -3225,7 +3685,7 @@ def test_func_property(self, kx, q):
f3 = q.pykx.modpow
assert f3(10, 2, 19) == f3(10, 2, 19) == 5
- f4 = q.pykx.i.pyfunc
+ f4 = q.pykx.util.isf
assert isinstance(f4, kx.SymbolicFunction)
assert not isinstance(f4, kx.Foreign)
assert isinstance(f4.func, kx.Foreign)
@@ -3308,6 +3768,306 @@ def test_symbolic_function(self, kx, q, q_port):
assert conn('testAlias')
+def test_nulls(kx, q, pa):
+    """Check the Python-side value of every q null after `.py()`, `.np()`, `.pd()`, `.pa()`.
+
+    Each null is converted three ways: as an atom, as the first item of a mixed list, and
+    as the single item of a typed vector. The expected values live in per-conversion lists
+    that are indexed in the same order as `nulls`.
+    """
+
+    def compare_nulls(q_null, py_null):
+        # Both values must have exactly the same type. Null-ness is then checked with a
+        # predicate chosen by the expected value's type; anything else is compared with `==`.
+        assert type(q_null) == type(py_null)
+        if isinstance(py_null, (float, np.float32, np.float64)) and math.isnan(py_null):
+            assert math.isnan(q_null)
+        elif isinstance(py_null, (np.datetime64, np.timedelta64)):
+            assert np.isnat(q_null)
+        elif isinstance(py_null, (pa.lib.FloatScalar, pa.lib.DoubleScalar)):
+            assert pa.compute.is_nan(q_null)
+        elif isinstance(py_null, (type(pd.NaT), type(pd.NA))):
+            assert pd.isnull(q_null)
+        else:
+            assert q_null == py_null
+
+    # Atom Nulls
+    # Index: 0 guid, 1 short, 2 int, 3 long, 4 real, 5 float, 6 char, 7 symbol,
+    #        8 timestamp, 9 month, 10 date, 11 timespan, 12 minute, 13 second, 14 time
+    nulls = q('(0Ng;0Nh;0Ni;0Nj;0Ne;0n;" ";`;0Np;0Nm;0Nd;0Nn;0Nu;0Nv;0Nt)')
+    # Atom Nulls in mixed lists (each null is paired with the generic null `::`)
+    mixed_nulls = q('{{(x;(::))} each x}', nulls)
+    # Nulls in typed vectors (one single-item vector per null)
+    typed_nulls = q('{enlist each x}', nulls)
+
+    # Atom Nulls .py()
+    py_nulls = [UUID(int=0), pd.NA, pd.NA, pd.NA, float('nan'), float('nan'), b' ',
+                '', pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT]
+    for i in range(len(nulls)):
+        q_null = nulls[i].py()
+        py_null = py_nulls[i]
+        compare_nulls(q_null, py_null)
+
+    # Nulls in List .py()
+    for i in range(len(mixed_nulls)):
+        q_null = mixed_nulls[i].py()[0]
+        py_null = py_nulls[i]
+        compare_nulls(q_null, py_null)
+
+    # Nulls in Vectors .py(); the char vector's `.py()` result is compared whole,
+    # every other vector is indexed for its first item
+    for i in range(len(typed_nulls)):
+        if isinstance(typed_nulls[i], kx.wrappers.CharVector):
+            q_null = typed_nulls[i].py()
+        else:
+            q_null = typed_nulls[i].py()[0]
+        py_null = py_nulls[i]
+        compare_nulls(q_null, py_null)
+
+    # Atom Nulls .np()
+    np_nulls = [UUID(int=0), np.int16(-32768), np.int32(-2147483648),
+                np.int64(-9223372036854775808), np.float32('nan'), np.float64('nan'),
+                b' ', '', np.datetime64('NaT'), np.datetime64('NaT'), np.datetime64('NaT'),
+                np.timedelta64('NaT'), np.timedelta64('NaT'), np.timedelta64('NaT'),
+                np.timedelta64('NaT')]
+    # Empty skip list: every index is compared in the two loops below
+    skip = []
+    for i in range(len(nulls)):
+        if i not in skip:
+            q_null = nulls[i].np()
+            py_null = np_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # Nulls in List .np()
+    for i in range(len(mixed_nulls)):
+        if i not in skip:
+            q_null = mixed_nulls[i].np()[0]
+            py_null = np_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # For typed vectors the char null (index 6) is expected as `np.bytes_` instead of `bytes`
+    np_nulls[6] = np.bytes_(' ')
+
+    # Nulls in Vectors .np()
+    # NOTE: `skip` is reassigned here but the loop below does not consult it
+    skip = []
+    for i in range(len(typed_nulls)):
+        q_null = typed_nulls[i].np()
+        # Masked arrays are unmasked with `.filled()` before the first item is taken
+        if isinstance(q_null, np.ma.core.MaskedArray):
+            q_null = q_null.filled()[0]
+        else:
+            q_null = q_null[0]
+        py_null = np_nulls[i]
+        compare_nulls(q_null, py_null)
+
+    # Atom Nulls .pd()
+    pd_nulls = [UUID(int=0), pd.NA, pd.NA, pd.NA, np.float32('nan'), np.float64('nan'), b' ',
+                '', pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT]
+    # Empty skip list: every index is compared in the two loops below
+    skip = []
+    for i in range(len(nulls)):
+        if i not in skip:
+            q_null = nulls[i].pd()
+            py_null = pd_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # Nulls in List .pd()
+    for i in range(len(mixed_nulls)):
+        if i not in skip:
+            q_null = mixed_nulls[i].pd()[0]
+            py_null = pd_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # Nulls in Vectors .pd()
+    # NOTE: `skip` is reassigned here but the loop below does not consult it
+    skip = []
+    # For typed vectors the char null (index 6) is expected as `np.bytes_` instead of `bytes`
+    pd_nulls[6] = np.bytes_(' ')
+    for i in range(len(typed_nulls)):
+        q_null = typed_nulls[i].pd()[0]
+        py_null = pd_nulls[i]
+        compare_nulls(q_null, py_null)
+
+    # Atom Nulls .pa()
+    # The expected atom values are the same objects as the `.pd()` atom expectations above
+    pa_nulls = [UUID(int=0), pd.NA, pd.NA, pd.NA, np.float32('nan'), np.float64('nan'), b' ',
+                '', pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT]
+    skip = []
+    for i in range(len(nulls)):
+        if i not in skip:
+            q_null = nulls[i].pa()
+            py_null = pa_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # Expected values for mixed lists are pyarrow scalars; the entries left as `None`
+    # belong to indices that are skipped below
+    pa_nulls = [UUID(int=0), pa.array([None])[0], pa.array([None])[0],
+                pa.array([None])[0], pa.array([None], pa.float32())[0],
+                pa.array([None], pa.float64())[0], pa.array([b' '], pa.binary())[0],
+                pa.array([''], pa.string())[0], None, None, None,
+                None, None, None, None]
+
+    # Nulls in List .pa()
+    skip = [0, 8, 9, 10, 11, 12, 13, 14]
+    # 0 Could not convert UUID('00000000-0000-0000-0000-000000000000') with type UUID:
+    #     did not recognize Python value type when inferring an Arrow data type
+    # 8, 9, 10 pyarrow.lib.ArrowNotImplementedError: Unbound or generic datetime64 time unit
+    # 11, 12, 13, 14 pyarrow.lib.ArrowNotImplementedError: Unbound or generic timedelta64 time unit
+    for i in range(len(mixed_nulls)):
+        if i not in skip:
+            q_null = mixed_nulls[i].pa()[0]
+            py_null = pa_nulls[i]
+            compare_nulls(q_null, py_null)
+
+    # Nulls in Vectors .pa()
+    skip = [9, 12]
+    # 9 Month - pyarrow.lib.ArrowNotImplementedError: Unsupported datetime64 time unit
+    # 12 Minute - pyarrow.lib.ArrowNotImplementedError: Unsupported timedelta64 time unit
+    # Expected values are typed pyarrow scalars; the guid (index 0) is an extension scalar
+    # over 16 zero bytes, and index 9 is a `None` placeholder because it is skipped
+    pa_nulls = [pa.ExtensionArray.from_storage(
+        kx.wrappers.arrow_uuid_type,
+        pa.array([b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'],
+                 pa.binary(16)))[0],
+        pa.array([None], pa.int16())[0], pa.array([None], pa.int32())[0],
+        pa.array([None], pa.int64())[0], pa.array([None], pa.float32())[0],
+        pa.array([None], pa.float64())[0], pa.array([b' '], pa.binary())[0],
+        pa.array([''], pa.string())[0], pa.array([None], pa.timestamp('ns'))[0], None,
+        pa.array([None], pa.date32())[0], pa.array([None], pa.duration('ns'))[0],
+        pa.array([None], pa.duration('ns'))[0], pa.array([None], pa.duration('s'))[0],
+        pa.array([None], pa.duration('ms'))[0]]
+    for i in range(len(typed_nulls)):
+        if i not in skip:
+            q_null = typed_nulls[i].pa()[0]
+            py_null = pa_nulls[i]
+            compare_nulls(q_null, py_null)
+
+
+def test_infinites(kx, q, pa):
+    """Check the Python-side value of q infinities after `.py()`, `.np()`, `.pd()`, `.pa()`.
+
+    Positive and negative infinities are checked as atoms and as the two items of a
+    vector built by pairing them. Expected values are indexed in the same order as
+    `positive_infinites`.
+    """
+    import datetime
+
+    def compare_infinites(q_infinite, py_infinite):
+        # Converted and expected values must have the same type and compare equal
+        assert type(q_infinite) == type(py_infinite)
+        assert q_infinite == py_infinite
+
+    def compare_all_infinites(q_infinites, py_infinites, conv, ind=None, skip=None):
+        # q_infinites:  q list whose items are converted
+        # py_infinites: expected values, one entry per item
+        # conv:         name of the conversion method to call ('py', 'np', 'pd' or 'pa')
+        # ind:          when given, index applied to both the converted item and the
+        #               expected entry (used for the [positive, negative] vectors)
+        # skip:         indices that are not compared
+        for i in range(len(py_infinites)):
+            if skip is None or i not in skip:
+                if ind is None:
+                    q_infinite = getattr(q_infinites[i], conv)()
+                    py_infinite = py_infinites[i]
+                else:
+                    q_infinite = getattr(q_infinites[i], conv)()[ind]
+                    py_infinite = py_infinites[i][ind]
+                compare_infinites(q_infinite, py_infinite)
+
+    # Atom infinites
+    # Index: 0 short, 1 int, 2 long, 3 real, 4 float, 5 timestamp, 6 month, 7 date,
+    #        8 timespan, 9 minute, 10 second, 11 time
+    positive_infinites = q('(0Wh;0Wi;0Wj;0We;0w;0Wp;0Wm;0Wd;0Wn;0Wu;0Wv;0Wt)')
+    negative_infinites = q('(-0Wh;-0Wi;-0Wj;-0We;-0w;-0Wp;-0Wm;-0Wd;-0Wn;-0Wu;-0Wv;-0Wt)')
+    # infinites in typed vectors (item i pairs positive i with negative i)
+    infinites = q('{flip (x;y)}', positive_infinites, negative_infinites)
+
+    # Atom infinites .py()
+    # Short/int/long are expected as `math.inf` (a float); month and date (indices 6, 7)
+    # are expected as plain ints
+    py_positive_infinites = [math.inf, math.inf, math.inf, float('inf'), float('inf'),
+                             datetime.datetime(2262, 4, 11, 23, 47, 16, 854775), 2147484007,
+                             2147494604, datetime.timedelta(106751, 16, 854775, 0, 47, 23),
+                             datetime.timedelta(-3220, 4, 33138, 0, 5, 5),
+                             datetime.timedelta(24855, 7, 0, 0, 14, 3),
+                             datetime.timedelta(24, 23, 647000, 0, 31, 20)]
+    compare_all_infinites(positive_infinites, py_positive_infinites, 'py')
+
+    py_negative_infinites = [-math.inf, -math.inf, -math.inf, float('-inf'), float('-inf'),
+                             datetime.datetime(1707, 9, 22, 0, 12, 43, 145224), -2147483287,
+                             -2147472690, datetime.timedelta(-106752, 43, 145224, 0, 12),
+                             datetime.timedelta(3219, 55, 966861, 0, 54, 18),
+                             datetime.timedelta(-24856, 53, 0, 0, 45, 20),
+                             datetime.timedelta(-25, 36, 353000, 0, 28, 3)]
+    compare_all_infinites(negative_infinites, py_negative_infinites, 'py')
+
+    # infinites in Vectors .py()
+    py_infinites = [[x, y] for x, y in zip(py_positive_infinites, py_negative_infinites)]
+    # Inside a vector the positive timestamp is expected as the same 1707 datetime that is
+    # expected for the negative one
+    py_infinites[5][0] = datetime.datetime(1707, 9, 22, 0, 12, 43, 145224)
+    compare_all_infinites(infinites, py_infinites, 'py', ind=0)
+    # NOTE: this reassigns the value the entry already holds (from py_negative_infinites)
+    py_infinites[5][1] = datetime.datetime(1707, 9, 22, 0, 12, 43, 145224)
+    compare_all_infinites(infinites, py_infinites, 'py', ind=1)
+
+    # Atom infinites .np()
+    np_positive_infinites = [np.int16(32767), np.int32(2147483647), np.int64(9223372036854775807),
+                             np.float32('inf'), np.float64('inf'),
+                             np.datetime64('2262-04-11T23:47:16.854775807'),
+                             np.datetime64('178958970-08'), np.datetime64('5881610-07-11'),
+                             np.timedelta64(9223372036854775807, 'ns'),
+                             np.timedelta64(2147483647, 'm'), np.timedelta64(2147483647, 's'),
+                             np.timedelta64(2147483647, 'ms')]
+    np_negative_infinites = [np.int16(-32767), np.int32(-2147483647),
+                             np.int64(-9223372036854775807),
+                             np.float32('-inf'), np.float64('-inf'),
+                             np.datetime64('1707-09-22T00:12:43.145224193'),
+                             np.datetime64('-178954971-06'), np.datetime64('-5877611-06-23'),
+                             np.timedelta64(-9223372036854775807, 'ns'),
+                             np.timedelta64(-2147483647, 'm'), np.timedelta64(-2147483647, 's'),
+                             np.timedelta64(-2147483647, 'ms')]
+    # Empty skip list: every atom index is compared
+    skip = []
+    compare_all_infinites(positive_infinites, np_positive_infinites, 'np', skip=skip)
+    compare_all_infinites(negative_infinites, np_negative_infinites, 'np', skip=skip)
+
+    # infinites in Vectors .np()
+    # NOTE: `skip` is reassigned here but is not passed to the two calls below
+    skip = []
+    np_infinites = [[x, y] for x, y in zip(np_positive_infinites, np_negative_infinites)]
+    # Inside a vector the positive timestamp, month and date are expected as values in the
+    # negative range, unlike the atom expectations above
+    np_infinites[5][0] = np.datetime64('1707-09-22T00:12:43.145224191')
+    np_infinites[6][0] = np.datetime64('-178954971-04')
+    np_infinites[7][0] = np.datetime64('-5877611-06-21')
+    compare_all_infinites(infinites, np_infinites, 'np', ind=0)
+    # NOTE: this reassigns the value the entry already holds (from np_negative_infinites)
+    np_infinites[5][1] = np.datetime64('1707-09-22T00:12:43.145224193')
+    compare_all_infinites(infinites, np_infinites, 'np', ind=1)
+
+    # Atom infinites .pd()
+    # `None` entries are placeholders for the skipped indices 6, 7 and 9
+    pd_positive_infinites = [np.int16(32767), np.int32(2147483647), np.int64(9223372036854775807),
+                             np.float32('inf'), np.float64('inf'),
+                             pd.Timestamp('2262-04-11T23:47:16.854775807'), None, None,
+                             pd.Timedelta(9223372036854775807, 'ns'), None,
+                             pd.Timedelta(2147483647, 's'), pd.Timedelta(2147483647, 'ms')]
+    pd_negative_infinites = [np.int16(-32767), np.int32(-2147483647),
+                             np.int64(-9223372036854775807), np.float32('-inf'),
+                             np.float64('-inf'), pd.Timestamp('1707-09-22 00:12:43.145224193'),
+                             None, None, pd.Timedelta(-9223372036854775807, 'ns'), None,
+                             pd.Timedelta(-2147483647, 's'), pd.Timedelta(-2147483647, 'ms')]
+    skip = [6, 7, 9]
+    # 6 7 9 Values out of range - Pandas constructors block them
+    compare_all_infinites(positive_infinites, pd_positive_infinites, 'pd', skip=skip)
+    compare_all_infinites(negative_infinites, pd_negative_infinites, 'pd', skip=skip)
+
+    # infinites in Vectors .pd()
+    skip = [6, 7, 9] # 6 7 9 Values out of range - Pandas constructors block them
+    pd_infinites = [[x, y] for x, y in zip(pd_positive_infinites, pd_negative_infinites)]
+    # Inside a vector the positive timestamp is expected as a 1707 value
+    pd_infinites[5][0] = pd.Timestamp('1707-09-22T00:12:43.145224191')
+    compare_all_infinites(infinites, pd_infinites, 'pd', ind=0, skip=skip)
+    # NOTE: this reassigns the value the entry already holds (from pd_negative_infinites)
+    pd_infinites[5][1] = pd.Timestamp('1707-09-22 00:12:43.145224193')
+    compare_all_infinites(infinites, pd_infinites, 'pd', ind=1, skip=skip)
+
+    # Atom infinites .pa()
+    # `None` entries are placeholders for the skipped indices 6, 7 and 9
+    pa_positive_infinites = [np.int16(32767), np.int32(2147483647), np.int64(9223372036854775807),
+                             np.float32('inf'), np.float64('inf'),
+                             pd.Timestamp('2262-04-11T23:47:16.854775807'),
+                             None, None, pd.Timedelta(9223372036854775807, 'ns'), None,
+                             pd.Timedelta(2147483647, 's'), pd.Timedelta(2147483647, 'ms')]
+    pa_negative_infinites = [np.int16(-32767), np.int32(-2147483647),
+                             np.int64(-9223372036854775807),
+                             np.float32('-inf'), np.float64('-inf'),
+                             pd.Timestamp('1707-09-22 00:12:43.145224193'),
+                             None, None, pd.Timedelta(-9223372036854775807, 'ns'), None,
+                             pd.Timedelta(-2147483647, 's'),
+                             pd.Timedelta(-2147483647, 'ms')]
+    skip = [6, 7, 9]
+    # 6, 7, 9 .pa runs but creates out of bounds objects
+    compare_all_infinites(positive_infinites, pa_positive_infinites, 'pa', skip=skip)
+    compare_all_infinites(negative_infinites, pa_negative_infinites, 'pa', skip=skip)
+
+    # infinites in Vectors .pa()
+    skip = [5, 6, 7, 9]
+    # 5, 6 pyarrow.lib.ArrowNotImplementedError: Unsupported datetime64 time unit
+    # 7 OverflowError: days=-2147472692; must have magnitude <= 999999999
+    # 9 pyarrow.lib.ArrowNotImplementedError: Unsupported timedelta64 time unit
+    pa_infinites = [[x, y] for x, y in zip(pa_positive_infinites, pa_negative_infinites)]
+    # Vector items are expected as typed pyarrow scalars, so the compared entries are
+    # replaced one by one. Index 5 is assigned as well but is in `skip`, so it is
+    # never compared.
+    pa_infinites[0][0] = pa.array([32767], pa.int16())[0]
+    pa_infinites[1][0] = pa.array([2147483647], pa.int32())[0]
+    pa_infinites[2][0] = pa.array([9223372036854775807], pa.int64())[0]
+    pa_infinites[3][0] = pa.array([np.float32('inf')], pa.float32())[0]
+    pa_infinites[4][0] = pa.array([np.float64('inf')], pa.float64())[0]
+    pa_infinites[5][0] = pa.array([-9223372036854775809+946684800000000000], pa.timestamp('ns'))[0]
+    pa_infinites[8][0] = pa.array([9223372036854775807], pa.duration('ns'))[0]
+    pa_infinites[10][0] = pa.array([2147483647], pa.duration('s'))[0]
+    pa_infinites[11][0] = pa.array([2147483647], pa.duration('ms'))[0]
+    compare_all_infinites(infinites, pa_infinites, 'pa', ind=0, skip=skip)
+    pa_infinites[0][1] = pa.array([-32767], pa.int16())[0]
+    pa_infinites[1][1] = pa.array([-2147483647], pa.int32())[0]
+    pa_infinites[2][1] = pa.array([-9223372036854775807], pa.int64())[0]
+    pa_infinites[3][1] = pa.array([np.float32('-inf')], pa.float32())[0]
+    pa_infinites[4][1] = pa.array([np.float64('-inf')], pa.float64())[0]
+    pa_infinites[5][1] = pa.array([-9223372036854775809+946684800000000000], pa.timestamp('ns'))[0]
+    pa_infinites[8][1] = pa.array([-9223372036854775807], pa.duration('ns'))[0]
+    pa_infinites[10][1] = pa.array([-2147483647], pa.duration('s'))[0]
+    pa_infinites[11][1] = pa.array([-2147483647], pa.duration('ms'))[0]
+    compare_all_infinites(infinites, pa_infinites, 'pa', ind=1, skip=skip)
+
+
# Conversions of nested K lists requires a license, We need to be able to call
# __getitem__ on the list to get correctly typed Numpy arrays to use.
@pytest.mark.licensed
@@ -3806,6 +4566,8 @@ def test_magic_dates_times(kx):
assert curr_time <= kx.TimeAtom('now')
curr_tstamp = kx.q('.z.P')
assert curr_tstamp <= kx.TimestampAtom('now')
+ curr_tspan = kx.q('.z.N')
+ assert curr_tspan <= kx.TimespanAtom('now')
def checkHTML(tab):
@@ -3817,16 +4579,20 @@ def checkHTML(tab):
def test_repr_html(kx, q):
H = 10
W = 20
+ console = q.system.console_size.py()
q.system.console_size = [H, W]
# Many datatypes
- q('t:flip {(`$/:t)!2#/:(t:{x where not x in " z"}.Q.t)$\\:()}[]')
- q('T:flip {(`$/:upper t)!2#/:enlist each 2#/:(t:{x where not x in " z"}.Q.t)$\\:()}[]')
+ q('typs:{neg[1]_x where not x in " z"}.Q.t') # Remove duplicate "s" column
+ q('tcols:`$/:typs')
+ q('tcols:@[tcols;where tcols=`i;:;`int]') # Removal of i is to limit issue with .Q.id
+ q('t:flip {tcols!2#/:typs$\\:()}[]')
+ q('T:flip {(upper tcols)!2#/:enlist each 2#/:typs$\\:()}[]')
q('wideManyDatatypes:t,\'T')
- tab = q('wideManyDatatypes')
+ tab = q['wideManyDatatypes']
# (rows, headers, details)
- assert (3, 44, 40) == checkHTML(tab)
+ assert (3, 46, 42) == checkHTML(tab)
# Single column table
q('singleColTab:([] a:.z.d-til 2000)')
@@ -3836,10 +4602,14 @@ def test_repr_html(kx, q):
assert (2, 5, 1) == checkHTML(tab)
tab = q('2#singleColTab')
assert (3, 6, 2) == checkHTML(tab)
- tab = q('10#singleColTab')
+ tab = q('9#singleColTab')
assert (H, 13, 9) == checkHTML(tab)
- tab = q('11#singleColTab')
+ tab = q('10#singleColTab')
assert (H+1, 14, 10) == checkHTML(tab)
+ tab = q('11#singleColTab')
+ assert (H+2, 15, 11) == checkHTML(tab)
+ tab = q('12#singleColTab')
+ assert (H+2, 15, 11) == checkHTML(tab)
# Multi column table
q('multiColTab:([] a:.z.d-til 2000; sym:2000?`7)')
@@ -3850,9 +4620,9 @@ def test_repr_html(kx, q):
tab = q('2#multiColTab')
assert (3, 8, 4) == checkHTML(tab)
tab = q('10#multiColTab')
- assert (H, 15, 18) == checkHTML(tab)
- tab = q('11#multiColTab')
assert (H+1, 16, 20) == checkHTML(tab)
+ tab = q('11#multiColTab')
+ assert (H+2, 17, 22) == checkHTML(tab)
q('n:-1+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('0#extraWide')
@@ -3861,10 +4631,14 @@ def test_repr_html(kx, q):
assert (2, 41, 19) == checkHTML(tab)
tab = q('2#extraWide')
assert (3, 42, 38) == checkHTML(tab)
- tab = q('10#extraWide')
+ tab = q('9#extraWide')
assert (H, 49, 171) == checkHTML(tab)
- tab = q('11#extraWide')
+ tab = q('10#extraWide')
assert (H+1, 50, 190) == checkHTML(tab)
+    tab = q('11#extraWide')
+    assert (H+2, 51, 209) == checkHTML(tab)
+    tab = q('12#extraWide')
+    assert (H+2, 51, 209) == checkHTML(tab)
q('n:last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('0#extraWide')
@@ -3874,9 +4648,11 @@ def test_repr_html(kx, q):
tab = q('2#extraWide')
assert (3, 44, 40) == checkHTML(tab)
tab = q('10#extraWide')
- assert (H, 51, 180) == checkHTML(tab)
- tab = q('11#extraWide')
assert (H+1, 52, 200) == checkHTML(tab)
+ tab = q('11#extraWide')
+ assert (H+2, 53, 220) == checkHTML(tab)
+ tab = q('12#extraWide')
+ assert (H+2, 53, 220) == checkHTML(tab)
q('n:1+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('0#extraWide')
@@ -3885,10 +4661,14 @@ def test_repr_html(kx, q):
assert (2, 45, 21) == checkHTML(tab)
tab = q('2#extraWide')
assert (3, 46, 42) == checkHTML(tab)
- tab = q('10#extraWide')
+ tab = q('9#extraWide')
assert (H, 53, 189) == checkHTML(tab)
- tab = q('11#extraWide')
+ tab = q('10#extraWide')
assert (H+1, 54, 210) == checkHTML(tab)
+ tab = q('11#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
+ tab = q('12#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
q('n:50+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('0#extraWide')
@@ -3897,10 +4677,14 @@ def test_repr_html(kx, q):
assert (2, 45, 21) == checkHTML(tab)
tab = q('2#extraWide')
assert (3, 46, 42) == checkHTML(tab)
- tab = q('10#extraWide')
+ tab = q('9#extraWide')
assert (H, 53, 189) == checkHTML(tab)
- tab = q('11#extraWide')
+ tab = q('10#extraWide')
assert (H+1, 54, 210) == checkHTML(tab)
+ tab = q('11#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
+ tab = q('12#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
# Many keys
tab = q('(-1+last system"c")!0#extraWide')
@@ -3909,10 +4693,14 @@ def test_repr_html(kx, q):
assert (2, 61, 2) == checkHTML(tab)
tab = q('(-1+last system"c")!2#extraWide')
assert (3, 80, 4) == checkHTML(tab)
- tab = q('(-1+last system"c")!10#extraWide')
+ tab = q('(-1+last system"c")!9#extraWide')
assert (H, 213, 18) == checkHTML(tab)
- tab = q('(-1+last system"c")!11#extraWide')
+ tab = q('(-1+last system"c")!10#extraWide')
assert (H+1, 232, 20) == checkHTML(tab)
+ tab = q('(-1+last system"c")!11#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
+ tab = q('(-1+last system"c")!12#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
tab = q('(last system"c")!0#extraWide')
assert (1, 42, 0) == checkHTML(tab)
@@ -3920,10 +4708,14 @@ def test_repr_html(kx, q):
assert (2, 61, 2) == checkHTML(tab)
tab = q('(last system"c")!2#extraWide')
assert (3, 80, 4) == checkHTML(tab)
- tab = q('(last system"c")!10#extraWide')
+ tab = q('(last system"c")!9#extraWide')
assert (H, 213, 18) == checkHTML(tab)
- tab = q('(last system"c")!11#extraWide')
+ tab = q('(last system"c")!10#extraWide')
assert (H+1, 232, 20) == checkHTML(tab)
+ tab = q('(last system"c")!11#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
+ tab = q('(last system"c")!12#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
tab = q('(1+last system"c")!0#extraWide')
assert (1, 42, 0) == checkHTML(tab)
@@ -3931,10 +4723,14 @@ def test_repr_html(kx, q):
assert (2, 61, 2) == checkHTML(tab)
tab = q('(1+last system"c")!2#extraWide')
assert (3, 80, 4) == checkHTML(tab)
- tab = q('(1+last system"c")!10#extraWide')
+ tab = q('(1+last system"c")!9#extraWide')
assert (H, 213, 18) == checkHTML(tab)
- tab = q('(1+last system"c")!11#extraWide')
+ tab = q('(1+last system"c")!10#extraWide')
assert (H+1, 232, 20) == checkHTML(tab)
+ tab = q('(1+last system"c")!11#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
+ tab = q('(1+last system"c")!12#extraWide')
+ assert (H+2, 251, 22) == checkHTML(tab)
# Dictionaries
assert 'Empty pykx.Dictionary: ' == q('()!()')._repr_html_()[:26]
@@ -3970,10 +4766,14 @@ def test_repr_html(kx, q):
assert (2, 5, 1) == checkHTML(tab)
tab = q('2#singleKeyTab')
assert (3, 6, 2) == checkHTML(tab)
- tab = q('10#singleKeyTab')
+ tab = q('9#singleKeyTab')
assert (H, 13, 9) == checkHTML(tab)
- tab = q('11#singleKeyTab')
+ tab = q('10#singleKeyTab')
assert (H+1, 14, 10) == checkHTML(tab)
+ tab = q('11#singleKeyTab')
+ assert (H+2, 15, 11) == checkHTML(tab)
+ tab = q('12#singleKeyTab')
+ assert (H+2, 15, 11) == checkHTML(tab)
# Multi Key
q('multiKeyTab:`sym`blah xkey ([] a:.z.d-til 2000; sym:2000?`7;blah:-2000?1000000)')
@@ -3983,10 +4783,14 @@ def test_repr_html(kx, q):
assert (2, 8, 1) == checkHTML(tab)
tab = q('2#multiKeyTab')
assert (3, 10, 2) == checkHTML(tab)
- tab = q('10#multiKeyTab')
+ tab = q('9#multiKeyTab')
assert (H, 24, 9) == checkHTML(tab)
- tab = q('11#multiKeyTab')
+ tab = q('10#multiKeyTab')
assert (H+1, 26, 10) == checkHTML(tab)
+ tab = q('11#multiKeyTab')
+ assert (H+2, 28, 11) == checkHTML(tab)
+ tab = q('12#multiKeyTab')
+ assert (H+2, 28, 11) == checkHTML(tab)
# Single column splay table
tab = q('{x set 0#([] a:.z.d-til 2000);get x}`:singleColSplay/')
@@ -3995,10 +4799,14 @@ def test_repr_html(kx, q):
assert (2, 5, 1) == checkHTML(tab)
tab = q('{x set 2#([] a:.z.d-til 2000);get x}`:singleColSplay/')
assert (3, 6, 2) == checkHTML(tab)
- tab = q('{x set 10#([] a:.z.d-til 2000);get x}`:singleColSplay/')
+ tab = q('{x set 9#([] a:.z.d-til 2000);get x}`:singleColSplay/')
assert (H, 13, 9) == checkHTML(tab)
- tab = q('{x set 11#([] a:.z.d-til 2000);get x}`:singleColSplay/')
+ tab = q('{x set 10#([] a:.z.d-til 2000);get x}`:singleColSplay/')
assert (H+1, 14, 10) == checkHTML(tab)
+ tab = q('{x set 11#([] a:.z.d-til 2000);get x}`:singleColSplay/')
+ assert (H+2, 15, 11) == checkHTML(tab)
+ tab = q('{x set 12#([] a:.z.d-til 2000);get x}`:singleColSplay/')
+ assert (H+2, 15, 11) == checkHTML(tab)
# Multi column splay
tab = q('{x set 0#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
@@ -4007,10 +4815,14 @@ def test_repr_html(kx, q):
assert (2, 7, 2) == checkHTML(tab)
tab = q('{x set 2#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
assert (3, 8, 4) == checkHTML(tab)
- tab = q('{x set 10#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
+ tab = q('{x set 9#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
assert (H, 15, 18) == checkHTML(tab)
- tab = q('{x set 11#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
+ tab = q('{x set 10#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
assert (H+1, 16, 20) == checkHTML(tab)
+ tab = q('{x set 11#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
+ assert (H+2, 17, 22) == checkHTML(tab)
+ tab = q('{x set 12#([] a:.z.d-til 2000; b:til 2000);get x}`:multiColSplay/')
+ assert (H+2, 17, 22) == checkHTML(tab)
q('n:-1+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('{x set y;get x}[`:multiColSplay/]0#extraWide')
@@ -4019,10 +4831,14 @@ def test_repr_html(kx, q):
assert (2, 41, 19) == checkHTML(tab)
tab = q('{x set y;get x}[`:multiColSplay/]2#extraWide')
assert (3, 42, 38) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]9#extraWide')
assert (H, 49, 171) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
assert (H+1, 50, 190) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ assert (H+2, 51, 209) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]12#extraWide')
+ assert (H+2, 51, 209) == checkHTML(tab)
q('n:last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('{x set y;get x}[`:multiColSplay/]0#extraWide')
@@ -4031,10 +4847,14 @@ def test_repr_html(kx, q):
assert (2, 43, 20) == checkHTML(tab)
tab = q('{x set y;get x}[`:multiColSplay/]2#extraWide')
assert (3, 44, 40) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]9#extraWide')
assert (H, 51, 180) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
assert (H+1, 52, 200) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ assert (H+2, 53, 220) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]12#extraWide')
+ assert (H+2, 53, 220) == checkHTML(tab)
q('n:1+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('{x set y;get x}[`:multiColSplay/]0#extraWide')
@@ -4043,10 +4863,14 @@ def test_repr_html(kx, q):
assert (2, 45, 21) == checkHTML(tab)
tab = q('{x set y;get x}[`:multiColSplay/]2#extraWide')
assert (3, 46, 42) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]9#extraWide')
assert (H, 53, 189) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
assert (H+1, 54, 210) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]12#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
q('n:50+last system"c";extraWide:flip (`$"col",/:string 1+til n)!n#enlist til 1000')
tab = q('{x set y;get x}[`:multiColSplay/]0#extraWide')
@@ -4055,10 +4879,14 @@ def test_repr_html(kx, q):
assert (2, 45, 21) == checkHTML(tab)
tab = q('{x set y;get x}[`:multiColSplay/]2#extraWide')
assert (3, 46, 42) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]9#extraWide')
assert (H, 53, 189) == checkHTML(tab)
- tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ tab = q('{x set y;get x}[`:multiColSplay/]10#extraWide')
assert (H+1, 54, 210) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]11#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
+ tab = q('{x set y;get x}[`:multiColSplay/]12#extraWide')
+ assert (H+2, 55, 231) == checkHTML(tab)
# Syms and enums
q('enums:`sym?`aa`cc`bb')
@@ -4139,6 +4967,11 @@ def test_repr_html(kx, q):
tab = q('partitionedTab')
assert (3, 8, 4) == checkHTML(tab)
+ q('(`$":2001.01.01/partitionedTab/") set 9#([] a:.z.d-til 2000;b:til 2000)')
+ q('system"l ."')
+ tab = q('partitionedTab')
+ assert (H, 15, 18) == checkHTML(tab)
+
q('(`$":2001.01.01/partitionedTab/") set 10#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
@@ -4147,7 +4980,12 @@ def test_repr_html(kx, q):
q('(`$":2001.01.01/partitionedTab/") set 11#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
- assert (H+1, 16, 20) == checkHTML(tab)
+ assert (H+2, 17, 22) == checkHTML(tab)
+
+ q('(`$":2001.01.01/partitionedTab/") set 12#([] a:.z.d-til 2000;b:til 2000)')
+ q('system"l ."')
+ tab = q('partitionedTab')
+ assert (H+2, 17, 22) == checkHTML(tab)
q('(`$":2001.01.01/partitionedTab/") set 0#([] a:.z.d-til 2000;b:til 2000)')
q('(`$":2001.01.02/partitionedTab/") set 0#([] a:.z.d-til 2000;b:til 2000)')
@@ -4161,11 +4999,23 @@ def test_repr_html(kx, q):
tab = q('partitionedTab')
assert (2, 9, 3) == checkHTML(tab)
+ q('(`$":2001.01.01/partitionedTab/") set 4#([] a:.z.d-til 2000;b:til 2000)')
+ q('(`$":2001.01.02/partitionedTab/") set 5#([] a:.z.d-til 2000;b:til 2000)')
+ q('system"l ."')
+ tab = q('partitionedTab')
+ assert (H, 17, 27) == checkHTML(tab)
+
+ q('(`$":2001.01.01/partitionedTab/") set 5#([] a:.z.d-til 2000;b:til 2000)')
+ q('(`$":2001.01.02/partitionedTab/") set 5#([] a:.z.d-til 2000;b:til 2000)')
+ q('system"l ."')
+ tab = q('partitionedTab')
+ assert (H+1, 18, 30) == checkHTML(tab)
+
q('(`$":2001.01.01/partitionedTab/") set 10#([] a:.z.d-til 2000;b:til 2000)')
q('(`$":2001.01.02/partitionedTab/") set 10#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
- assert (H+1, 18, 30) == checkHTML(tab)
+ assert (H+2, 19, 33) == checkHTML(tab)
q('(`$":2001.01.01/partitionedTab/") set 1#([] a:.z.d-til 2000;b:til 2000)')
q('(`$":2001.01.02/partitionedTab/") set 0#([] a:.z.d-til 2000;b:til 2000)')
@@ -4177,19 +5027,19 @@ def test_repr_html(kx, q):
q('(`$":2001.01.02/partitionedTab/") set 0#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
- assert (H+1, 18, 30) == checkHTML(tab)
+ assert (H+2, 19, 33) == checkHTML(tab)
q('(`$":2001.01.01/partitionedTab/") set 0#([] a:.z.d-til 2000;b:til 2000)')
q('(`$":2001.01.02/partitionedTab/") set 35#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
- assert (H+1, 18, 30) == checkHTML(tab)
+ assert (H+2, 19, 33) == checkHTML(tab)
q('(`$":2001.01.01/partitionedTab/") set 11#([] a:.z.d-til 2000;b:til 2000)')
q('(`$":2001.01.02/partitionedTab/") set 11#([] a:.z.d-til 2000;b:til 2000)')
q('system"l ."')
tab = q('partitionedTab')
- assert (H+1, 18, 30) == checkHTML(tab)
+ assert (H+2, 19, 33) == checkHTML(tab)
q('(`$":2001.01.01/partitionedTab/") set 0#extraWide')
q('(`$":2001.01.02/partitionedTab/") set 0#extraWide')
@@ -4203,6 +5053,8 @@ def test_repr_html(kx, q):
tab = q('partitionedTab')
assert (6, 49, 105) == checkHTML(tab)
+ q.system.console_size = console
+
@pytest.mark.unlicensed
@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards")
@@ -4230,7 +5082,6 @@ def gen_q_datatypes_table(q, table_name: str, num_rows: int = 100) -> str:
q('tab: (til 100)!(tab)')
-@pytest.mark.unlicensed
@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards")
def test_pyarrow_pandas_all(q):
def gen_q_datatypes_table(q, table_name: str, num_rows: int = 100) -> str:
@@ -4342,6 +5193,18 @@ def test_pyarrow_pandas_table_roundtrip(kx):
assert (tab[x] == tab2[x]).all()
+@pytest.mark.unlicensed
+def test_pyarrow_list_error(kx, q_port):
+    """Mixed-type lists, and tables holding one, must raise QError from `.pa()`."""
+    queries = ('(.z.p;4)', '(4;.z.p)', '([](1;2.0;`a);1 2 3)')
+    with kx.QConnection(port=q_port) as q:
+        for qobj in [q(query) for query in queries]:
+            with pytest.raises(kx.QError) as err:
+                qobj.pa()
+            # Outside the raises block so it actually runs; check the exception itself.
+            assert 'Unable to convert pykx.List ' in str(err.value)
+
+
@pytest.mark.unlicensed
def test_all_timetypes(kx, q_port):
with kx.QConnection(port=q_port) as q:
@@ -4524,3 +5387,13 @@ def test_all_timetypes(kx, q_port):
 def test_datetime64(kx):
+    """A datetime64[s] column must compare equal after a round trip through q."""
     df = pd.DataFrame(data={'a': np.array([9999, 1577899899], dtype='datetime64[s]')})
-    all(df['a'] == kx.toq(df).pd()['a'])
+    assert all(df['a'] == kx.toq(df).pd()['a'])
+
+
+@pytest.mark.order(-1)
+def test_cleanup(kx):
+    """Remove directories written to disk by tests in this module (e.g. splay tables)."""
+    leftovers = ('HDB', 'symsEnumsSplay', 'singleColSplay', 'multiColSplay')
+    for path in leftovers:
+        shutil.rmtree(path, ignore_errors=True)
+    assert True
diff --git a/tests/win_tests.bat b/tests/win_tests.bat
index e1579e1..0cbfff6 100644
--- a/tests/win_tests.bat
+++ b/tests/win_tests.bat
@@ -2,4 +2,34 @@ python .\parse_tests.py
 cd ..
-pytest .\tests\win_tests\lic\licensed_tests.py && pytest .\tests\win_tests\unlic\unlicensed_tests.py && pytest .\tests\win_tests\ipc_lic\ipc_licensed_tests.py && pytest .\tests\win_tests\ipc_unlic\ipc_unlicensed_tests.py && pytest .\tests\win_tests\embedded\embedded_tests.py && pytest .\tests\win_tests\nep_lic\nep_licensed_tests.py && pytest .\tests\win_tests\nep_unlic\nep_unlicensed_tests.py && pytest .\tests\win_tests\pandas_lic\pandas_licensed_tests.py
+REM Run every generated Windows test suite in turn. A failing suite does not
+REM stop the run: its exit code is recorded and the remaining suites still
+REM execute, each writing its own JUnit XML report.
+python -m pytest -vvv -n 0 --no-cov --junitxml=licensed_report.xml .\tests\win_tests\lic\licensed_tests.py
+SET /A licensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=unlicensed_report.xml .\tests\win_tests\unlic\unlicensed_tests.py
+SET /A unlicensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=ipc_licensed_report.xml .\tests\win_tests\ipc_lic\ipc_licensed_tests.py
+SET /A ipc_licensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=ipc_unlicensed_report.xml .\tests\win_tests\ipc_unlic\ipc_unlicensed_tests.py
+SET /A ipc_unlicensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=embedded_report.xml .\tests\win_tests\embedded\embedded_tests.py
+SET /A embedded=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=nep_licensed_report.xml .\tests\win_tests\nep_lic\nep_licensed_tests.py
+SET /A nep_licensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=nep_unlicensed_report.xml .\tests\win_tests\nep_unlic\nep_unlicensed_tests.py
+SET /A nep_unlicensed=%ERRORLEVEL%
+python -m pytest -vvv -n 0 --no-cov --junitxml=pandas_licensed_report.xml .\tests\win_tests\pandas_lic\pandas_licensed_tests.py
+SET /A pandas_licensed=%ERRORLEVEL%
+
+REM Terminate with the exit code of the first suite (in run order) that
+REM returned non-zero.
+IF %licensed% NEQ 0 EXIT %licensed%
+IF %unlicensed% NEQ 0 EXIT %unlicensed%
+IF %ipc_licensed% NEQ 0 EXIT %ipc_licensed%
+IF %ipc_unlicensed% NEQ 0 EXIT %ipc_unlicensed%
+IF %embedded% NEQ 0 EXIT %embedded%
+IF %nep_licensed% NEQ 0 EXIT %nep_licensed%
+IF %nep_unlicensed% NEQ 0 EXIT %nep_unlicensed%
+IF %pandas_licensed% NEQ 0 EXIT %pandas_licensed%
+
diff --git a/w64_install.ps1 b/w64_install.ps1
index e248b12..2c2beea 100644
--- a/w64_install.ps1
+++ b/w64_install.ps1
@@ -1,10 +1,3 @@
-Install-Module VcRedist -Force -AllowClobber
-if (!(Test-Path -Path .\vcredist)){
- mkdir vcredist
-}
-$VcRedist = Get-VcList -Export All | Where-Object { $_.Release -eq '2010' -and $_.Architecture -eq 'x64' }
-Save-VcRedist -Path 'vcredist' $VcRedist
-Install-VcRedist -Path 'vcredist' -Silent $VcRedist
Invoke-WebRequest https://aka.ms/vs/16/release/vs_BuildTools.exe -UseBasicParsing -OutFile 'vs_BuildTools.exe'
./vs_BuildTools.exe --nocache --wait --quiet --norestart --includeRecommended --includeOptional --add Microsoft.VisualStudio.Workload.VCTools
if(Test-Path -Path .\vcpkg){