diff --git a/.gitignore b/.gitignore index ff25d76..0b3d7b2 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,11 @@ __pycache__/ !e.* !libkurl.* !libobjstor.* +**/4-1-libs/*.q +**/4-1-libs/*.q_ +**/4-1-libs/*.k_ +**/4-1-libs/*.k +!**/4-1-libs/q.k # Distribution / packaging .Python @@ -223,6 +228,8 @@ coverage.xml # CCLS files .ccls-cache/* +.ccls +compile_commands.json # HDB Test files HDB/** diff --git a/README.md b/README.md index 564b604..14cc86d 100644 --- a/README.md +++ b/README.md @@ -35,18 +35,19 @@ For more information on using q/kdb+ and getting started with see the following Ensure you have a recent version of pip: +```bash pip install --upgrade pip - +``` Then install the latest version of PyKX with the following command: -``` +```bash pip install pykx ``` To install a specific version of PyKX run the following command replacing with a specific released semver version of the interface -``` +```bash pip install pykx== ``` @@ -68,9 +69,9 @@ The following steps outline the process by which a user can gain access to an in #### Commercial Evaluation License -The following steps outline the process by which a user can gain access to an install a kdb Insights license which provides access to PyKX +The following steps outline the process by which a user can gain access to and install a kdb Insights license which provides access to PyKX -1. Visit https://kx.com/kdb-insights-commercial-evaluation-license-download/ and fill in the attached form following the instructions provided. +1. Contact your KX sales representative or sales@kx.com requesting a trial license for PyKX evaluation. Alternatively, apply through https://kx.com/book-demo. 2. On receipt of an email from KX providing access to your license download this file and save to a secure location on your computer. 3. 
Set an environment variable on your computer pointing to the folder containing the license file (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). * Variable Name: `QLIC` @@ -107,6 +108,8 @@ When using PyKX with KX Dashboards users will be required to install `ast2json~= When using PyKX Beta features users will be required to install `dill>=0.2.0` this can be installed using the `beta` extra, e.g. `pip install pykx[beta]` +When using Streamlit users will be required to install `streamlit~=1.28` this can be installed using the `streamlit` extra, e.g. `pip install pykx[streamlit]` + **Warning:** Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11. #### Optional Non-Python Dependencies diff --git a/custom_theme/main.html b/custom_theme/main.html index bffbb07..4c73cdc 100644 --- a/custom_theme/main.html +++ b/custom_theme/main.html @@ -8,4 +8,15 @@ 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-PG938LS'); + + + {% endblock %} diff --git a/docs/api/pykx-q-data/type_conversions.md b/docs/api/pykx-q-data/type_conversions.md index 0e4b1d7..7373913 100644 --- a/docs/api/pykx-q-data/type_conversions.md +++ b/docs/api/pykx-q-data/type_conversions.md @@ -1,31 +1,58 @@ # PyKX to Pythonic data type mapping -A breakdown of each of the `pykx.K` types and their analogous `numpy`, `pandas`, and `pyarrow` types. - -??? 
"Cheat Sheet" - - PyKX type | Python type | Numpy dtype | Pandas dtype | PyArrow type | - ------------------------------- | ----------- | --------------- | --------------- | -------------- | - [List](#pykxlist) | list | object | object | Not Supported | - [Boolean](#pykxbooleanatom) | bool | bool | bool | Not Supported | - [GUID](#pykxguidatom) | uuid4 | uuid4 | uuid4 | uuid4 | - [Byte](#pykxbyteatom) | int | uint8 | uint8 | uint8 | - [Short](#pykxshortatom) | int | int16 | int16 | int16 | - [Int](#pykxintatom) | int | int32 | int32 | int32 | - [Long](#pykxlongatom) | int | int64 | int64 | int64 | - [Real](#pykxrealatom) | float | float32 | float32 | FloatArray | - [Float](#pykxfloatatom) | float | float64 | float64 | DoubleArray | - [Char](#pykxcharatom) | bytes | \|S1 | bytes8 | BinaryArray | - [Symbol](#pykxsymbolatom) | str | object | object | StringArray | - [Timestamp](#pykxtimestampatom) | datetime | datetime64[ns] | datetime64[ns] | TimestampArray | - [Month](#pykxmonthatom) | date | datetime64[M] | datetime64[ns] | Not Supported | - [Date](#pykxdateatom) | date | datetime64[D] | datetime64[ns] | Date32Array | - [Timespan](#pykxtimespanatom) | timedelta | timedelta[ns] | timedelta64[ns] | DurationArray | - [Minute](#pykxminuteatom) | timedelta | timedelta64[m] | timedelta64[ns] | Not Supported | - [Second](#pykxsecondatom) | timedelta | timedelta64[s] | timedelta64[ns] | DurationArray | - [Time](#TimeAtom) | timedelta | timedelta64[ms] | timedelta64[ns] | DurationArray | - [Dictionary](#pykxdictionary) | dict | Not Supported | Not Supported | Not Supported | - [Table](#pykxtable) | dict | records | DataFrame | Table | +A breakdown of each of the `pykx.K` types and their analogous `Python`, `NumPy`, `Pandas`, and `PyArrow` types. + +??? 
"Cheat Sheet: `Python`, `NumPy`, `PyArrow`" + + | PyKX type | Python type | Numpy dtype | PyArrow type | + | ------------------------------- | ----------- | --------------- | -------------- | + | [List](#pykxlist) | list | object | Not Supported | + | [Boolean](#pykxbooleanatom) | bool | bool | Not Supported | + | [GUID](#pykxguidatom) | uuid4 | uuid4 | uuid4 | + | [Byte](#pykxbyteatom) | int | uint8 | uint8 | + | [Short](#pykxshortatom) | int | int16 | int16 | + | [Int](#pykxintatom) | int | int32 | int32 | + | [Long](#pykxlongatom) | int | int64 | int64 | + | [Real](#pykxrealatom) | float | float32 | FloatArray | + | [Float](#pykxfloatatom) | float | float64 | DoubleArray | + | [Char](#pykxcharatom) | bytes | \\\|S1 | BinaryArray | + | [Symbol](#pykxsymbolatom) | str | object | StringArray | + | [Timestamp](#pykxtimestampatom) | datetime | datetime64[ns] | TimestampArray | + | [Month](#pykxmonthatom) | date | datetime64[M] | Not Supported | + | [Date](#pykxdateatom) | date | datetime64[D] | Date32Array | + | [Timespan](#pykxtimespanatom) | timedelta | timedelta64[ns] | DurationArray | + | [Minute](#pykxminuteatom) | timedelta | timedelta64[m] | Not Supported | + | [Second](#pykxsecondatom) | timedelta | timedelta64[s] | DurationArray | + | [Time](#TimeAtom) | timedelta | timedelta64[ms] | DurationArray | + | [Dictionary](#pykxdictionary) | dict | Not Supported | Not Supported | + | [Table](#pykxtable) | dict | records | Table | + +??? "Cheat Sheet: `Pandas 1.*`, `Pandas 2.*`, `Pandas 2.* PyArrow backed`" + + **Note:** Creating PyArrow backed Pandas objects uses `as_arrow=True` using NumPy arrays as an intermediate data format. 
+ + | PyKX type | Pandas 1.\* dtype | Pandas 2.\* dtype | Pandas 2.\* as_arrow=True dtype | + | ------------------------------- | ----------------- | ----------------- | ------------------------------- | + | [List](#pykxlist) | object | object | object | + | [Boolean](#pykxbooleanatom) | bool | bool | bool[pyarrow] | + | [GUID](#pykxguidatom) | object | object | object | + | [Byte](#pykxbyteatom) | uint8 | uint8 | uint8[pyarrow] | + | [Short](#pykxshortatom) | int16 | int16 | int16[pyarrow] | + | [Int](#pykxintatom) | int32 | int32 | int32[pyarrow] | + | [Long](#pykxlongatom) | int64 | int64 | int64[pyarrow] | + | [Real](#pykxrealatom) | float32 | float32 | float[pyarrow] | + | [Float](#pykxfloatatom) | float64 | float64 | double[pyarrow] | + | [Char](#pykxcharatom) | bytes8 | bytes8 | fixed_size_binary[1][pyarrow] | + | [Symbol](#pykxsymbolatom) | object | object | string[pyarrow] | + | [Timestamp](#pykxtimestampatom) | datetime64[ns] | datetime64[ns] | timestamp[ns][pyarrow] | + | [Month](#pykxmonthatom) | datetime64[ns] | datetime64[s] | timestamp[s][pyarrow] | + | [Date](#pykxdateatom) | datetime64[ns] | datetime64[s] | timestamp[s][pyarrow] | + | [Timespan](#pykxtimespanatom) | timedelta64[ns] | timedelta64[ns] | duration[ns][pyarrow] | + | [Minute](#pykxminuteatom) | timedelta64[ns] | timedelta64[s] | duration[s][pyarrow] | + | [Second](#pykxsecondatom) | timedelta64[ns] | timedelta64[s] | duration[s][pyarrow] | + | [Time](#TimeAtom) | timedelta64[ns] | timedelta64[ms] | duration[ms][pyarrow] | + | [Dictionary](#pykxdictionary) | Not Supported | Not Supported | Not Supported | + | [Table](#pykxtable) | DataFrame | DataFrame | DataFrame | ## `pykx.List` @@ -611,14 +638,25 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.TimestampVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. 
- - ```Python - >>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).pd() - 0 2150-10-22 20:31:15.070713 - 1 2050-10-22 20:31:15.070713 - dtype: datetime64[ns] - ``` + Calling `.pd()` on a `pykx.TimestampVector` will return a pandas `Series` with `dtype`: + + 1. `datetime64[ns]`: + + ```python + >>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).pd() + 0 2150-10-22 20:31:15.070713 + 1 2050-10-22 20:31:15.070713 + dtype: datetime64[ns] + ``` + + 2. `timestamp[ns][pyarrow]` in pandas>=2.0 with `as_arrow=True`: + + ```python + >>> kx.TimestampVector([datetime(2150, 10, 22, 20, 31, 15, 70713), datetime(2050, 10, 22, 20, 31, 15, 70713)]).pd(as_arrow=True) + 0 2150-10-22 20:31:15.070713 + 1 2050-10-22 20:31:15.070713 + dtype: timestamp[ns][pyarrow] + ``` === "PyArrow" Calling `.pa()` on a `pykx.TimestampVector` will return a pyarrow `TimestampArray`. @@ -683,14 +721,34 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.MonthVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. - - ```Python - >>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() - 0 1972-05-01 - 1 1999-05-01 - dtype: datetime64[ns] - ``` + Calling `.pd()` on a `pykx.MonthVector` will return a pandas `Series` with `dtype`: + + 1. `datetime64[ns]` in `pandas<2.0`: + + ```python + >>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() + 0 1972-05-01 + 1 1999-05-01 + dtype: datetime64[ns] + ``` + + 2. `datetime64[s]` in `pandas>=2.0`: + + ```python + >>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() + 0 1972-05-01 + 1 1999-05-01 + dtype: datetime64[s] + ``` + + 3. 
`timestamp[s][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.MonthVector([date(1972, 5, 1), date(1999, 5, 1)]).pd(as_arrow=True) + 0 1972-05-01 00:00:00 + 1 1999-05-01 00:00:00 + dtype: timestamp[s][pyarrow] + ``` ## `pykx.DateAtom` @@ -742,14 +800,35 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.DateVector` will return a pandas `Series` with `dtype` `datetime64[ns]`. + Calling `.pd()` on a `pykx.DateVector` will return a pandas `Series` with `dtype`: - ```Python - >>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() - 0 1972-05-01 - 1 1999-05-01 - dtype: datetime64[ns] - ``` + 1. `datetime64[ns]` in `pandas<2.0`: + + ```python + # pandas<2.0 + >>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() + 0 1972-05-01 + 1 1999-05-01 + dtype: datetime64[ns] + ``` + + 2. `datetime64[s]` in `pandas>=2.0`: + + ```python + >>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pd() + 0 1972-05-01 + 1 1999-05-01 + dtype: datetime64[s] + ``` + + 3. `timestamp[s][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.DateVector([date(1972, 5, 1), date(1999, 5, 1)]).pd(as_arrow=True) + 0 1972-05-01 00:00:00 + 1 1999-05-01 00:00:00 + dtype: timestamp[s][pyarrow] + ``` === "PyArrow" Calling `.pa()` on a `pykx.DateVector` will return a pyarrow `Date32Array`. @@ -830,14 +909,25 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.TimespanVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. + Calling `.pd()` on a `pykx.TimespanVector` will return a pandas `Series` with `dtype`: - ```Python - >>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).pd() - 0 43938 days 19:07:31.664551 - 1 43938 days 19:07:31.664551 - dtype: timedelta64[ns] - ``` + 1. 
`timedelta64[ns]`: + + ```python + >>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).pd() + 0 43938 days 19:07:31.664551 + 1 43938 days 19:07:31.664551 + dtype: timedelta64[ns] + ``` + + 2. `duration[ns][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.TimespanVector([timedelta(days=43938, seconds=68851, microseconds=664551), timedelta(days=43938, seconds=68851, microseconds=664551)]).pd(as_arrow=True) + 0 43938 days 19:07:31.664551 + 1 43938 days 19:07:31.664551 + dtype: duration[ns][pyarrow] + ``` === "PyArrow" Calling `.pa()` on a `pykx.TimespanVector` will return a pyarrow `DurationArray`. @@ -901,14 +991,34 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.MinuteVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. - - ```Python - >>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).pd() - 0 0 days 03:36:00 - 1 0 days 01:07:00 - dtype: timedelta64[ns] - ``` + Calling `.pd()` on a `pykx.MinuteVector` will return a pandas `Series` with `dtype`: + + 1. `timedelta64[ns]` in `pandas<2.0`: + + ```python + >>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).pd() + 0 0 days 03:36:00 + 1 0 days 01:07:00 + dtype: timedelta64[ns] + ``` + + 2. `timedelta64[s]` in `pandas>=2.0`: + + ```python + >>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).pd() + 0 0 days 03:36:00 + 1 0 days 01:07:00 + dtype: timedelta64[s] + ``` + + 3. `duration[s][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.MinuteVector([timedelta(minutes=216), timedelta(minutes=67)]).pd(as_arrow=True) + 0 0 days 03:36:00 + 1 0 days 01:07:00 + dtype: duration[s][pyarrow] + ``` ## `pykx.SecondAtom` @@ -960,14 +1070,34 @@ True ``` === "Pandas" - Calling `.pd()` on a `pykx.SecondVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. 
- - ```Python - >>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pd() - 0 0 days 03:36:59 - 1 0 days 00:16:59 - dtype: timedelta64[ns] - ``` + Calling `.pd()` on a `pykx.SecondVector` will return a pandas `Series` with `dtype`: + + 1. `timedelta64[ns]` in `pandas<2.0`: + ```python + # pandas<2.0 + >>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pd() + 0 0 days 03:36:59 + 1 0 days 00:16:59 + dtype: timedelta64[ns] + ``` + + 2. `timedelta64[s]` in `pandas>=2.0`: + + ```python + >>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pd() + 0 0 days 03:36:59 + 1 0 days 00:16:59 + dtype: timedelta64[s] + ``` + + 3. `duration[s][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.SecondVector([timedelta(seconds=13019), timedelta(seconds=1019)]).pd(as_arrow=True) + 0 0 days 03:36:59 + 1 0 days 00:16:59 + dtype: duration[s][pyarrow] + ``` === "PyArrow" Calling `.pa()` on a `pykx.SecondVector` will return a pyarrow `DurationArray`. @@ -1033,14 +1163,34 @@ True === "Pandas" - Calling `.pd()` on a `pykx.TimeVector` will return a pandas `Series` with `dtype` `timedelta64[ns]`. + Calling `.pd()` on a `pykx.TimeVector` will return a pandas `Series` with `dtype`: - ```Python - >>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pd() - 0 0 days 16:36:29.214000 - 1 0 days 06:36:29.214000 - dtype: timedelta64[ns] - ``` + 1. `timedelta64[ns]` in `pandas<2.0`: + + ```python + >>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pd() + 0 0 days 16:36:29.214000 + 1 0 days 06:36:29.214000 + dtype: timedelta64[ns] + ``` + + 2. `timedelta64[ms]` in `pandas>=2.0`: + + ```python + >>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pd() + 0 0 days 16:36:29.214000 + 1 0 days 06:36:29.214000 + dtype: timedelta64[ms] + ``` + + 3. 
`duration[ms][pyarrow]` in `pandas>=2.0` with `as_arrow=True`: + + ```python + >>> kx.TimeVector([timedelta(seconds=59789, microseconds=214000), timedelta(seconds=23789, microseconds=214000)]).pd(as_arrow=True) + 0 0 days 16:36:29.214000 + 1 0 days 06:36:29.214000 + dtype: duration[ms][pyarrow] + ``` === "PyArrow" Calling `.pa()` on a `pykx.TimeVector` will return a pyarrow `DurationArray`. diff --git a/docs/api/streamlit.md b/docs/api/streamlit.md new file mode 100644 index 0000000..0cf146f --- /dev/null +++ b/docs/api/streamlit.md @@ -0,0 +1,10 @@ +# Streamlit Integration + +::: pykx.streamlit + rendering: + show_root_heading: false + options: + show_root_heading: false + members_order: source + members: + - PyKXConnection diff --git a/docs/api/util.md b/docs/api/util.md new file mode 100644 index 0000000..55ffc4f --- /dev/null +++ b/docs/api/util.md @@ -0,0 +1,135 @@ +# PyKX Utilities + +The purpose of this page is to provide users with documentation for utility functions located within various modules within PyKX. + +!!! Note + + This functionality presently is not located in a centralized module but it is expected that with the next major release version of PyKX 3.0.0 they + +## `pykx.ssl_info` + +```python +pykx.ssl_info() +``` + +View information relating to the TLS Settings used by PyKX from your process + +**Returns:** + +| Type | Description | +|-------------------|------------------------------------------------------| +| `pykx.Dictionary` | A dictionary outlining the TLS settings used by PyKX | + +**Example:** + +```python +>>> import pykx as kx +>>> kx.ssl_info() +pykx.Dictionary(pykx.q(' +SSLEAY_VERSION | OpenSSL 1.1.1q 5 Jul 2022 +SSL_CERT_FILE | /usr/local/anaconda3/ssl/server-crt.pem +SSL_CA_CERT_FILE | /usr/local/anaconda3/ssl/cacert.pem +SSL_CA_CERT_PATH | /usr/local/anaconda3/ssl +SSL_KEY_FILE | /usr/local/anaconda3/ssl/server-key.pem +SSL_CIPHER_LIST | ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:.. 
+SSL_VERIFY_CLIENT| NO +SSL_VERIFY_SERVER| YES +')) +``` + +## `pykx.util.debug_environment` + +```python +pykx.util.debug_environment(detailed=False, return_info=False) +``` + +**Parameters:** + +| Name | Type | Description | Default | +|-------------|------|-----------------------------------------------------------------------------------------------------------|---------| +| detailed | bool | When returning information about a users license print the content of both `QHOME` and `QLIC` directories | `False` | +| return_info | bool | Should the information returned from the function be printed to console (default) or provided as a str | `False` | + + +**Returns:** + +| Type | Description | +|--------------------|-----------------------------------------------------------------------------------------------------| +| `Union[None, str]` | Returns `None` if return information is printed to console otherwise returns a `str` representation | + +**Example:** + +```python +>>> import pykx as kx +>>> kx.util.debug_environment() +**** PyKX information **** +pykx.args: () +pykx.qhome: /usr/local/anaconda3/envs/qenv/q +pykx.qlic: /usr/local/anaconda3/envs/qenv/q +pykx.licensed: True +pykx.__version__: 2.4.3 +pykx.file: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py + +**** Python information **** +sys.version: 3.8.3 (default, Jul 2 2020, 11:26:31) +[Clang 10.0.0 ] +pandas: 2.0.3 +numpy: 1.24.4 +pytz: 2023.3.post1 +which python: /usr/local/bin/python +which python3: /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 +find_libpython: /usr/local/anaconda3/lib/libpython3.8.dylib + +**** Platform information **** +platform.platform: macOS-10.16-x86_64-i386-64bit + +**** PyKX Environment Variables **** +PYKX_IGNORE_QHOME: +PYKX_KEEP_LOCAL_TIMES: +PYKX_ALLOCATOR: +PYKX_GC: +PYKX_LOAD_PYARROW_UNSAFE: +PYKX_MAX_ERROR_LENGTH: +PYKX_NOQCE: +PYKX_Q_LIB_LOCATION: +PYKX_RELEASE_GIL: +PYKX_Q_LOCK: +PYKX_DEFAULT_CONVERSION: +PYKX_SKIP_UNDERQ: +PYKX_UNSET_GLOBALS: 
+PYKX_DEBUG_INSIGHTS_LIBRARIES: +PYKX_EXECUTABLE: /usr/local/anaconda3/bin/python +PYKX_PYTHON_LIB_PATH: +PYKX_PYTHON_BASE_PATH: +PYKX_PYTHON_HOME_PATH: +PYKX_DIR: /usr/local/anaconda3/lib/python3.8/site-packages/pykx +PYKX_QDEBUG: +PYKX_THREADING: +PYKX_4_1_ENABLED: + +**** PyKX Deprecated Environment Variables **** +SKIP_UNDERQ: +UNSET_PYKX_GLOBALS: +KEEP_LOCAL_TIMES: +IGNORE_QHOME: +UNDER_PYTHON: +PYKX_NO_SIGINT: + +**** q Environment Variables **** +QARGS: +QHOME: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib +QLIC: /usr/local/anaconda3/envs/qenv/q +QINIT: + +**** License information **** +pykx.qlic directory: True +pykx.qhome writable: True +pykx.qhome lics: ['k4.lic'] +pykx.qlic lics: ['k4.lic'] + +**** q information **** +which q: /usr/local/anaconda3/envs/qenv/q/q +q info: +(`m64;4f;2020.05.04) +"insights.lib.embedq insights.lib.pykx.. +``` diff --git a/docs/beta-features/examples/streamlit.py b/docs/beta-features/examples/streamlit.py new file mode 100644 index 0000000..5b881da --- /dev/null +++ b/docs/beta-features/examples/streamlit.py @@ -0,0 +1,39 @@ +# Set environment variables needed to run Streamlit integration +import os +os.environ['PYKX_BETA_FEATURES'] = 'true' + +# This is optional but suggested as without its usage caching +# is not supported within streamlit +os.environ['PYKX_THREADING'] = 'true' + +import streamlit as st +import pykx as kx +import matplotlib.pyplot as plt + + +def main(): + st.header('PyKX Demonstration') + connection = st.connection('pykx', + type=kx.streamlit.PyKXConnection, + port=5050, + username='user', + password='password') + if connection.is_healthy(): + tab = connection.query('select from tab where size<11') + else: + raise kx.QError('Connection object was not deemed to be healthy') + fig, x = plt.subplots() + x.scatter(tab['size'], tab['price']) + + st.write('Queried kdb+ remote table') + st.write(tab) + + st.write('Generated plot') + st.pyplot(fig) + + +if __name__ == "__main__": + try: + main() + 
finally: + kx.shutdown_thread() diff --git a/docs/beta-features/index.md b/docs/beta-features/index.md index ca6fecc..c23c398 100644 --- a/docs/beta-features/index.md +++ b/docs/beta-features/index.md @@ -15,7 +15,7 @@ Within PyKX beta features are enabled through the use of a configuration/environ >>> os.environ['PYKX_BETA_FEATURES'] = 'True' >>> import pykx as kx >>> kx.beta_features -['Database Management', 'Remote Functions'] +['Streamlit Integration', 'Compression and Encryption', 'Database Management', 'Remote Functions'] ``` Alternatively you can set beta features to be available at all times by adding `PYKX_BETA_FEATURES` to your `.pykx-config` file as outlined [here](../user-guide/configuration.md#configuration-file). An example of a configuration making use of this is as follows: @@ -50,3 +50,4 @@ The following are the currently available beta features: - [Remote Functions](remote-functions.md) let you define functions in Python which interact directly with kdb+ data on a q process. These functions can seamlessly integrate into existing Python infrastructures and also benefit systems that use q processes over Python for performance reasons or as part of legacy applications. - [PyKX Threading](threading.md) provides users with the ability to call into `EmbeddedQ` from multithreaded python programs and allow any thread to modify global state safely. +- [Streamlit Integration](streamlit.md) provides users with the ability to query kdb+ infrastructure through direct integration with Streamlit. diff --git a/docs/beta-features/remote-functions.md b/docs/beta-features/remote-functions.md index 41145f8..c103b77 100644 --- a/docs/beta-features/remote-functions.md +++ b/docs/beta-features/remote-functions.md @@ -8,7 +8,7 @@ Remote Functions let you define Python functions within your Python environment which can interact with kdb+ data on a q process. 
Once defined, these functions are registered to a [remote session object]() along with any Python dependencies which need to be imported. The [remote session object]() establishes and manages the remote connection to the kdb+/q server. -To execute kdb+/q functions using PyKX, please see [PyKX under q](../pykx-under-q/intro.html) +To execute kdb+/q functions using PyKX, please see [PyKX under q](../pykx-under-q/intro.md) ## Requirements and limitations diff --git a/docs/beta-features/streamlit.md b/docs/beta-features/streamlit.md new file mode 100644 index 0000000..3d03721 --- /dev/null +++ b/docs/beta-features/streamlit.md @@ -0,0 +1,111 @@ +# Streamlit Integration + +!!! Warning + + This module is a Beta Feature and is subject to change. To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` + + This functionality is presently not supported on Windows; for full utilisation of this functionality `PYKX_THREADING='true'` must be set in configuration. + +## Introduction + +[Streamlit](https://streamlit.io) provides an open source framework allowing users to turn Python scripts into sharable web applications. Functionally, Streamlit provides access to external data-sources using the concept of `connections` which allow users to develop conforming APIs which will integrate directly with streamlit applications as extension connection types. + +The integration outlined below makes use of this by generating a new `pykx.streamlit.PyKXConnection` connection type which provides the ability to create synchronous connections to existing q/kdb+ sessions. + +A full breakdown of the API documentation of this class can be found [here](../api/streamlit.md). + +## Requirements and limitations + +To run this functionality, users must have `streamlit>=1.28` installed local to their Python session. 
+ +This can be installed using the following command: + +```bash +pip install pykx[streamlit] +``` + + +## Functional walkthrough + +This walkthrough will demonstrate the following steps: + +1. Initialize a q/kdb+ server on a specified port and populating some data. +1. Generate a `streamlit.py` script which queries the q server and creates a basic streamlit application. +1. Run the streamlit application and view locally + +### Initializing a q/kdb+ server + +This step ensures you have a q process running and a kdb+ table available to query. If you have this already, proceed to the next step. + +Ensure that you have q installed. If you do not have this installed please follow the guide provided [here](https://code.kx.com/q/learn/install/), retrieving your license following the instructions provided [here](https://kx.com/kdb-insights-personal-edition-license-download). + +```bash +q -p 5050 +``` + +Create a table which you will use within your Python analytics defined below. + +```q +q)N:1000 +q)tab:([]sym:N?`AAPL`MSFT`GOOG`FDP;price:100+N?100f;size:10+N?100) +``` + +Set a requirement for users to provide a username/password if you wish to add security to your q process. + +```q +.z.pw:{[u;p]$[(u~`user)&p~`password;1b;0b]} +``` + +### Generate a streamlit script/application + +The following script generates a simple streamlit application which: + +1. Set environment variables and import required libraries +1. Define a function to run for generation of the streamlit application + 1. Name the streamlit application. + 1. Create a connection to the q process initialized on port 5050 above. + 1. Query the q process retrieving a small tabular subset of data using a qsql statement. + 1. Generate a Matplotlib graph directly using the PyKX table. + 1. Display both the table and graph + +This script can additionally be downloaded [here](examples/streamlit.py). 
+ +```python +# Set environment variables needed to run Streamlit integration +import os +os.environ['PYKX_BETA_FEATURES'] = 'true' + +# This is optional but suggested as without its usage caching +# is not supported within streamlit +os.environ['PYKX_THREADING'] = 'true' + +import streamlit as st +import pykx as kx +import matplotlib.pyplot as plt + +def main(): + st.header('PyKX Demonstration') + connection = st.connection('pykx', + type=kx.streamlit.PyKXConnection, + port=5050, + username='user', + password='password') + if connection.is_healthy(): + tab = connection.query('select from tab where size<11') + else: + raise kx.QError('Connection object was not deemed to be healthy') + fig, x = plt.subplots() + x.scatter(tab['size'], tab['price']) + + st.write('Queried kdb+ remote table') + st.write(tab) + + st.write('Generated plot') + st.pyplot(fig) + +if __name__ == "__main__": + try: + main() + finally: + kx.shutdown_thread() +``` diff --git a/docs/blogs.md b/docs/blogs.md index 4ee878e..fbccacf 100644 --- a/docs/blogs.md +++ b/docs/blogs.md @@ -1,4 +1,4 @@ -# Blogs, Articles and Videos +# Blogs, Articles, Podcasts and Videos KX, Partners and members of the public regularly post articles, blogs and videos relating to their usage of PyKX and how it can be used as part of solutions to real-world problems. The intention of this page is to centralise these blogs and articles and will be kept up to date regularly. @@ -6,7 +6,7 @@ KX, Partners and members of the public regularly post articles, blogs and videos If you would like to contribute content to this site, feel free to raise a pull request [here](https://github.com/KxSystems/pykx/pull). We would love to hear from you. 
-_Last updated:_ 8th March 2024 +_Last updated:_ 10th May 2024 ## Blogs @@ -17,6 +17,7 @@ _Last updated:_ 8th March 2024 | [PyKX Boosts Trade Analytics](https://www.treliant.com/knowledge-center/pykx-boosts-trade-analytics/) | An introduction to the fundamental features and functionality of PyKX | Paul Douglas, Paul Walsh, and Thomas Smyth | June 26th 2023 | | [PyKX Highlights 2023](https://kx.com/blog/pykx-highlights-2023/) | A breakdown of new features and functionality added from January 2023 to version 2.1.1 in October 2023. | Rian Ó Cuinneagáin | 25th October 2023 | | [Build and Manage Databases using PyKX](https://kx.com/blog/how-to-build-and-manage-databases-using-pykx/) | A breakdown of how PyKX can be used to generate and maintain kdb+ databases using newly released functionality | Conor McCarthy | 24th January 2024 | +| [Contributing to PyKX](https://www.habla.dev/blog/2024/04/10/Contributing-to-PyKX.html) | Outlining how new developers can contribute to PyKX | Oscar Nydza Nicpoñ | 10th April 2024 | ## Articles @@ -29,6 +30,12 @@ _Last updated:_ 8th March 2024 ## Videos +### Using PyKX to Bring the Power of kdb+ to Python + +Conor McCarthy introduces how PyKX can be used to generate data, run analytics and create databases. + + + ### Accelerating Application Development with PyKX Jack Kiernan outlines the fundamentals of PyKX. 
@@ -48,3 +55,6 @@ Mohammad Noor and Oliver Stewart outline how Citadel make use of PyKX to acceler +## Podcasts + + diff --git a/docs/examples/charting.ipynb b/docs/examples/charting.ipynb new file mode 100644 index 0000000..325c11d --- /dev/null +++ b/docs/examples/charting.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0cee3f27-46b2-4ed8-9199-a6c83968b76d", + "metadata": {}, + "source": [ + "# Charting Data with PyKX\n", + "\n", + "This workbook details examples of interfacing PyKX with Python charting libraries.\n", + "\n", + "PyKX supports rich datatype mapping meaning you can convert data from PyKX objects to:\n", + "- Python objects using `.py()`\n", + "- NumPy objects using `.np()`\n", + "- Pandas objects using `.pd()`\n", + "- PyArrow objects using `.pa()`\n", + "\n", + "The full breakdown of how these map is documented [here.](https://code.kx.com/pykx/api/pykx-q-data/type_conversions.html)\n", + "\n", + "These resulting objects will behave as expected with all Python libraries.\n", + "\n", + "For efficiency and exactness the examples below aim to use PyKX objects directly, minimising conversions when possible." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6c62cd2", + "metadata": { + "tags": [ + "hide_code" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bb0e7404-32f3-4f2d-874b-e596ad14be0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sympricesizequantityin_stock
0a0.9094126451b
1a0.29884775181b
2c0.4540638110b
3b0.1569421361b
4c0.046992654431b
" + ], + "text/plain": [ + "pykx.Table(pykx.q('\n", + "sym price size quantity in_stock\n", + "-------------------------------------\n", + "a 0.9094126 4 5 1 \n", + "a 0.2988477 5 18 1 \n", + "c 0.454063 8 11 0 \n", + "b 0.156942 1 36 1 \n", + "c 0.04699265 4 43 1 \n", + "'))" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pykx as kx\n", + "tab = kx.Table(data={\n", + " 'sym':kx.random.random(1000, ['a', 'b', 'c']), \n", + " 'price':kx.random.random(1000, 1.0), \n", + " 'size':kx.random.random(1000, 10),\n", + " 'quantity':kx.random.random(1000,100),\n", + " 'in_stock':kx.random.random(1000, [True, False])})\n", + "tab.head()" + ] + }, + { + "cell_type": "markdown", + "id": "c238bc17-98a2-4014-ab38-5c13f8e7c8d1", + "metadata": {}, + "source": [ + "## Matplotlib\n", + "\n", + "Generating a scatter plot using the `price` and `quantity` columns of our table. \n", + "\n", + "The `scatter(tab['price'], tab['quantity'])` notation is used to access PyKX objects directly. \n", + "\n", + "To use `x=` and `y=` syntax requires conversion to a dataframe using `.pd()`, i.e. `scatter(x='price', y='quantity', data=tab.pd())` \n", + "\n", + "`scatter` fundamentally uses a series of 1D arrays and is therefore one of the only charts where the column values do not need to first be converted to NumPy objects using `.np()`." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6bd7e251-7b25-432f-8e0e-0e32ac7e95b1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.scatter(tab['price'], tab['quantity'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "2e76c5d1-7dd3-482c-90cb-c263d31ad808", + "metadata": {}, + "source": [ + "In order for the column values to be compatible with most of matplotlib charts, they first must be converted to numpy objects using the `.np()` function." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b62a4a3f-90bb-4f9f-8df6-46fdfb6bc4b9", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.bar(tab['size'].np(), tab['price'].np())\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "3d0e9c66-ef79-4e11-a9c1-6797d608c835", + "metadata": {}, + "source": [ + "## Plotly\n", + "\n", + "Plotly allows `vector` objects to be passed as the `color` argument. This parameter is set using the `sym` column resulting in the scatter chart below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4e673c63-fb40-4a22-bee6-01f6fdba7506", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "\n", + "fig = px.scatter(\n", + " x=tab['quantity'],\n", + " y=tab['price'],\n", + " size=tab['size'],\n", + " color=tab['sym'])\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "markdown", + "id": "07595eb5-26ef-45c9-a15d-0edbe2bee955", + "metadata": {}, + "source": [ + "Unlike with Pandas, a PyKX table cannot be passed as the first argument with the following data being passed as column names. Each axis must be explicitly set. \n", + "\n", + "To use this feature, first convert to Pandas using the `.pd()` function" + ] + }, + { + "cell_type": "markdown", + "id": "cdd20942-1a7d-419c-a0ff-3e6e07f3acf9", + "metadata": {}, + "source": [ + "A density heatmap using Plotly. This time the table is converted to a Pandas Dataframe and then the axes are simply assigned the column names as strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "47431b0c-091a-436f-9a07-48eefc33c52a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.density_heatmap(\n", + " tab.pd(),\n", + " x='price', \n", + " y='size')\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "markdown", + "id": "3f9de707-f07a-4ca5-b46a-980abb51c640", + "metadata": {}, + "source": [ + "## Seaborn\n", + "\n", + "Seaborn allows the user to set `data` as a PyKX table name without conversions and then call the `x` and `y` parameters using only the column names of that table.\n", + "\n", + "A bar chart below demonstrates this with the data being set as the table object and all of the parameters being set using the column names, all without conversions." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "82306712-b9cd-480d-9d86-3e241f5717e0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiUAAAHpCAYAAABdr0y5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzX0lEQVR4nO3de1hVZaLH8d8G5aLIRVIuCXjJ1PKSUimi5Sgj45SPJpUxzvGCTVMPXplunFLTSsweL2WkaQ5Wk8euajZHzSjxOHnFLO1idpmwBPRMCYoJCuv80XFPO0Rlu2G9G76f51nPw1p78+4fZPDjXe9a22FZliUAAACb+dgdAAAAQKKUAAAAQ1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYocGXEsuyVFpaKm7HAgCA2Rp8KTl+/LhCQkJ0/Phxu6MAAIDzaPClBAAAeAdKCQAAMAKlBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBFtLSdu2beVwOKpt6enpkqRTp04pPT1d4eHhCgoKUkpKioqLi+2MDAAA6oitpWTXrl0qLCx0bps2bZIk3XbbbZKkqVOnat26dXrttdeUl5enw4cPa8SIEXZGBgAAdcRhGfROdVOmTNHbb7+tgwcPqrS0VK1atdLKlSt16623SpI+//xzdenSRdu2bVOfPn0uaszS0lKFhISopKREwcHBdRkfAABcAmPWlFRUVOhvf/ub0tLS5HA4lJ+fr9OnTyspKcn5nM6dOys2Nlbbtm2rcZzy8nKVlpa6bAAAwHzGlJI1a9bo2LFjGjt2rCSpqKhIfn5+Cg0NdXleRESEioqKahwnKytLISEhzi0mJqYOUwMAAE8xppQsX75cQ4YMUXR09CWNk5mZqZKSEud26NAhDyUEAAB1qYndASTp22+/1bvvvqs333zTeSwyMlIVFRU6duyYy2xJcXGxIiMjaxzL399f/v7+dRkXAADUASNmSnJyctS6dWvddNNNzmPx8fFq2rSpcnNznccOHDiggoICJSQk2BETAADUIdtnSqqqqpSTk6MxY8aoSZN/xwkJCdH48eOVkZGhli1bKjg4WBMnTlRCQsJFX3mDhsmyLJWVlTn3mzdvLofDYWMiAIAn2F5K3n33XRUUFCgtLa3aYwsWLJCPj49SUlJUXl6u5ORkPfvsszakhEnKyso0bNgw5/7atWsVFBRkYyIAgCcYdZ+SusB9ShqeEydOUEqARohZ0obP9pkSAAAuBrOkDZ8RC10BAAAoJQAAwAiUEgAAYATWlAANEAsCAXgjSgnQALEgEIA34vQNAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAIlBIAAGAESgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjNDE7gBomCzLUllZmXO/efPmcjgcNiYCAJiOUoI6UVZWpmHDhjn3165dq6CgIBsTAQBMRylBo8aMDgCYg1KCRo0ZHQAwBwtdAQCAESglAADACJQSAABgBEoJAAAwAqUEAAAYgatvAHBpdCPGf3uYhFJSx/gfHt6AS6MbL0//t+dnHi4FpaSO8cMeQGPCzzxcCtaUAAAAI1BKAACAESglAADACKwpAYA6wIJPoPYoJbXEDxoAF4MFn0DtUUpqiR80AADUDdaUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwgu2l5Pvvv9cf//hHhYeHKzAwUN26ddPu3budj1uWpenTpysqKkqBgYFKSkrSwYMHbUwMAADqgq2l5Mcff1RiYqKaNm2q9evX69NPP9W8efMUFhbmfM7cuXP19NNPa8mSJdqxY4eaN2+u5ORknTp1ysbkAADA02y9T8kTTzyhmJgY5eTkOI+1a9fO+bFlWVq4cKEefvhh571BXnzxRUVERGjNmjW644476j0
zAACoG7bOlLz11lu69tprddttt6l169bq2bOnli1b5nz8m2++UVFRkZKSkpzHQkJC1Lt3b23btu2cY5aXl6u0tNRluxDLsnTixAnnZlnWpX9xAACgVmwtJV9//bUWL16sjh07auPGjbrnnns0adIkvfDCC5KkoqIiSVJERITL50VERDgf+7WsrCyFhIQ4t5iYmAvmOHuX1rPbL28jDwAA6oetpaSqqkq9evXS7Nmz1bNnT911113605/+pCVLlrg9ZmZmpkpKSpzboUOHPJgYAADUFVtLSVRUlK666iqXY126dFFBQYEkKTIyUpJUXFzs8pzi4mLnY7/m7++v4OBglw0AYB5OnePXbC0liYmJOnDggMuxL774QnFxcZJ+XvQaGRmp3Nxc5+OlpaXasWOHEhIS6jUrAMCzOHWOX7P16pupU6eqb9++mj17tm6//Xbt3LlTS5cu1dKlSyVJDodDU6ZM0WOPPaaOHTuqXbt2mjZtmqKjozV8+HA7owMAAA+ztZRcd911Wr16tTIzMzVr1iy1a9dOCxcu1KhRo5zPuf/++1VWVqa77rpLx44dU79+/bRhwwYFBATYmBwAAHiaraVEkm6++WbdfPPNNT7ucDg0a9YszZo1qx5TATCRZVkuU/zNmzeXw+GwMREAT7K9lADAxTq7BuGstWvXKigoyMZEADyJUgLUgp1/qTNLAKCho5QAtWDnX+oXeu2CWd2cH58845DUyrn/3dy+atbk35dbxk7fV7dhAcANlBIAuAjMVAF1j1ICABeB9Sz1L3FRouuBCilA/77yMvm5ZMnv54//MfEf9ZgMdYVSAgCABzGr5j5KCQAAHsSsmvtsvc08AADAWZQSAABgBEoJAAAwAmtKAKCBYaElvBWlBJL4IQbP4d+S/VhoCW9FKYEkfojBc/i3BMBdrCkBAABGoJQAAAAjUEoAAIARKCUAAMAILHQFAMNxRRMaC0oJABiOK5rQWFBKgDrCX7cAUDuUEhgvcVGi64EKKUABzt3k55Ilv58//sfEf9RjsvPjr1vAbPzhYB5KCQCgUeIPB/NQStDoxN/3ovNjx5kKhfzisQHTVslq4ufcz39ydD0mA4DGjVICj/jlL3qJX/YAgNrjPiUAAMAIzJQ0YCziAgB4E0pJA8YiLtTEW69oAtCwUUpgO2Z0YBf+7QFmoZTAdszowC782wPMQikBAOACmFWrH5QSAAAugFm1+sElwQAAwAiUEgAAYARO3wCAB3jLZda1ySlxSTjqF6UEAOA23mICnsTpGwAAYARKCQAAMAKlBAAAGIFSAgAAjEApAQAARuDqmwsomNXNZf/kGYekVs797+b2VbMmlnM/dvq++ooGAECDwkwJAAAwAqUEAAAYgVICAACMwJoSD/OWW00DAGAaZkoAAIARmCkBYCvLslRWVubcb968uRwOh42JANiFUgLAVmVlZRo2bJhzf+3atQoKCrIxEQC72Hr65pFHHpHD4XDZOnfu7Hz81KlTSk9PV3h4uIKCgpSSkqLi4mIbEwMAgLpi+5qSq6++WoWFhc5t69atzsemTp2qdevW6bXXXlNeXp4OHz6sESNG2JgWAADUFdtP3zRp0kSRkZHVjpeUlGj58uVauXKlBg4cKEnKyclRly5dtH37dvXp06e+owIAgDpk+0zJwYMHFR0drfbt22vUqFEqKCiQJOXn5+v06dNKSkpyPrdz586KjY3Vtm3bahyvvLxcpaWlLhsAADCfrTMlvXv31ooVK9SpUycVFhZq5syZ6t+/v/bv36+ioiL5+fkpNDTU5XMiIiJUVFRU45hZWVmaOXNmHScHUB9qc98fiXv/AN7O1lIyZMgQ58fdu3dX7969FRcXp1dffVWBgYFujZmZmamMjAznfmlpqWJiYi45KwAAqFu2n775pdDQUF155ZX68ssvFRkZqYqKCh07dszlOcXFxedcg3KWv7+/goODXTYAAGA+o0rJiRMn9NVXXykqKkrx8fFq2rSpcnN
znY8fOHBABQUFSkhIsDElAACoC7aevrn33ns1dOhQxcXF6fDhw5oxY4Z8fX2VmpqqkJAQjR8/XhkZGWrZsqWCg4M1ceJEJSQkcOUNAAANkK2l5LvvvlNqaqr+9a9/qVWrVurXr5+2b9+uVq1aSZIWLFggHx8fpaSkqLy8XMnJyXr22WftjAwAAOqIraVk1apV5308ICBA2dnZys7O9ujrxt/3osu+40yFQn6xP2DaKllNfl7Sv7qFR18aqBeBvpayE4+67AOA6Wy/eRpgsoJZ3Vz2T55xSGrl3P9ubl81a/LzL/zY6fvqM9p5ORxy5gIAb0EpAYAa/LKUnq+QSpLC7LvSrzbl2c6cwIVQSrwIb/EOAGZyudEfN/lzG6XEi/AW7wCAhsyo+5QAAIDGi1ICAACMwOkbAEC98ZbFw7AHMyUAAMAIzJQAhqrNTf4ke2/098usF8qZ/+ToekwGwJswUwIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAhcfQMABqrNFU12XnkFeBKlBECj4U2XWQONEadvAACAESglAADACJy+ATwkcVGi64EKKUABzt3k55Klf58Z0D8m/qOekgGAd6CUNFL8AgUAmIbTNwAAwAjMlKBRs3ybqqR7qst+o9RUOvX7Uy77AFDfKCVo3BwOl0tAGy2HXE7XAYAdOH0DAACMwEwJ6gSnRQB4HKcZGzxKCeoGp0XQ2PEL1PMu8TRjbe7om//kaPdfCG6jlDQgBbO6ueyfPOOQ1Mq5/93cvmrWxPp5Jyy4HpMBjRDrdIBaY00JAAAwAjMlANCYcZoJBqGUAEBj5uHTTCxyx6WglMAWv1z/ct61LxLrXwBvwiJ3XAJKCQAAv1KrCwck/njyEBa6AgAAIzBTAqBecek6gJowUwIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYAQuCYb34b06AKBBopTA+/CW8ADQIHH6BgAAGIFSAgAAjMDpm1oK9LWUnXjUZR8AAFw6SkktORxyfWdIAADgEZQSAN6DK6+ABo1SAsB7cOUV0KC5tdB1xowZ+vbbbz0aZM6cOXI4HJoyZYrz2KlTp5Senq7w8HAFBQUpJSVFxcXFHn1dAABgBrdKydq1a9WhQwcNGjRIK1euVHl5+SWF2LVrl5577jl1797d5fjUqVO1bt06vfbaa8rLy9Phw4c1YsSIS3qtevf/081nN6abAQA4N7dKyd69e7Vr1y5dffXVmjx5siIjI3XPPfdo165dtR7rxIkTGjVqlJYtW6awsDDn8ZKSEi1fvlzz58/XwIEDFR8fr5ycHH3wwQfavn27O7HtcXa6+ezmsDcOAACmcvs+JT179tTTTz+tw4cPa/ny5fruu++UmJio7t2766mnnlJJSclFjZOenq6bbrpJSUlJLsfz8/N1+vRpl+OdO3dWbGystm3bVuN45eXlKi0tddkAAID5LvnmaZZl6fTp06qoqJBlWQoLC9MzzzyjmJgYvfLKK+f93FWrVmnPnj3Kysqq9lhRUZH8/PwUGhrqcjwiIkJFRUU1jpmVlaWQkBDnFhMT49bXBQAA6pfbpSQ/P18TJkxQVFSUpk6dqp49e+qzzz5TXl6eDh48qMcff1yTJk2q8fMPHTqkyZMn6+WXX1ZAQIC7MarJzMxUSUmJczt06JDHxgYAb3D2Jo9nN27yCG/h1iXB3bp10+eff67Bgwdr+fLlGjp0qHx9fV2ek5qaqsmTJ9c4Rn5+vo4cOaJevXo5j1VWVmrLli165plntHHjRlVUVOjYsWMusyXFxcWKjIyscVx/f3/5+/u782UBQIPATR7hrdwqJbfffrvS0tJ0+eWX1/icyy67TFVVVTU+PmjQIO3bt8/l2Lhx49S5c2c98MADiomJUdOmTZWbm6uUlBRJ0oEDB1RQUKCEhAR3YgMAAIO5VUrOrh35tZ9++klPPvmkpk+ffsExWrRooa5du7oca968ucLDw53Hx48fr4yMDLV
s2VLBwcGaOHGiEhIS1KdPH3diA4DbeN8roO65taZk5syZOnHiRLXjJ0+e1MyZMy851FkLFizQzTffrJSUFN1www2KjIzUm2++6bHxAeBinT0lcnZzcHk/4HFuz5Q4zvF/5EcffaSWLVu6HWbz5s0u+wEBAcrOzlZ2drbbYwKexF/LAFB3alVKwsLC5HA45HA4dOWVV7oUk8rKSp04cUJ33323x0M2ZvH3vej82HGmQiG/eGzAtFWymvz7jUBWt6jHYI0UCwgBoO7UqpQsXLhQlmUpLS1NM2fOVEjIv39F+vn5qW3btixCBQA0brybtdtqVUrGjBkjSWrXrp369u2rpk35TgMA4IJ3s3bbRS90/eXt2nv27Kmffvqp2u3cua07AAD2ef3119WtWzcFBgYqPDxcSUlJysvLU9OmTavdDX3KlCnq37+/JGnFihUKDQ3V22+/rU6dOqlZs2a69dZbdfLkSb3wwgtq27atwsLCNGnSJFVWVtZZ/oueKQkLC1NhYaFat26t0NDQcy50PbsAti4DAzCb5dtUJd1TXfZN5U1ZgQspLCxUamqq5s6dq1tuuUXHjx/X//zP/yg+Pl7t27fXSy+9pPvuu0+SdPr0ab388suaO3eu8/NPnjypp59+WqtWrdLx48c1YsQI3XLLLQoNDdV///d/6+uvv1ZKSooSExM1cuTIOvkaLrqUvPfee84ra95///06CQOgAXA4XBZgG82bsgIXUFhYqDNnzmjEiBGKi4uT9PMd2KWf7/uVk5PjLCXr1q3TqVOndPvttzs///Tp01q8eLE6dOggSbr11lv10ksvqbi4WEFBQbrqqqv0m9/8Ru+//779peTGG290ftyuXTvFxMRUmy2xLIv3mgEAwAY9evTQoEGD1K1bNyUnJ2vw4MG69dZbFRYWprFjx+rhhx/W9u3b1adPH61YsUK33367mjdv7vz8Zs2aOQuJ9PMb4LZt21ZBQUEux44cOVJnX4NbN09r166djh49Wu34Dz/8oHbt2l1yKAAAUDu+vr7atGmT1q9fr6uuukqLFi1Sp06d9M0336h169YaOnSocnJyVFxcrPXr1ystLc3l83998YrD4TjnsfO9hcyl8ujN006cOOHRd/wFALD2BRfP4XAoMTFRiYmJmj59uuLi4rR69WplZGTozjvvVGpqqtq0aaMOHTooMTHR7rjV1KqUZGRkSPr5i542bZqaNWvmfKyyslI7duzQNddc49GAANDosfYFF2HHjh3Kzc3V4MGD1bp1a+3YsUNHjx5Vly5dJEnJyckKDg7WY489plmzZtmc9txqVUo+/PBDST/PlOzbt09+fv/+n8TPz089evTQvffe69mE9YC/QgAA3i44OFhbtmzRwoULVVpaqri4OM2bN09DhgyRJPn4+Gjs2LGaPXu2Ro8ebXPac6tVKTl71c24ceP01FNPKTg4uE5C1Tv+CgEAeLkuXbpow4YN533O999/r9///veKiopyOT527FiNHTvW5dgjjzyiRx55xOXYihUrPJC0Zm6tKcnJyfF0DgBAI8MbXNafkpIS7du3TytXrtRbb71ld5wauVVKysrKNGfOHOXm5urIkSPVVuJ+/fXXHgkHAGi4eIPL+jNs2DDt3LlTd999t37729/aHadGbpWSO++8U3l5efqP//gPRUVFnfNKHACexdonAO7avHmz3REuilulZP369fr73/9u5OVEQIPF2icADZxbpSQsLMx5y3kANeDtywGgVty6o+ujjz6q6dOn6+TJk57OAzQcZ9++/OzGWU4AOC+3ZkrmzZunr776ynlf/F/fhnbPnj0eCQcAABoPt0rJ8OHDPRwDAAA0dm6VkhkzZng6B4BGintVADjLrVICAJ7CvSqAmsXf92K9vl7+k7W//fyAAQN0zTXXaOHChZf8+m6VksrKSi1YsECvvvqqCgoKVFFR4fL4Dz/8cMnBAABA4+LW1TczZ87U/PnzNXLkSJWUlCgjI0MjRoyQj49
PtfvkAwAAXAy3SsnLL7+sZcuW6S9/+YuaNGmi1NRUPf/885o+fbq2b9/u6Yxw09lz9We3856r//97apzduKcGAOBinTlzRhMmTFBISIguu+wyTZs2TZZV+9OybpWSoqIidevWTZIUFBSkkpISSdLNN9+sv//97+4MiTpw9lz92e287wbAPTUAAG564YUX1KRJE+3cuVNPPfWU5s+fr+eff77W47hVStq0aaPCwkJJUocOHfTOO+9Iknbt2iV/f393hgQAAF4qJiZGCxYsUKdOnTRq1ChNnDhRCxYsqPU4bpWSW265Rbm5uZKkiRMnatq0aerYsaNGjx6ttLQ0d4YEAABeqk+fPi5vzpuQkKCDBw+qsrKyVuO4dfXNnDlznB+PHDlSsbGx2rZtmzp27KihQ4e6MyQAAGjkPHKfkoSEBCUkJHhiKAAA4GV27Njhsr99+3Z17NhRvr6+tRrHrVLy4ovnv5nL6NG1v/kKAADwTgUFBcrIyNCf//xn7dmzR4sWLdK8efNqPY5bpWTy5Mku+6dPn9bJkyfl5+enZs2aUUoAAPAAd+6waofRo0frp59+0vXXXy9fX19NnjxZd911V63HcauU/Pjjj9WOHTx4UPfcc4/uu+8+d4YEAKBeWb5NVdI91WW/JrxHU802b97s/Hjx4sWXNJZbV9+cS8eOHTVnzpxqsygAABjJ4ZDVxM+5ne9mTrW67xPc5rFSIklNmjTR4cOHPTkkAABoJNw6ffPWW2+57FuWpcLCQj3zzDNKTEz0SDA0HkyLAgAkN0vJ8OHDXfYdDodatWqlgQMHurXaFo0bb10PAJDcLCVVVVWezgEAABo5t0pJRkbGRT93/vz57rwEAABoZNwqJR9++KH27NmjM2fOqFOnTpKkL774Qr6+vurVq5fzeQ6WJwMAgIvkVikZOnSoWrRooRdeeEFhYWGSfr53ybhx49S/f3/95S9/8WhIAADQ8Ll1SfC8efOUlZXlLCSSFBYWpscee4yFrgAAwC1uzZSUlpbq6NGj1Y4fPXpUx48fv+RQAABAKpjVrV5fL3b6vnp9vV9za6bklltu0bhx4/Tmm2/qu+++03fffac33nhD48eP14gRIzydEQAANAJulZIlS5ZoyJAh+sMf/qC4uDjFxcXpD3/4g373u9/p2Wef9XRGAABgqKqqKs2dO1dXXHGF/P39FRsbq8cff9ytsdw6fdOsWTM9++yzevLJJ/XVV19Jkjp06KDmzZu7FQIAAHinzMxMLVu2TAsWLFC/fv1UWFiozz//3K2x3ColZzVv3lzdu3e/lCEAAICXOn78uJ566ik988wzGjNmjKSfJyn69evn1ngefUM+AADQeHz22WcqLy/XoEGDPDKeraVk8eLF6t69u4KDgxUcHKyEhAStX7/e+fipU6eUnp6u8PBwBQUFKSUlRcXFxTYmBgAAZwUGBnp0PFtLSZs2bTRnzhzl5+dr9+7dGjhwoIYNG6ZPPvlEkjR16lStW7dOr732mvLy8nT48GGu7gEAwBAdO3ZUYGCgcnNzPTLeJa0puVRDhw512X/88ce1ePFibd++XW3atNHy5cu1cuVKDRw4UJKUk5OjLl26aPv27erTp48dkQEAwP8LCAjQAw88oPvvv19+fn5KTEzU0aNH9cknn2j8+PG1Hs/WUvJLlZWVeu2111RWVqaEhATl5+fr9OnTSkpKcj6nc+fOio2N1bZt22osJeXl5SovL3ful5aW1nl2AADqgt03M7sY06ZNU5MmTTR9+nQdPnxYUVFRuvvuu90ay/ZSsm/fPiUkJOjUqVMKCgrS6tWrddVVV2nv3r3y8/NTaGioy/MjIiJUVFRU43hZWVmaOXNmHacGAACS5OPjo4ceekgPPfTQpY/lgTyXpFOnTtq7d6927Nihe+65R2PGjNGnn37q9niZmZkqKSlxbocOHfJgWgAAUFdsnynx8/PTFVdcIUmKj4/Xrl279NRTT2nkyJGqqKjQsWPHXGZLiou
LFRkZWeN4/v7+8vf3r+vYAADAw2yfKfm1qqoqlZeXKz4+Xk2bNnVZ0XvgwAEVFBQoISHBxoQAAKAu2DpTkpmZqSFDhig2NlbHjx/XypUrtXnzZm3cuFEhISEaP368MjIy1LJlSwUHB2vixIlKSEjgyhsAABogW0vJkSNHNHr0aBUWFiokJETdu3fXxo0b9dvf/laStGDBAvn4+CglJUXl5eVKTk7mDf8AAGigbC0ly5cvP+/jAQEBys7OVnZ2dj0lMpvl21Ql3VNd9gEAaChsX+iKWnA4ZDXxszsFAAB1wriFrgAAoHGilAAAACNQSgAAgBFYUwIAgKESFyXW6+v9Y+I/6vX1fo2ZEgAAYARKCQAAcNuGDRvUr18/hYaGKjw8XDfffLO++uort8ailAAAALeVlZUpIyNDu3fvVm5urnx8fHTLLbeoqqqq1mOxpgQAALgtJSXFZf+vf/2rWrVqpU8//VRdu3at1VjMlAAAALcdPHhQqampat++vYKDg9W2bVtJUkFBQa3HYqYEAAC4bejQoYqLi9OyZcsUHR2tqqoqde3aVRUVFbUei1ICAADc8q9//UsHDhzQsmXL1L9/f0nS1q1b3R6PUgIAANwSFham8PBwLV26VFFRUSooKNCDDz7o9niUEgAADGX3zcwuxMfHR6tWrdKkSZPUtWtXderUSU8//bQGDBjg1niUEgAA4LakpCR9+umnLscsy3JrLK6+AQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAIlBIAAGAESgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGsLWUZGVl6brrrlOLFi3UunVrDR8+XAcOHHB5zqlTp5Senq7w8HAFBQUpJSVFxcXFNiUGAAB1xdZSkpeXp/T0dG3fvl2bNm3S6dOnNXjwYJWVlTmfM3XqVK1bt06vvfaa8vLydPjwYY0YMcLG1AAAoC40sfPFN2zY4LK/YsUKtW7dWvn5+brhhhtUUlKi5cuXa+XKlRo4cKAkKScnR126dNH27dvVp08fO2IDAIA6YNSakpKSEklSy5YtJUn5+fk6ffq0kpKSnM/p3LmzYmNjtW3btnOOUV5ertLSUpcNAACYz5hSUlVVpSlTpigxMVFdu3aVJBUVFcnPz0+hoaEuz42IiFBRUdE5x8nKylJISIhzi4mJqevoAADAA4wpJenp6dq/f79WrVp1SeNkZmaqpKTEuR06dMhDCQEAQF2ydU3JWRMmTNDbb7+tLVu2qE2bNs7jkZGRqqio0LFjx1xmS4qLixUZGXnOsfz9/eXv71/XkQEAgIfZOlNiWZYmTJig1atX67333lO7du1cHo+Pj1fTpk2Vm5vrPHbgwAEVFBQoISGhvuMCAIA6ZOtMSXp6ulauXKm1a9eqRYsWznUiISEhCgwMVEhIiMaPH6+MjAy1bNlSwcHBmjhxohISErjyBgCABsbWUrJ48WJJ0oABA1yO5+TkaOzYsZKkBQsWyMfHRykpKSovL1dycrKeffbZek4KAADqmq2lxLKsCz4nICBA2dnZys7OrodEAADALsZcfQMAABo3SgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAIlBIAAGAESgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAK
lBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAItpaSLVu2aOjQoYqOjpbD4dCaNWtcHrcsS9OnT1dUVJQCAwOVlJSkgwcP2hMWAADUKVtLSVlZmXr06KHs7OxzPj537lw9/fTTWrJkiXbs2KHmzZsrOTlZp06dquekAACgrjWx88WHDBmiIUOGnPMxy7K0cOFCPfzwwxo2bJgk6cUXX1RERITWrFmjO+64oz6jAgCAOmbsmpJvvvlGRUVFSkpKch4LCQlR7969tW3btho/r7y8XKWlpS4bAAAwn7GlpKioSJIUERHhcjwiIsL52LlkZWUpJCTEucXExNRpTgAA4BnGlhJ3ZWZmqqSkxLkdOnTI7kgAAOAiGFtKIiMjJUnFxcUux4uLi52PnYu/v7+Cg4NdNgAAYD5jS0m7du0UGRmp3Nxc57HS0lLt2LFDCQkJNiYDAAB1wdarb06cOKEvv/zSuf/NN99o7969atmypWJjYzVlyhQ99thj6tixo9q1a6dp06YpOjpaw4cPty80AACoE7aWkt27d+s3v/mNcz8jI0OSNGbMGK1YsUL333+/ysrKdNddd+nYsWPq16+fNmzYoICAALsiAwCAOmJrKRkwYIAsy6rxcYfDoVmzZmnWrFn1mAoAANjB2DUlAACgcaGUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAIlBIAAGAESgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAAMAIlBIAAGAESgkAADACpQQAABiBUgIAAIxAKQEAAEaglAAAACNQSgAAgBEoJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjOAVpSQ7O1tt27ZVQECAevfurZ07d9odCQAAeJjxpeSVV15RRkaGZsyYoT179qhHjx5KTk7WkSNH7I4GAAA8yPhSMn/+fP3pT3/SuHHjdNVVV2nJkiVq1qyZ/vrXv9odDQAAeFATuwOcT0VFhfLz85WZmek85uPjo6SkJG3btu2cn1NeXq7y8nLnfklJiSSptLTUeayy/KeLznC8aWWtMp/56cxFP/eXmWpSV1lrk1O6cNba5JTqLivf04vD9/Ti8D3le3qxLjVrixYt5HA4avWaDZHDsizL7hA1OXz4sC6//HJ98MEHSkhIcB6///77lZeXpx07dlT7nEceeUQzZ86sz5gAAFySkpISBQcH2x3DdkbPlLgjMzNTGRkZzv2qqir98MMPCg8P91gLLS0tVUxMjA4dOmT8PyJvyeotOSXvyeotOSXvyeotOSXvyeotOaW6zdqiRQuPjuetjC4ll112mXx9fVVcXOxyvLi4WJGRkef8HH9/f/n7+7scCw0NrZN8wcHBxv9PdJa3ZPWWnJL3ZPWWnJL3ZPWWnJL3ZPWWnJJ3ZfU2Ri909fPzU3x8vHJzc53HqqqqlJub63I6BwAAeD+jZ0okKSMjQ2PGjNG1116r66+/XgsXLlRZWZnGjRtndzQAAOBBxpeSkSNH6ujRo5o+fbqKiop0zTXXaMOGDYqIiLAtk7+/v2bMmFHtNJGJvCWrt+SUvCert+SUvCert+SUvCert+SUvCurtzL66hsAANB4GL2mBAAANB6UEgAAYARKCQAAMAKlBAAAGIFS4obs7Gy1bdtWAQEB6t27t3bu3Gl3pGq2bNmioUOHKjo6Wg6HQ2vWrLE70jllZWXpuuuuU4sWLdS6dWsNHz5cBw4csDtWNYsXL1b37t2dN01KSEj
Q+vXr7Y51UebMmSOHw6EpU6bYHcXFI488IofD4bJ17tzZ7lg1+v777/XHP/5R4eHhCgwMVLdu3bR79267Y1XTtm3bat9Xh8Oh9PR0u6O5qKys1LRp09SuXTsFBgaqQ4cOevTRR2XitRfHjx/XlClTFBcXp8DAQPXt21e7du2yO1aDRCmppVdeeUUZGRmaMWOG9uzZox49eig5OVlHjhyxO5qLsrIy9ejRQ9nZ2XZHOa+8vDylp6dr+/bt2rRpk06fPq3BgwerrKzM7mgu2rRpozlz5ig/P1+7d+/WwIEDNWzYMH3yySd2RzuvXbt26bnnnlP37t3tjnJOV199tQoLC53b1q1b7Y50Tj/++KMSExPVtGlTrV+/Xp9++qnmzZunsLAwu6NVs2vXLpfv6aZNmyRJt912m83JXD3xxBNavHixnnnmGX322Wd64oknNHfuXC1atMjuaNXceeed2rRpk1566SXt27dPgwcPVlJSkr7//nu7ozU8Fmrl+uuvt9LT0537lZWVVnR0tJWVlWVjqvOTZK1evdruGBflyJEjliQrLy/P7igXFBYWZj3//PN2x6jR8ePHrY4dO1qbNm2ybrzxRmvy5Ml2R3IxY8YMq0ePHnbHuCgPPPCA1a9fP7tjuGXy5MlWhw4drKqqKrujuLjpppustLQ0l2MjRoywRo0aZVOiczt58qTl6+trvf322y7He/XqZT300EM2pWq4mCmphYqKCuXn5yspKcl5zMfHR0lJSdq2bZuNyRqOkpISSVLLli1tTlKzyspKrVq1SmVlZUa/3UF6erpuuukml3+vpjl48KCio6PVvn17jRo1SgUFBXZHOqe33npL1157rW677Ta1bt1aPXv21LJly+yOdUEVFRX629/+prS0NI+9Iamn9O3bV7m5ufriiy8kSR999JG2bt2qIUOG2JzM1ZkzZ1RZWamAgACX44GBgcbO7Hkz4+/oapL//d//VWVlZbW7yUZEROjzzz+3KVXDUVVVpSlTpigxMVFdu3a1O041+/btU0JCgk6dOqWgoCCtXr1aV111ld2xzmnVqlXas2eP0ee9e/furRUrVqhTp04qLCzUzJkz1b9/f+3fv9+4d0z9+uuvtXjxYmVkZOg///M/tWvXLk2aNEl+fn4aM2aM3fFqtGbNGh07dkxjx461O0o1Dz74oEpLS9W5c2f5+vqqsrJSjz/+uEaNGmV3NBctWrRQQkKCHn30UXXp0kURERH6r//6L23btk1XXHGF3fEaHEoJjJGenq79+/cb+9dHp06dtHfvXpWUlOj111/XmDFjlJeXZ1wxOXTokCZPnqxNmzZV++vOJL/8i7h79+7q3bu34uLi9Oqrr2r8+PE2JquuqqpK1157rWbPni1J6tmzp/bv368lS5YYXUqWL1+uIUOGKDo62u4o1bz66qt6+eWXtXLlSl199dXau3evpkyZoujoaOO+py+99JLS0tJ0+eWXy9fXV7169VJqaqry8/PtjtbgUEpq4bLLLpOvr6+Ki4tdjhcXFysyMtKmVA3DhAkT9Pbbb2vLli1q06aN3XHOyc/Pz/mXUXx8vHbt2qWnnnpKzz33nM3JXOXn5+vIkSPq1auX81hlZaW2bNmiZ555RuXl5fL19bUx4bmFhobqyiuv1Jdffml3lGqioqKqlc8uXbrojTfesCnRhX377bd699139eabb9od5Zzuu+8+Pfjgg7rjjjskSd26ddO3336rrKws40pJhw4dlJeXp7KyMpWWlioqKkojR45U+/bt7Y7W4LCmpBb8/PwUHx+v3Nxc57Gqqirl5uYavbbAZJZlacKECVq9erXee+89tWvXzu5IF62qqkrl5eV2x6hm0KBB2rdvn/bu3evcrr32Wo0aNUp79+41spBI0okTJ/TVV18pKirK7ijVJCYmVrtU/YsvvlBcXJxNiS4sJydHrVu31k033WR3lHM6efKkfHxcfwX5+vqqqqrKpkQX1rx5c0VFRenHH3/Uxo0
bNWzYMLsjNTjMlNRSRkaGxowZo2uvvVbXX3+9Fi5cqLKyMo0bN87uaC5OnDjh8hfnN998o71796ply5aKjY21MZmr9PR0rVy5UmvXrlWLFi1UVFQkSQoJCVFgYKDN6f4tMzNTQ4YMUWxsrI4fP66VK1dq8+bN2rhxo93RqmnRokW1NTnNmzdXeHi4UWt17r33Xg0dOlRxcXE6fPiwZsyYIV9fX6WmptodrZqpU6eqb9++mj17tm6//Xbt3LlTS5cu1dKlS+2Odk5VVVXKycnRmDFj1KSJmT/mhw4dqscff1yxsbG6+uqr9eGHH2r+/PlKS0uzO1o1GzdulGVZ6tSpk7788kvdd9996ty5s3E/9xsEuy//8UaLFi2yYmNjLT8/P+v666+3tm/fbnekat5//31LUrVtzJgxdkdzca6MkqycnBy7o7lIS0uz4uLiLD8/P6tVq1bWoEGDrHfeecfuWBfNxEuCR44caUVFRVl+fn7W5Zdfbo0cOdL68ssv7Y5Vo3Xr1lldu3a1/P39rc6dO1tLly61O1KNNm7caEmyDhw4YHeUGpWWllqTJ0+2YmNjrYCAAKt9+/bWQw89ZJWXl9sdrZpXXnnFat++veXn52dFRkZa6enp1rFjx+yO1SA5LMvA2+cBAIBGhzUlAADACJQSAABgBEoJAAAwAqUEAAAYgVICAACMQCkBAABGoJQAAAAjUEoAAIARKCUAajR27FgNHz7c7hgAGgnu6AqgRiUlJbIsS6GhoXZHAdAIUEoAAIAROH0DQK+//rq6deumwMBAhYeHKykpSWVlZS6nb/75z3/K4XBU2wYMGOAcZ+vWrerfv78CAwMVExOjSZMmqayszJ4vCoDXoZQAjVxhYaFSU1OVlpamzz77TJs3b9aIESP060nUmJgYFRYWOrcPP/xQ4eHhuuGGGyRJX331lX73u98pJSVFH3/8sV555RVt3bpVEyZMsOPLAuCFOH0DNHJ79uxRfHy8/vnPfyouLs7lsbFjx+rYsWNas2aNy/FTp05pwIABatWqldauXSsfHx/deeed8vX11XPPPed83tatW3XjjTeqrKxMAQEB9fHlAPBiTewOAMBePXr00KBBg9StWzclJydr8ODBuvXWWxUWFlbj56Slpen48ePatGmTfHx+nnD96KOP9PHHH+vll192Ps+yLFVVVembb75Rly5d6vxrAeDdKCVAI+fr66tNmzbpgw8+0DvvvKNFixbpoYce0o4dO875/Mcee0wbN27Uzp071aJFC+fxEydO6M9//rMmTZpU7XNiY2PrLD+AhoPTNwBcVFZWKi4uThkZGfr4449dTt+88cYbSk1N1fr16zVo0CCXzxs1apSKi4v17rvv2pAaQEPAQlegkduxY4dmz56t3bt3q6CgQG+++aaOHj1a7XTL/v37NXr0aD3wwAO6+uqrVVRUpKKiIv3www+SpAceeEAffPCBJkyYoL179+rgwYNau3YtC10BXDRKCdDIBQcHa8uWLfr973+vK6+8Ug8//LDmzZunIUOGuDxv9+7dOnnypB577DFFRUU5txEjRkiSunfvrry8PH3xxRfq37+/evbsqenTpys6OtqOLwuAF+L0DQAAMAIzJQAAwAiUEgAAYARKCQAAMAKlBAAAGIFSAgAAjEApAQAARqCUAAAAI1BKAACAESglAADACJQSAABgBEoJAAAwwv8B/wz2yI654EAAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "sns.catplot(\n", + " kind='bar',\n", + " data=tab,\n", + " x='size',\n", + " y='quantity',\n", + " hue='sym'\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "c6c63b3f-c8a2-48d3-a63f-334af2c158ab", + "metadata": {}, + "source": [ + "Seaborn supports joining plots together, allowing the user access to another layer of visualisation." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "209a7f8c-94a7-4199-a79b-be21b7c9df6a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.jointplot(data=tab, x=\"quantity\", y=\"price\", hue=\"sym\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/db-management.ipynb b/docs/examples/db-management.ipynb index b58935d..115d747 100644 --- a/docs/examples/db-management.ipynb +++ b/docs/examples/db-management.ipynb @@ -5,14 +5,14 @@ "id": "015ba887", "metadata": {}, "source": [ - "# Introduction\n", + "# Database Creation and Management\n", "\n", "This notebook provides a walkthrough of some of the functionality available for users looking to create and maintain large databases using PyKX.\n", "\n", "In particular, this notebook refers to creating and maintaining [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for more in-depth information about partitioned databases in kdb+.\n", "\n", - "You can download this walkthrough as a `.ipynb` notebook file using the following link.", - "\n", + "You can download this walkthrough as a `.ipynb` notebook file using the following link.\n", + "\n", "This walkthrough provides examples of the following tasks:\n", "\n", "1. 
Creating a database from a historical dataset\n", @@ -33,6 +33,22 @@ "Import all required libraries and create a temporary directory which will be used to store the database we create for this walkthrough" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "04341da6", + "metadata": { + "tags": [ + "hide_code" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/docs/extras/known_issues.md b/docs/extras/known_issues.md index b16d656..cc711d0 100644 --- a/docs/extras/known_issues.md +++ b/docs/extras/known_issues.md @@ -1,17 +1,12 @@ # Known Issues -- Enabling the NEP-49 numpy allocators will often segfault when running in a multiprocess setting. +- Enabling the NEP-49 NumPy allocators will often segfault when running in a multiprocess setting. - The timeout value is always set to `0` when using `PYKX_Q_LOCK`. - Enabling `PYKX_ALLOCATOR` and using PyArrow tables can cause segfaults in certain scenarios. - `kurl` functions require their `options` dictionary to have mixed type values. Add a `None` value to bypass: `{'': None, ...}` (See [docs](https://code.kx.com/insights/core/kurl/kurl.html)) -- Pandas 2.0 has deprecated the `datetime64[D/M]` types. - - Due to this change it is not always possible to determine if the resulting q Table should - use a `MonthVector` or a `DayVector`. In the scenario that it is not possible to determine - the expected type a warning will be raised and the `DayVector` type will be used as a - default. - `None` and `pykx.Identity(pykx.q('::'))` do not pass through to single argument Python functions set under q. See [here](../pykx-under-q/known_issues.md#default-parameter). - ``` + ```python >>> def func(n=2): ... return n ... 
diff --git a/docs/getting-started/installing.md b/docs/getting-started/installing.md index ce7e0db..55127ca 100644 --- a/docs/getting-started/installing.md +++ b/docs/getting-started/installing.md @@ -1,80 +1,114 @@ -# Installing +--- +title: PyKX installation guide +description: Getting started with PyKX +date: April 2024 +author: KX Systems, Inc., +tags: PyKX, setup, install, +--- +# PyKX installation guide -Installation of PyKX is available in using three methods +_This section explains how to install PyKX on your machine._ -1. Installing PyKX from PyPI -2. Installation from source -3. Installation using Anaconda +## Pre-requisites -??? Warning "Anaconda OS support" +Before you start, make sure you have: - PyKX on Anaconda is only supported for Linux x86 and arm based architectures at this time +- **Python** (versions 3.8-3.12) +- **Pip** -!!! Note Python Support +Recommended: a virtual environment with packages such as [venv](https://docs.python.org/3/library/venv.html) from the standard library. - PyKX is only officially supported on Python versions 3.8-3.12, Python 3.7 has reached end of life and is no longer actively supported, please consider upgrading +## Supported environments -=== "Installing PyKX from PyPI" - Ensure you have a recent version of `pip`: - - ``` - pip install --upgrade pip - ``` - - Then install the latest version of PyKX with the following command: +KX only supports versions of PyKX built by KX (installed from wheel files) for: - ``` - pip install pykx - ``` +- **Linux** (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.12 +- **macOS** (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.12 +- **Windows** (`win_amd64`) with CPython 3.8-3.12 -=== "Installing PyKX from source" - Installing PyKX from source requires you to have access to a [github](https://github.com) account, once you have access to github you can clone the PyKX repository as follows +??? Note "Special instructions for Windows users." 
- ``` - git clone https://github.com/kxsystems/pykx - ``` + To run q or PyKX on Windows, you have two options: - Once cloned you can move into the cloned directory and install PyKX using `pip` + - **Install** `#!bash msvcr100.dll`, included in the [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999). - ``` + - **Or Execute** `#!bash w64_install.ps1` supplied at the root of the PyKX GitHub [here](https://github.com/KxSystems/pykx) as follows, using PowerShell: + + ```PowerShell + git clone https://github.com/kxsystems/pykx cd pykx - pip install . + .\w64_install.ps1 ``` +We provide assistance to user-built installations of PyKX only on a best-effort basis. -=== "Installing PyKX from Anaconda" - If you use `conda` you can install PyKX from the `kx` channel on Anaconda as follows typing `y` when prompted to accept the installation +## 1. Install PyKX - ``` - conda install -c kx pykx - ``` +You can install PyKX from three sources: + +!!! Note "" + + === "Install PyKX from PyPI" + + Ensure you have a recent version of `#!bash pip`: -!!! Warning + ``` + pip install --upgrade pip + ``` + Then install the latest version of PyKX with the following command: + + ``` + pip install pykx + ``` + + === "Install PyKX from Anaconda" + + For Linux x86 and arm-based architectures, you can install PyKX from the `#!bash kx` channel on Anaconda as follows: + + ``` + conda install -c kx pykx + ``` + Type `#!bash y` when prompted to accept the installation. - Python packages should typically be installed in a virtual environment. [This can be done with the venv package from the standard library](https://docs.python.org/3/library/venv.html). -## PyKX License access and enablement + === "Install PyKX from GitHub" + + Clone the PyKX repository: + + ``` + git clone https://github.com/kxsystems/pykx + ``` + + Enter the cloned repository and install PyKX using `#!bash pip`: + + ``` + cd pykx + pip install . 
+ ``` + +At this point you have [partial access to PyKX](../user-guide/advanced/modes.md#operating-in-the-absence-of-a-kx-license). To gain access to all PyKX features, follow the steps in the next section, otherwise go straight to [3. Verify PyKX Installation](#3-verify-pykx-installation). -Installation of PyKX following the instructions above provides users with access to the library with limited functional scope, full details of these limitations can be found [here](../user-guide/advanced/modes.md). To access the full functionality of PyKX you must first download and install a KX license, this can be achieved either through use of a personal evaluation license or receipt of a commercial license. +## 2. Install a KDB Insights license -!!! Warning "Legacy kdb+/q licenses do not support PyKX by default" +To use all PyKX functionalities, you need to download and install a KDB Insights license. - PyKX will not operate with a vanilla or legacy kdb+ license which does not have access to specific feature flags embedded within the license. In the absence of a license with appropriate feature flags PyKX will fail to initialise with full feature functionality. +!!! Warning "Legacy kdb+/q licenses do not support all PyKX features." -### License installation from a Python session +There are two types of KDB Insights licenses for PyKX: personal and commercial. For either of them, you have two installation options: -The following steps outline the process by which a user can gain access to and install a kdb Insights personal evaluation license for PyKX from a Python session. + - a) from Python + - b) using environment variables -??? Note "Commercial evaluation installation workflow" +### 2.a Install license in Python - The same workflow used for the personal evaluations defined below can be used for commercial evaluations, the only difference being the link used when signing up for your evaluation license. 
In the case of commercial evaluation this should be https://kx.com/kdb-insights-commercial-evaluation-license-download/ +Follow the steps below to install a KDB Insights license for PyKX from Python: -1. Start your Python session +1. Start your Python session: ```bash $ python ``` -2. Import the PyKX library which will prompt for user input accept this message using `Y` or hitting enter +2. Import the PyKX library. When prompted to accept the installation, type `Y` or press `Enter`: ```python >>> import pykx as kx @@ -85,140 +119,194 @@ The following steps outline the process by which a user can gain access to and i Would you like to continue with license installation? [Y/n]: ``` -3. You will then be prompted asking if you would like to redirect to the kdb Insights personal license installation website +3. Choose whether you wish to install a personal or commercial license. Type `1` or press `Enter` to choose a personal license, or type `2` to choose a commercial license: - ```bash - To apply for a PyKX license, please visit https://kx.com/kdb-insights-personal-edition-license-download. - Once the license application has completed, you will receive a welcome email containing your license information. - Would you like to open this page? [Y/n]: + ```python + Is the intended use of this software for: + [1] Personal use (Default) + [2] Commercial use + Enter your choice here [1/2]: ``` -4. Ensure that you have completed the form for accessing a kdb Insights personal evaluation license and have received your welcome email. -5. Your will be prompted asking if you wish to install your license based on downloaded license file or using the base64 encoded string provided in your email as follows. Enter `1`, `2` or `3` as appropriate. +4. 
When asked if you would like to apply for a license, type `Y` or press `Enter`: + + === "Personal license" + + ```bash + To apply for a PyKX license, navigate to https://kx.com/kdb-insights-personal-edition-license-download + Shortly after you submit your license application, you will receive a welcome email containing your license information. + Would you like to open this page? [Y/n]: + ``` + + === "Commercial license" + + ```bash + To apply for your PyKX license, contact your KX sales representative or sales@kx.com. + Alternately apply through https://kx.com/book-demo. + Would you like to open this page? [Y/n]: + ``` + +5. For personal use, complete the form to receive your welcome email. For commercial use, the license will be provided over email after the commercial evaluation process has been followed with the support of your sales representative. + +6. Choose the desired method to activate your license by typing `1`, `2`, or `3` as appropriate: ```bash - Please select the method you wish to use to activate your license: - [1] Download the license file provided in your welcome email and input the file path (Default) - [2] Input the activation key (base64 encoded string) provided in your welcome email - [3] Proceed with unlicensed mode: + Select the method you wish to use to activate your license: + [1] Download the license file provided in your welcome email and input the file path (Default) + [2] Input the activation key (base64 encoded string) provided in your welcome email + [3] Proceed with unlicensed mode Enter your choice here [1/2/3]: ``` -6. Once you have decided on decided on your option please finish your installation following the appropriate final step below +7. 
Depending on your choice (`1`, `2`, or `3`), complete the installation by following the final step as below: === "1" - ```bash - Please provide the download location of your license (E.g., ~/path/to/kc.lic) : - ``` + === "Personal license" + + ```bash + Provide the download location of your license (for example, ~/path/to/kc.lic): + ``` + + === "Commercial license" + + ```bash + Provide the download location of your license (for example, ~/path/to/k4.lic): + ``` === "2" ```bash - Please provide your activation key (base64 encoded string) provided with your welcome email : + Provide your activation key (base64 encoded string) provided with your welcome email: ``` + === "3" -7. Validate that your license has been installed correctly + ```bash + No further actions needed. + ``` + +8. Validate the correct installation of your license: ```python >>> kx.q.til(10) pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) ``` -!!! Note "Troubleshooting and Support" +### 2.b Install license with environment variables - If once you have completed these installation steps you are still seeing issues please visit our [troubleshooting](../troubleshooting.md) guide and [support](../support.md) pages. +For environment-specific flexibility, there are two ways to install your license: by using a file or by copying text. Both are sourced in your welcome email. Click on the tabs below, read the instructions, and choose the method you wish to follow: -### License installation using environment variables +!!! Note "" -To provide environment specific flexibility there are two methods by which users can install a license using environment variables. In both cases this method is flexible to the installation of both `kc.lic` and `k4.lic` versions of a license. + === "Using a file" -#### Using a supplied license file directly + 1. For personal usage, navigate to the [personal license](https://kx.com/kdb-insights-personal-edition-license-download/) and complete the form. 
For commercial usage, contact your KX sales representative or sales@kx.com or apply through https://kx.com/book-demo. -1. Visit [here](https://kx.com/kdb-insights-personal-edition-license-download/) for a personal edition or [here](https://kx.com/kdb-insights-commercial-evaluation-license-download/) for a commercial evaluation license and fill in the attached form following the instructions provided. -2. On receipt of an email from KX providing access to your license download the license file and save to a secure location on your computer. -3. Set an environment variable on your computer pointing to the folder containing the license file (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). - * Variable Name: `QLIC` - * Variable Value: `/user/path/to/folder` + 2. On receipt of an email from KX, download and save the license file to a secure location on your computer. -#### Using the base64 encoded license content + 3. Set an environment variable pointing to the folder with the license file. (Learn how to set environment variables from [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/)). + * **Variable Name**: `#!bash QLIC` + * **Variable Value**: `#!bash /user/path/to/folder` -1. Visit [here](https://kx.com/kdb-insights-personal-edition-license-download/) for a personal edition or [here](https://kx.com/kdb-insights-commercial-evaluation-license-download/) for a commercial evaluation license and fill in the attached form following the instructions provided. -2. On receipt of an email from KX providing access to your license copy the base64 encoded contents of your license provided in plain-text within the email -3. 
Set an environment variable `KDB_LICENSE_B64` on your computer pointing with the value copied in step 2 (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). - * Variable Name: `KDB_LICENSE_B64` - * Variable Value: `` + === "Using text" -If looking to make use of a `k4.lic` you can do so by setting the base64 encoded content of your file as the environment variable `KDB_K4LICENSE_B64`. + 1. For personal usage, navigate to the [personal license](https://kx.com/kdb-insights-personal-edition-license-download/) and complete the form. For commercial usage, contact your KX sales representative or sales@kx.com or apply through https://kx.com/book-demo. -## Supported Environments + 2. On receipt of an email from KX, copy the `#!bash base64` encoded contents of your license provided in plain-text within the email. -KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX installed from wheel files. Support for user-built installations of PyKX (e.g. built from the source distribution) is only provided on a best-effort basis. Currently, PyKX provides wheels for the following environments: + 3. On your computer, set the environment variable `#!bash KDB_LICENSE_B64` (for a personal license) or `KDB_K4LICENSE_B64` (for a commercial license) to the value copied in step 2. (Learn how to set environment variables from [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/)). 
+ * **Variable Name**: `KDB_LICENSE_B64` / `KDB_K4LICENSE_B64` + * **Variable Value**: `` -- Linux (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.12 -- macOS (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.12 -- Windows (`win_amd64`) with CPython 3.8-3.12 +To validate if you successfully installed your license with environment variables, start Python and import PyKX as follows: -## Dependencies +```bash +$ python +>>> import pykx as kx +>>> kx.q.til(5) +pykx.LongVector(pykx.q('0 1 2 3 4')) +``` -### Python Dependencies +As you approach the expiry date for your license you can have PyKX automatically update your license by updating the environment variable `KDB_LICENSE_B64` or `KDB_K4LICENSE_B64` with your new license information. Once PyKX is initialised with your expired license it will attempt to overwrite your license with the newly supplied value. This is outlined as follows: -#### Required Python dependencies +```python +$python +>>> import pykx as kx +Initialisation failed with error: exp +Your license has been updated using the following information: + Environment variable: 'KDB_K4LICENSE_B64' + License write location: /user/path/to/license/k4.lic +``` -PyKX depends on the following third-party Python packages: +## 3. Verify PyKX installation -- `numpy~=1.20, <2.0; python_version=='3.7'` -- `numpy~=1.22, <2.0; python_version<'3.11', python_version>'3.7'` -- `numpy~=1.23, <2.0; python_version=='3.11'` -- `numpy~=1.26, <2.0; python_version=='3.12'` -- `pandas>=1.2, < 2.2.0` -- `pytz>=2022.1` -- `toml~=0.10.2` +To verify if you successfully installed PyKX on your system, run: -They are installed automatically by `pip` when PyKX is installed. +```bash +python -c"import pykx;print(pykx.__version__)" +``` -The following provides a breakdown of how these libraries are used within PyKX +This command should display the installed version of PyKX. 
-- [Numpy](https://pypi.org/project/numpy) is used by PyKX when converting data from PyKX objects to numpy equivalent array/recarray style objects, additionally low level integration allowing direct calls to numpy functions such as `numpy.max` with PyKX objects relies on the numpy Python API. -- [Pandas](https://pypi.org/project/pandas) is used by PyKX when converting PyKX data to Pandas Series/DataFrame equivalent objects, additionally when converting data to PyArrow data formats as supported by the optional dependencies below Pandas is used as an intermendiary data format. -- [pytz](https://pypi.org/project/pytz/) is used by PyKX when converting data with timezone information to PyKX objects in order to ensure that the timezone offsets are accurately applied. -- [toml](https://pypi.org/project/toml/) is used by PyKX for configuration parsing, in particular when users make use of `.pykx-config` files for configuration management as outlined [here](../user-guide/configuration.md). +## Dependencies +??? Info "Expand for Required and Optional PyKX dependencies" -#### Optional Python Dependencies + === "Required" -- `pyarrow>=3.0.0`, which can be included by installing the `pyarrow` extra, e.g. `pip install pykx[pyarrow]`. -- `find-libpython~=0.2`, which can be included by installing the `debug` extra, e.g. `pip install pykx[debug]`. -- `ast2json~=0.3`, which is required for KX Dashboards Direct integration and can be installed with the `dashboards` extra, e.g. `pip install pykx[dashboards]` -- `dill>=0.2`, which is required for the Beta feature `Remote Functions` can be installed via pip with the `beta` extra, e.g. `pip install pykx[beta]` + PyKX depends on the following third-party Python packages: -!!! 
Warning + - `numpy~=1.20, <2.0; python_version=='3.7'` + - `numpy~=1.22, <2.0; python_version<'3.11', python_version>'3.7'` + - `numpy~=1.23, <2.0; python_version=='3.11'` + - `numpy~=1.26, <2.0; python_version=='3.12'` + - `pandas>=1.2, < 2.2.0` + - `pytz>=2022.1` + - `toml~=0.10.2` - Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11-3.12. + **Note**: All are installed automatically by `#!bash pip` when you install PyKX. -The following provides a breakdown of how these libraries are used within PyKX + Here's a breakdown of how PyKX uses these libraries: -- [PyArrow](https://pypi.org/project/pyarrow) is used by PyKX for the conversion of PyKX object to and from their PyArrow equivalent table/array objects. -- [find-libpython](https://pypi.org/project/find-libpython) can be used by developers using PyKX to source the `libpython.{so|dll|dylib}` file required by [PyKX under q](../pykx-under-q/intro.md). + - [NumPy](https://pypi.org/project/numpy): converts data from PyKX objects to NumPy equivalent Array/Recarray style objects; direct calls to NumPy functions such as `numpy.max` with PyKX objects rely on the NumPy Python API. + - [Pandas](https://pypi.org/project/pandas): converts PyKX data to Pandas Series/DataFrame equivalent objects or to PyArrow data formats. Pandas is used as an intermediary data format. + - [pytz](https://pypi.org/project/pytz/): converts data with timezone information to PyKX objects to ensure that the offsets are accurately applied. + - [toml](https://pypi.org/project/toml/): for configuration parsing and management, with `.pykx-config` as outlined [here](../user-guide/configuration.md). -### Optional Non-Python Dependencies -- `libssl` for TLS on [IPC connections](../api/ipc.md). 
-- `libpthread` on Linux/MacOS when using the `PYKX_THREADING` environment variable. + === "Optional" -### Windows Dependencies + **Optional Python dependencies:** -To run q or PyKX on Windows, `msvcr100.dll` must be installed. It is included in the [Microsoft Visual C++ 2010 Redistributable](https://www.microsoft.com/en-ca/download/details.aspx?id=26999). + - **`pyarrow >=3.0.0`**: install `pyarrow` extra, for example `pip install pykx[pyarrow]`. + - **`find-libpython ~=0.2`**: install `debug` extra, for example `pip install pykx[debug]`. + - **`ast2json ~=0.3`**: install with `dashboards` extra, for example `pip install pykx[dashboards]` + - **`dill >=0.2`**: install via pip, with `beta` extra, for example `pip install pykx[beta]` -Alternatively installation of all required Windows dependencies can be completed through execution of the `w64_install.ps1` supplied at the root of the PyKX github [here](https://github.com/KxSystems/pykx) as follows using PowerShell: + Here's a breakdown of how PyKX uses these libraries: -```PowerShell -git clone https://github.com/kxsystems/pykx -cd pykx -.\w64_install.ps1 -``` + - [PyArrow](https://pypi.org/project/pyarrow): converts PyKX objects to and from their PyArrow equivalent table/array objects. + - [find-libpython](https://pypi.org/project/find-libpython): provides the `libpython.{so|dll|dylib}` file required by [PyKX under q](../pykx-under-q/intro.md). + - [ast2json](https://pypi.org/project/ast2json/): required for KX Dashboards Direct integration. + - [dill](https://pypi.org/project/dill/): required for the Beta feature `Remote Functions`. + + **Optional non-Python dependencies:** + + - `libssl` for TLS on [IPC connections](../api/ipc.md). + - `libpthread` on Linux/MacOS when using the `PYKX_THREADING` environment variable. + +!!! 
Note "Troubleshooting and Support" + + If you encounter any issues during the installation process, refer to the following sources for assistance: + + - Visit our [troubleshooting](../troubleshooting.md) guide. + - Ask a question on the KX community at [learninghub.kx.com](https://learninghub.kx.com/forums/forum/pykx/). + - Use Stack Overflow and tag [`pykx`](https://stackoverflow.com/questions/tagged/pykx) or [`kdb`](https://stackoverflow.com/questions/tagged/kdb) depending on the subject. + - Go to [support](../support.md). ## Next steps +That's it! You can now start using PyKX in your Python projects: + - [Quickstart guide](quickstart.md) - [User guide introduction](../user-guide/index.md) diff --git a/docs/getting-started/q_magic_command.ipynb b/docs/getting-started/q_magic_command.ipynb index b0ec902..7a0c430 100644 --- a/docs/getting-started/q_magic_command.ipynb +++ b/docs/getting-started/q_magic_command.ipynb @@ -4,12 +4,15 @@ "cell_type": "code", "execution_count": null, "metadata": { - "tags": ["hide_code"] + "tags": [ + "hide_code" + ] }, "outputs": [], "source": [ - "import os\n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + "import os\n", + "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." 
] }, { @@ -44,10 +47,15 @@ "source": [ "import subprocess\n", "import time\n", - "proc = subprocess.Popen(\n", - " ('q', '-p', '5001')\n", - ")\n", - "time.sleep(5)" + "\n", + "try:\n", + " with kx.PyKXReimport():\n", + " proc = subprocess.Popen(\n", + " ('q', '-p', '5000')\n", + " )\n", + " time.sleep(2)\n", + "except:\n", + " raise kx.QError('Unable to create q process on port 5000')" ] }, { @@ -71,24 +79,24 @@ ] }, { - "cell_type": "markdown", - "id": "89ec26e4", - "metadata": {}, - "source": [ - "#### Execution options\n", - "\n", - "Execution options can also be included after `%%q`.\n", - "\n", - "Here is the list of currently supported execution options.\n", - "\n", - "```\n", - "--debug: prints the q backtrace before raising a QError\n", - " if the cell errors\n", - "--display: calls display rather than the default print\n", - " on returned objects\n", - "```\n" - ] - }, + "cell_type": "markdown", + "id": "89ec26e4", + "metadata": {}, + "source": [ + "#### Execution options\n", + "\n", + "Execution options can also be included after `%%q`.\n", + "\n", + "Here is the list of currently supported execution options.\n", + "\n", + "```\n", + "--debug: prints the q backtrace before raising a QError\n", + " if the cell errors\n", + "--display: calls display rather than the default print\n", + " on returned objects\n", + "```\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -149,7 +157,7 @@ "metadata": {}, "outputs": [], "source": [ - "%%q --host localhost --port 5001 --user user --pass password --noctx\n", + "%%q --host localhost --port 5000 --user user --pass password --noctx\n", "til 10" ] }, @@ -168,7 +176,7 @@ "metadata": {}, "outputs": [], "source": [ - "%%q --port 5001\n", + "%%q --port 5000\n", "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);" ] }, @@ -189,7 +197,7 @@ }, "outputs": [], "source": [ - "%%q --port 5001\n", + "%%q --port 5000\n", "afunc: {[x; y]\n", " x + y \n", " };\n", @@ -217,7 +225,7 @@ }, "outputs": [], "source": [ 
- "%%q --port 5001\n", + "%%q --port 5000\n", "\\l s.k_\n", "s) select * from tab where a>500 and b<250.0 limit 5" ] @@ -240,7 +248,7 @@ "metadata": {}, "outputs": [], "source": [ - "%%q --port 5001\n", + "%%q --port 5000\n", "\\d .example\n", "f: {[x] til x};" ] @@ -252,7 +260,7 @@ "metadata": {}, "outputs": [], "source": [ - "%%q --port 5001\n", + "%%q --port 5000\n", "\\d\n", ".example.f[10]" ] @@ -285,7 +293,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 58c2634..58d4788 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -361,7 +361,39 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt 3 0.452041 4 4 0.019615 0 ``` - + + If using `pandas>=2.0` it is possible to also use the `as_arrow` keyword argument to convert to + pandas types using pyarrow as the backend instead of the default numpy backed pandas objects. 
+ + ```python + >>> qvec = kx.toq(np.random.randint(5, size=10)) + >>> qvec.pd(as_arrow=True) + 0 1 + 1 2 + 2 3 + 3 4 + 4 2 + 5 3 + 6 0 + 7 0 + 8 2 + 9 0 + dtype: int64[pyarrow] + >>> df = pd.DataFrame(data={'x': [random() for _ in range(5)], 'x1': [randint(0, 4) for _ in range(5)]}) + >>> qtab = kx.toq(df) + >>> qtab.pd(as_arrow=True) + x x1 + 0 0.541059 3 + 1 0.886690 1 + 2 0.674300 4 + 3 0.532791 3 + 4 0.523147 4 + >>> qtab.pd(as_arrow=True).dtypes + x double[pyarrow] + x1 int64[pyarrow] + dtype: object + ``` + * Convert PyKX objects to PyArrow ```python diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index a5b8343..e8872cf 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -8,6 +8,559 @@ Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type. +## PyKX 2.5.0 + +#### Release Date + +2024-05-15 + +### Additions + +- Addition of a method for `pykx.Table` objects to apply `xbar` calculations on specified columns names + + ```python + >>> import pykx as kx + >>> N = 5 + >>> kx.random.seed(42) + >>> tab = kx.Table(data = { + ... 'x': kx.random.random(N, 100.0), + ... 'y': kx.random.random(N, 10.0)}) + >>> tab + pykx.Table(pykx.q(' + x y + ----------------- + 77.42128 8.200469 + 70.49724 9.857311 + 52.12126 4.629496 + 99.96985 8.518719 + 1.196618 9.572477 + ')) + >>> tab.xbar('x', 10) + pykx.Table(pykx.q(' + x y + ----------- + 70 8.200469 + 70 9.857311 + 50 4.629496 + 90 8.518719 + 0 9.572477 + ')) + ``` + +- Addition of the method `window_join` to `pykx.Table` objects allowing Window joins to be applied to specified tables + + ```python + >>> trades = kx.Table(data={ + ... 'sym': ['ibm', 'ibm', 'ibm'], + ... 'time': kx.q('10:01:01 10:01:04 10:01:08'), + ... 'price': [100, 101, 105]}) + >>> quotes = kx.Table(data={ + ... 'sym': 'ibm', + ... 'time': kx.q('10:01:01+til 9'), + ... 
'ask': [101, 103, 103, 104, 104, 107, 108, 107, 108], + ... 'bid': [98, 99, 102, 103, 103, 104, 106, 106, 107, 108]}) + >>> windows = kx.q('{-2 1+\:x}', trades['time']) + >>> trades.window_join(quotes, + ... windows, + ... ['sym', 'time'], + ... {'ask_minus_bid': [lambda x, y: x - y, 'ask', 'bid'], + ... 'ask_max': [lambda x: max(x), 'ask']}) + pykx.Table(pykx.q(' + sym time price ask_minus_bid ask_max + ---------------------------------------- + ibm 10:01:01 100 3 4 103 + ibm 10:01:04 101 4 1 1 1 104 + ibm 10:01:08 105 3 2 1 1 108 + ')) + ``` + +- On failure to initialize PyKX with an expiry error PyKX can now install an updated license using the environment variables `KDB_LICENSE_B64` or `KDB_K4LICENSE_B64` for `kc.lic` and `k4.lic` licenses respectively. This allows users to pre-emptively set an environment variable to be used for upgrade prior to expiry. + + === "Successful update of License" + + ```python + >>> import pykx as kx + Initialisation failed with error: exp + Your license has been updated using the following information: + Environment variable: KDB_K4LICENSE_B64 + License write location: /user/path/to/license/k4.lic + >>> kx.q.til(5) + pykx.LongVector(pykx.q('0 1 2 3 4')) + ``` + + === "Error where environment variable matches license content" + + ```python + >>> import pykx as kx + We have been unable to update your license for PyKX using the following information: + Environment variable: KDB_K4LICENSE_B64 + License location: /user/path/to/license/k4.lic + Reason: License content matches supplied Environment variable + + Your PyKX license has now expired. + + Captured output from initialization attempt: + '2024.04.26T12:04:49.514 licence error: exp + + License location used: + /user/path/to/license/k4.lic + + Would you like to renew your license? [Y/n]: + ``` + +- Initialization workflow for PyKX using form based install process now allows users to install Commercial "k4.lic" licenses using this mechanism. 
The updated workflow provides the following outputs + + === "License initialization" + + ```python + >>> import pykx as kx + Thank you for installing PyKX! + + We have been unable to locate your license for PyKX. Running PyKX in unlicensed mode has reduced functionality. + Would you like to continue with license installation? [Y/n]: Y + + Is the intended use of this software for: + [1] Personal use (Default) + [2] Commercial use + Enter your choice here [1/2]: 2 + + To apply for your PyKX license, contact your KX sales representative or sales@kx.com. + Alternately apply through https://kx.com/book-demo. + Would you like to open this page? [Y/n]: n + + Select the method you wish to use to activate your license: + [1] Download the license file provided in your welcome email and input the file path (Default) + [2] Input the activation key (base64 encoded string) provided in your welcome email + [3] Proceed with unlicensed mode + Enter your choice here [1/2/3]: 1 + + Provide the download location of your license (for example, ~/path/to/k4.lic) : ~/path/to/k4.lic + ``` + + === "Unlicensed initialization" + + ```python + Thank you for installing PyKX! + + We have been unable to locate your license for PyKX. Running PyKX in unlicensed mode has reduced functionality. + Would you like to continue with license installation? [Y/n]: n + + PyKX unlicensed mode enabled. To set this as your default behavior please set the following environment variable PYKX_UNLICENSED='true' + + For more information on PyKX modes of operation, please visit https://code.kx.com/pykx/user-guide/advanced/modes.html. + To apply for a PyKX license please visit + + Personal License: https://kx.com/kdb-insights-personal-edition-license-download + Commercial License: Contact your KX sales representative or sales@kx.com or apply on https://kx.com/book-demo + ``` + +- Addition of `Table.replace()` method allowing users to replace all elements in a table of a given value with a different value. 
+ + ```python + >>> tab = kx.q('([] a:2 2 3; b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))') + >>> tab.replace(2, "test") + pykx.Table(pykx.q(' + a b c d e + --------------------- + `test 4 1 a 1 + `test `test 0 b `test + 3 6 1 c `a + ')) + ``` + +- Added `as_arrow` keyword to the `.pd()` method on PyKX Wrapped objects, using `as_arrow=True` will use PyArrow backed data types instead of the default NumPy backed data types. + +### Fixes and Improvements + +- When importing PyKX from a source file path containing a space initialisation would fail with an `nyi` error message, this has now been resolved + + === "Behaviour prior to change" + + ```python + >>> import pykx as kx + Traceback (most recent call last): + File "", line 1, in + File "C:\Program Files\choco\miniconda\lib\site-packages\pykx\__init__.py", line 285, in + from .embedded_q import EmbeddedQ, EmbeddedQFuture, q + .. + pykx.exceptions.QError: nyi + ``` + + === "Behaviour post change" + + ```python + >>> import pykx as kx + >>> kx.q.til(5) + pykx.LongVector(pykx.q('0 1 2 3 4')) + ``` + +- When using `pykx.q.system.load` users can now load files and splayed tables at folder locations containing spaces. +- Updated libq to 4.0 2024.05.07 and 4.1 to 2024.04.29 for all supported OS's. +- `kx.util.debug_environment()` now uses `PyKXReimport` when running the `q` subprocess and captures `stderr` in case of failure. +- When using debug mode, retrieval of unknown context's would incorrectly present a backtrace to a user, for example: + + === "Behaviour prior to change" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'true' + >>> import pykx as kx + >>> kx.q.read.csv('/usr/local/anaconda3/data/taxi/yellow_tripdata_2019-12.csv') + backtrace: + [2] k){x:. 
x;$[99h<@x;:`$"_pykx_fn_marker";99h~@x;if[` in!x;if[(::)~x`;:`$"_pykx_ctx_marker"]]]x} + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + + pykx.Table(pykx.q(' + VendorID tpep_pickup_datetime tpep_dropoff_datetime passenge.. + -----------------------------------------------------------------------------.. + 1 2019.12.01D00:26:58.000000000 2019.12.01D00:41:45.000000000 1 .. + 1 2019.12.01D00:12:08.000000000 2019.12.01D00:12:14.000000000 1 .. + 1 2019.12.01D00:25:53.000000000 2019.12.01D00:26:04.000000000 1 .. + ``` + + === "Behaviour post change" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'true' + >>> import pykx as kx + >>> kx.q.read.csv('/usr/local/anaconda3/data/taxi/yellow_tripdata_2019-12.csv') + pykx.Table(pykx.q(' + VendorID tpep_pickup_datetime tpep_dropoff_datetime passenge.. + -----------------------------------------------------------------------------.. + 1 2019.12.01D00:26:58.000000000 2019.12.01D00:41:45.000000000 1 .. + 1 2019.12.01D00:12:08.000000000 2019.12.01D00:12:14.000000000 1 .. + 1 2019.12.01D00:25:53.000000000 2019.12.01D00:26:04.000000000 1 .. + ``` + +- When using debug mode, PyKX could run into issues where attempts to compare single character atoms would result in an error. This has now been fixed. 
+ + === "Behaviour prior to change" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'true' + >>> import pykx as kx + >>> kx.q('"z"') == b'z' + backtrace: + [2] =zz + ^ + [1] (.Q.trp) + + [0] {[pykxquery] .Q.trp[value; pykxquery; {2@"backtrace: + ^ + ",.Q.sbt y;'x}]} + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/wrappers.py", line 361, in __eq__ + return self._compare(other, '=') + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/wrappers.py", line 338, in _compare + r = q(op_str, self, other) + File "/usr/local/anaconda3/lib/python3.8/site-packages/pykx/embedded_q.py", line 233, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: = + ``` + + === "Behaviour post change" + + ```python + >>> import os + >>> os.environ['PYKX_QDEBUG'] = 'true' + >>> import pykx as kx + >>> kx.q('"z"') == b'z' + pykx.BooleanAtom(pykx.q('1b')) + ``` +- Update to system functions `tables` and `functions` to allow listing of tables and functions within dictionaries. Previously attempts to list entities within dictionaries would attempt to retrieve items in a namespace. The below example shows this behaviour for tables. 
+ + === "Behaviour prior to change" + + ```python + >>> import pykx as kx + >>> kx.q('.test.table:([]100?1f;100?0b)') + >>> kx.q('test.tab:([]10?1f;10?5)') + >>> kx.q.system.tables('test') + pykx.SymbolVector(pykx.q(',`table')) + >>> kx.q.system.tables('.test') + pykx.SymbolVector(pykx.q(',`table')) + ``` + + === "Behaviour post change" + + ```python + >>> import pykx as kx + >>> kx.q('.test.table:([]100?1f;100?0b)') + >>> kx.q('test.tab:([]10?1f;10?5)') + >>> kx.q.system.tables('test') + pykx.SymbolVector(pykx.q(',`tab')) + >>> kx.q.system.tables('.test') + pykx.SymbolVector(pykx.q(',`table')) + ``` + +- Resolved issue in `PyKXReimport` which caused it to set empty environment variables to `None` rather than leaving them empty. +- The `_PyKX_base_types` attribute assigned to dataframes during `.pd()` conversion included `'>` in the contents. This has been removed: + + === "Behaviour prior to change" + + ```python + >>> kx.q('([] a:1 2)').pd().attrs['_PyKX_base_types'] + {'a': "LongVector'>"} + ``` + + === "Behaviour post change" + + ```python + >>> kx.q('([] a:1 2)').pd().attrs['_PyKX_base_types'] + {'a': "LongVector"} + ``` + +- IPC queries can now pass PyKX Functions like objects as the query parameter. + + === "Behaviour prior to change" + + ```python + >>> import pykx as kx + >>> conn = kx.SyncQConnection(port = 5050) + >>> conn(kx.q.sum, [1, 2]) + .. + ValueError: Cannot send Python function over IPC + >>> conn(kx.q('{x+y}'), 1, 2) + .. + ValueError: Cannot send Python function over IPC + >>> conn(kx.q.floor, 5.2) + .. 
+ ValueError: Cannot send Python function over IPC + ``` + + === "Behaviour post change" + + ```python + >>> import pykx as kx + >>> conn = kx.SyncQConnection(port = 5050) + >>> conn(kx.q.sum, [1, 2]) + pykx.LongAtom(pykx.q('3')) + >>> conn(kx.q('{x+y}'), 1, 2) + pykx.LongAtom(pykx.q('3')) + >>> conn(kx.q.floor, 5.2) + pykx.LongAtom(pykx.q('5')) + ``` + +- When failing to initialise PyKX with an expired or invalid license PyKX will now point a user to the license location: + + === "Behaviour prior to change" + + ```python + Your PyKX license has now expired. + + Captured output from initialization attempt: + '2023.10.18T13:27:59.719 licence error: exp + + Would you like to renew your license? [Y/n]: + ``` + + === "Behaviour post change" + + ```python + Your PyKX license has now expired. + + Captured output from initialization attempt: + '2023.10.18T13:27:59.719 licence error: exp + + License location used: + /usr/local/anaconda3/pykx/kc.lic + + Would you like to renew your license? [Y/n]: + ``` +- Disabled raw conversions for `kx.List` types as the resulting converted object would be unusable, for example: + + === "Behaviour prior to change" + + ```python + >>> kx.q('(1j; 2f; 3i; 4e; 5h)').np(raw=True) + array([418404288, 1, 418403936, 1, 418404000], dtype=np.uintp) + ``` + + === "Behaviour post change" + + ```python + >>> kx.q('(1j; 2f; 3i; 4e; 5h)').np(raw=True) + array([1, 2.0, 3, 4.0, 5], dtype=object) + ``` + + - `handle_nulls` now operates on all of `datetime64[ns|us|ms|s]` and ensures that the contents of the original dataframe are not modified: + + === "Behaviour prior to change" + + ```python + >>> ns = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ns]') + >>> us = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[us]') + >>> ms = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ms]') + >>> s = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[s]') + >>> df = pd.DataFrame(data= {'ns':ns, 
'us':us, 'ms':ms,'s':s}) + + >>> df + ns us ms s + 0 NaT NaT NaT NaT + 1 2020-09-08 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05 + >>> kx.toq(df, handle_nulls=True) + :1: RuntimeWarning: WARN: Type information of column: s is not known falling back to DayVector type + pykx.Table(pykx.q(' + ns us ms s + ---------------------------------------------------------------------------------------------------- + 1970.01.01D00:00:00.000000000 1970.01.01D00:00:00.000000000 + 2020.09.08D07:06:05.123456789 2020.09.08D07:06:05.123456000 2020.09.08D07:06:05.123000000 2020.09.08 + ')) + >>> df + ns us ms s + 0 NaT NaT NaT NaT + 1 1990-09-09 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05 + ``` + + === "Behaviour post change" + + ```python + >>> ns = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ns]') + >>> us = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[us]') + >>> ms = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[ms]') + >>> s = np.array(['', '2020-09-08T07:06:05.123456789'], dtype='datetime64[s]') + >>> df = pd.DataFrame(data= {'ns':ns, 'us':us, 'ms':ms,'s':s}) + + >>> df + ns us ms s + 0 NaT NaT NaT NaT + 1 2020-09-08 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05 + >>> kx.toq(df, handle_nulls=True) + pykx.Table(pykx.q(' + ns us ms s + ----------------------------------------------------------------------------------------------------------------------- + + 2020.09.08D07:06:05.123456789 2020.09.08D07:06:05.123456000 2020.09.08D07:06:05.123000000 2020.09.08D07:06:05.000000000 + ')) + >>> df + ns us ms s + 0 NaT NaT NaT NaT + 1 2020-09-08 07:06:05.123456789 2020-09-08 07:06:05.123456 2020-09-08 07:06:05.123 2020-09-08 07:06:05 + ``` + + - Fix for error when calling `.pd(raw=True)` on `EnumVector`: + + === "Behaviour prior to change" + + ```python + >>> kx.q('`s?`a`b`c').pd(raw=True) + 
Traceback (most recent call last): + File "", line 1, in + File "/home/user/.pyenv/versions/3.11.5/lib/python3.11/site-packages/pykx/wrappers.py", line 2601, in pd + return super(self).pd(raw=raw, has_nulls=has_nulls) + ^^^^^^^^^^^ + TypeError: super() argument 1 must be a type, not EnumVector + ``` + + === "Behaviour post change" + + ```python + >>> import pykx as kx + >>> kx.q('`s?`a`b`c').pd(raw=True) + 0 0 + 1 1 + 2 2 + dtype: int64 + ``` + +### Upgrade considerations + + - Since 2.1.0 when using Pandas >= 2.0 dataframe columns of type `datetime64[s]` converted to `DateVector` under `toq`. Now correctly converts to `TimestampVector`. See [conversion considerations](../user-guide/fundamentals/conversion_considerations.md#temporal-types) for further details. + + === "Behaviour prior to change" + + ```python + >>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')})) + :1: RuntimeWarning: WARN: Type information of column: a is not known falling back to DayVector type + pykx.Table(pykx.q(' + a + ---------- + 2020.09.08 + ')) + ``` + + === "Behaviour post change" + + ```python + >>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')})) + pykx.Table(pykx.q(' + a + ----------------------------- + 2020.09.08D07:06:05.000000000 + ')) + #Licensed users can pass `ktype` specifying column types if they wish to override the default behaviour + >>> kx.toq(pd.DataFrame(data= {'a':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}), ktype={'a':kx.DateVector}) + pykx.Table(pykx.q(' + a + ---------- + 2020.09.08 + ')) + ``` + + - Configuration option `PYKX_DISABLE_PANDAS_WARNING` has been removed. + - Deprecated `.pd(raw_guids)` keyword. + +### Beta Features + +- Addition of [streamlit](https://streamlit.io/) connection class `pykx.streamlit.Connection` to allow querying of q processes when building a streamlit application.
For an example of this functionality and an introduction to its usage see [here](../beta-features/streamlit.md). + +## PyKX 2.4.2 + +#### Release Date + +2024-04-03 + +### Fixes and Improvements + +- Updated `libq` to 2024.03.28 for all supported OS's. + +## PyKX 2.4.1 + +#### Release Date + +2024-03-27 + +### Fixes and Improvements + +- Previously calls to `qsql.select`, `qsql.exec`, `qsql.update` and `qsql.delete` would require multiple calls to parse the content of `where`, `columns` and `by` clauses. These have now been removed with all parsing now completed within the functional query when called via IPC or local to the Python process. +- Linux x86 and Mac x86/ARM unlicensed mode `e.o` library updated to 2023.11.22. Fixes subnormals issue: + + === "Behavior prior to change" + + ```python + >>> import os + >>> os.environ['PYKX_UNLICENSED']='true' + >>> import pykx as kx + >>> import numpy as np + >>> np.finfo(np.float64).smallest_subnormal + 0. + /usr/local/anaconda3/lib/python3.8/site-packages/numpy/core/getlimits.py:518: UserWarning: The value of the smallest subnormal for type is zero. + setattr(self, word, getattr(machar, word).flat[0]) + /usr/local/anaconda3/lib/python3.8/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero. + return self._float_to_str(self.smallest_subnormal) + 0.0 + ``` + + === "Behavior post change" + + ```python + >>> import os + >>> os.environ['PYKX_UNLICENSED']='true' + >>> import pykx as kx + >>> import numpy as np + >>> np.finfo(np.float64).smallest_subnormal + 0. + 5e-324 + ``` + ## PyKX 2.4.0 #### Release Date @@ -174,7 +727,6 @@ raise LicenseException("run q code via 'pykx.q'") pykx.exceptions.LicenseException: A valid q license must be in a known location (e.g. `$QLIC`) to run q code via 'pykx.q'.
``` - === "Behavior post change" ```python @@ -222,6 +774,8 @@ === "Behavior post change" ```python + >>> tab = kx.Table(data = {'sym': ['a', 'b', 'c'], 'num': [1, 2, 3]}) + >>> tab.astype({'sym': kx.SymbolAtom}) pykx.Table(pykx.q(' sym num ------- @@ -278,7 +832,6 @@ >>> tab1.merge(tab2_keyed, how='left', q_join=True) ``` - ### Beta Features - Addition of `Compress` and `Encrypt` classes to allow users to set global configuration and for usage within Database partition persistence. @@ -940,29 +1493,29 @@ - Addition of negative slicing to `list` , `vector` and `table` objects - ```python - >>> import pykx as kx - >>> qlist = kx.q('("a";2;3.3;`four)') - >>> qlist[-3:] - pykx.List(pykx.q(' - 2 - 3.3 - `four - ')) + ```python + >>> import pykx as kx + >>> qlist = kx.q('("a";2;3.3;`four)') + >>> qlist[-3:] + pykx.List(pykx.q(' + 2 + 3.3 + `four + ')) - >>> vector = kx.q('til 5') - >>> vector[:-1] - pykx.LongVector(pykx.q('0 1 2 3')) + >>> vector = kx.q('til 5') + >>> vector[:-1] + pykx.LongVector(pykx.q('0 1 2 3')) - >>> table = kx.q('([] a:1 2 3; b:4 5 6; c:7 8 9)') - >>> table[-2:] - pykx.Table(pykx.q(' - a b c - ----- - 2 5 8 - 3 6 9 - ')) - ``` + >>> table = kx.q('([] a:1 2 3; b:4 5 6; c:7 8 9)') + >>> table[-2:] + pykx.Table(pykx.q(' + a b c + ----- + 2 5 8 + 3 6 9 + ')) + ``` ### Fixes and Improvements @@ -1165,8 +1718,8 @@ the following reads a CSV file and specifies the types of the three columns name !!! Warning "Pandas 2.0 has deprecated the `datetime64[D/M]` types." Due to this change it is not always possible to determine if the resulting q Table should - use a `MonthVector` or a `DayVector`. In the scenario that it is not possible to determine - the expected type a warning will be raised and the `DayVector` type will be used as a + use a `MonthVector` or a `DateVector`. In the scenario that it is not possible to determine + the expected type a warning will be raised and the `DateVector` type will be used as a default. 
### Fixes and Improvements diff --git a/docs/release-notes/underq-changelog.md b/docs/release-notes/underq-changelog.md index f248ca3..0659ef4 100644 --- a/docs/release-notes/underq-changelog.md +++ b/docs/release-notes/underq-changelog.md @@ -6,6 +6,36 @@ This changelog provides updates from PyKX 2.0.0 and above, for information relat The changelog presented here outlines changes to PyKX when operating within a q environment specifically, if you require changelogs associated with PyKX operating within a Python environment see [here](./changelog.md). +## PyKX 2.5.0 + +#### Release Date + +TBD + +### Fixes and Improvements + +- When loading PyKX under q from a source file path containing a space, initialisation would fail with an `nyi` error message; this has now been resolved. + +## PyKX 2.4.1 + +#### Release Date + +2024-03-27 + +### Fixes and Improvements + +- When loading PyKX under q users who had previously loaded [embedPy](https://github.com/KxSystems/embedPy) into their process would cause a segfault of unspecified origin. With this release we have added a warning prior to loading of PyKX which specifies that if a value of `.p.e` has been specified which does not match that expected of PyKX a user should consider installing PyKX under q fully: + + ```q + q)\l p.q // Load embedPy + q)\l pykx.q + Warning: Detected invalid '.p.e' function definition expected for PyKX. + Have you loaded another Python integration first? + + Please consider full installation of PyKX under q following instructions at: + https://code.kx.com/pykx/pykx-under-q/intro.html#installation + ``` + ## PyKX 2.3.1 #### Release Date diff --git a/docs/roadmap.md b/docs/roadmap.md index 45e84b3..660fe3b 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,29 +1,53 @@ # PyKX Roadmap -This page outlines areas of development focus for the PyKX team to provide you with an understanding of the development direction of the library.
This is not an exhaustive list of all features/areas of focus but should give you a view on what to expect from the team over the coming months. Additionally this list is subject to change based on the complexity of the features and any customer feature requests raised following the publishing of this list. +This page outlines areas of development focus for the PyKX team to provide you with an understanding of the development direction of the library. This is not an exhaustive list of all features/areas of focus but should give you a view on what to expect from the team over the coming months. Additionally this list is subject to change, particularly for any example code provided based on the complexity of the features and any customer feature requests raised following the publishing of this list. If you need a feature that's not included in this list please let us know by raising a [Github issue](https://github.com/KxSystems/pykx/issues)! -## Nov 2023 - Jan 2024 - -- Support Python 3.12 -- Tighter integration with [Streamlit](https://streamlit.io/) allowing streamlit applications to interact with kdb+ servers and on-disk databases -- User defined Python functions to be supported when operating with local qsql.select functionality -- [JupyterQ](https://github.com/KxSystems/jupyterq) and [ML-Toolkit](https://github.com/KxSystems/ml) updates to allow optional PyKX backend replacing embedPy -- Pythonic data sorting for PyKX Tables - -## Feb - Apr 2024 - -- Database management functionality allowing for Pythonic persistence and management of on-disk kdb+ Databases (Beta) -- Improvements to multi-threaded PyKX efficiency, reducing per-call overhead for running PyKX on separate threads +## Upcoming Changes + +- More Pythonic query syntax when querying PyKX Tables. Syntax for this will be similar to the following: + + ```python + >>> import pykx as kx + >>> N = 10000 + >>> table = kx.Table(data = { + ... 'x' : kx.random.random(N, ['a', 'b', 'c']), + ...
'x1': kx.random.random(N, 100.0), + ... 'x2': kx.random.random(N, 100) + ... }) + >>> table.select(where = kx.col('x') == 'a') + >>> table.select(kx.col('x1').max()) + >>> table.select(kx.col('x1').wavg('x2')) + ``` + +- Addition of support for q primitives as methods off PyKX Vector and Table objects. Syntax for this will be similar to the following: + + ```python + >>> import pykx as kx + >>> N = 1000 + >>> vec = kx.random.random(N, 100.0) + >>> vec.mavg(3) + >>> vec.abs() + ``` + +- Performance improvements for conversions from Numpy arrays to PyKX Vector objects and vice-versa through enhanced use of C++ over Cython. +- Additions to the Pandas Like API for PyKX. + - `isnull` + - `idxmax` + - `kurt` + - `sem` + +- Addition of functionality for the development of streaming workflows using PyKX. - Configurable initialisation logic in the absence of a license. Thus allowing users who have their own workflows for license access to modify the instructions for their users. -- Addition of `cast` keyword when inserting/upserting data into a table reducing mismatch issues +- Promotion of Beta functionality currently available in PyKX to full production support + - Database Management + - Compression and Encryption + - Multi-threaded execution + - Remote function execution ## Future - Tighter integration between PyKX/q objects and PyArrow arrays/Tables - Expansion of supported datatypes for translation to/from PyKX -- Continued additions of Pandas-like functionality on PyKX Table objects -- Performance improvements through enhanced usage of Cython -- Real-time/Streaming functionality utilities - Data pre-processing and statistics modules for operation on PyKX tables and vector objects diff --git a/docs/stylesheets/pykx.css b/docs/stylesheets/pykx.css index d3b2872..a726c37 100644 --- a/docs/stylesheets/pykx.css +++ b/docs/stylesheets/pykx.css @@ -1,5 +1,5 @@ .md-grid { - max-width: 75rem; + max-width: 100%; } /* Indentation with bars on the left */ diff --git
a/docs/troubleshooting.md b/docs/troubleshooting.md index 7aaf644..3b7eb7d 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -9,7 +9,7 @@ The following section outlines practical information useful when dealing with ge A number of trial and enterprise type licenses exist for q/kdb+. Not all licenses for q/kdb+ however are valid for PyKX. In particular users require access to a license which contains the feature flags `pykx` and `embedq` which provide access to the PyKX functionality. The following locations can be used for the retrieval of evaluation/personal licenses - For non-commercial personal users you can access a 12 month kdb+ license with PyKX enabled [here](https://kx.com/kdb-insights-personal-edition-license-download). -- For commercial evaluation you can download a 30 day PyKX license [here](https://kx.com/kdb-insights-commercial-evaluation-license-download/). +- For commercial evaluation, contact your KX sales representative or sales@kx.com requesting a PyKX trial license. Alternately apply through https://kx.com/book-demo. For non-personal or non-commercial usage please contact sales@kx.com. @@ -20,7 +20,7 @@ Once you have access to your license you can install the license following the w >>> kx.license.install('/path/to/downloaded/kc.lic') ``` -### Initialization failing with a 'embedq' error +### Initialization failing with a 'embedq' error Failure to initialize PyKX while raising an error `embedq` indicates that the license you are attempting to use for PyKX in [licensed modality](user-guide/advanced/modes.md) does not have the sufficient feature flags necessary to run PyKX. To access a license which does allow for running PyKX in this modality please following the instructions [here](#accessing-a-license-valid-for-pykx) to get a new license with appropriate feature flags. 
@@ -93,7 +93,6 @@ It usually indicates that your license was not correctly written to disk or a li The following section outlines how a user can get access to a verbose set of environment configuration associated with PyKX. This information is helpful when debugging your environment and should be provided if possible with support requests. - ```python >>> import pykx as kx >>> kx.util.debug_environment() # see below for output diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 3ad3bd9..ed3e281 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "dfa26ef1", + "id": "d2a3ccf7", "metadata": {}, "source": [ "# Pandas API\n", @@ -22,8 +22,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "5b2f27e1", + "execution_count": 2, + "id": "13267c00", "metadata": { "tags": [ "hide_code" @@ -33,13 +33,13 @@ "source": [ "import os\n", "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME \n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." 
] }, { "cell_type": "code", - "execution_count": null, - "id": "356b337c", + "execution_count": 3, + "id": "44c90043", "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ }, { "cell_type": "markdown", - "id": "b5c9b878", + "id": "06e3f624", "metadata": {}, "source": [ "## Constructing Tables" @@ -59,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "15884a6f", + "id": "31561309", "metadata": {}, "source": [ "### Table\n", @@ -88,7 +88,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a3d8e590", + "id": "170587aa", "metadata": {}, "outputs": [], "source": [ @@ -97,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "1967dbd6", + "id": "273de502", "metadata": {}, "source": [ "Create a Table from an array like object." @@ -106,7 +106,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b8c67d04", + "id": "62b9f5c1", "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "markdown", - "id": "b59c678b", + "id": "51d82353", "metadata": {}, "source": [ "Create a Table from an array like object and provide names for the columns to use." @@ -124,7 +124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6469f77e", + "id": "e9751924", "metadata": {}, "outputs": [], "source": [ @@ -133,7 +133,7 @@ }, { "cell_type": "markdown", - "id": "a3074cc5", + "id": "36edf1de", "metadata": {}, "source": [ "### Keyed Table\n", @@ -163,7 +163,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03162ab2", + "id": "0ab1d288", "metadata": {}, "outputs": [], "source": [ @@ -172,7 +172,7 @@ }, { "cell_type": "markdown", - "id": "eda04de8", + "id": "1a2f9b56", "metadata": {}, "source": [ "Create a keyed table from a list of rows." 
@@ -181,7 +181,7 @@ { "cell_type": "code", "execution_count": null, - "id": "de9fcc81", + "id": "8a0b5ce8", "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "ab5393c3", + "id": "804183ed", "metadata": {}, "source": [ "Create a keyed table from a list of rows and provide names for the resulting columns." @@ -199,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "576e4254", + "id": "21b018fe", "metadata": {}, "outputs": [], "source": [ @@ -208,7 +208,7 @@ }, { "cell_type": "markdown", - "id": "cca4e246", + "id": "b91e990b", "metadata": {}, "source": [ "Create a keyed table with a specified index column." @@ -217,7 +217,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a29d1521", + "id": "d2631bad", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "73bf284f", + "id": "f1f43263", "metadata": {}, "source": [ "## Metadata" @@ -235,18 +235,23 @@ { "cell_type": "code", "execution_count": null, - "id": "4b363f07", + "id": "15b9c003", "metadata": {}, "outputs": [], "source": [ - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "N = 1000\n", + "tab = kx.Table(data = {\n", + " 'x': kx.q.til(N),\n", + " 'y': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'z': kx.random.random(N, 500.0),\n", + " 'w': kx.random.random(N, 1000),\n", + " 'v': kx.random.random(N, [kx.LongAtom.null, 0, 50, 100, 200, 250])})\n", "tab" ] }, { "cell_type": "markdown", - "id": "40155b78", + "id": "c2122f58", "metadata": {}, "source": [ "### Table.columns\n", @@ -257,7 +262,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e8a0395e", + "id": "6e35b1b4", "metadata": {}, "outputs": [], "source": [ @@ -266,7 +271,7 @@ }, { "cell_type": "markdown", - "id": "13516f56", + "id": "fc006fd7", "metadata": {}, "source": [ "### Table.dtypes\n", @@ -277,7 +282,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "5a312477", + "id": "c8f571f6", "metadata": {}, "outputs": [], "source": [ @@ -286,7 +291,7 @@ }, { "cell_type": "markdown", - "id": "10124c07", + "id": "5b4d25bf", "metadata": {}, "source": [ "### Table.empty\n", @@ -297,7 +302,7 @@ { "cell_type": "code", "execution_count": null, - "id": "751fc442", + "id": "b01c0791", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +311,7 @@ }, { "cell_type": "markdown", - "id": "c973fb82", + "id": "550c1126", "metadata": {}, "source": [ "### Table.ndim\n", @@ -317,7 +322,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ee6b55a0", + "id": "88affa6b", "metadata": {}, "outputs": [], "source": [ @@ -326,7 +331,7 @@ }, { "cell_type": "markdown", - "id": "07ac8e54", + "id": "f479bdcc", "metadata": {}, "source": [ "### Table.shape\n", @@ -337,7 +342,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8d6f890c", + "id": "a0609e97", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +351,7 @@ }, { "cell_type": "markdown", - "id": "654129cc", + "id": "42bc2bc3", "metadata": {}, "source": [ "### Table.size\n", @@ -357,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e621250", + "id": "886296f3", "metadata": {}, "outputs": [], "source": [ @@ -366,7 +371,7 @@ }, { "cell_type": "markdown", - "id": "8e210a91", + "id": "1439bde3", "metadata": {}, "source": [ "## Querying and Data Interrogation" @@ -375,19 +380,24 @@ { "cell_type": "code", "execution_count": null, - "id": "77ab64ab", + "id": "776b5725", "metadata": {}, "outputs": [], "source": [ "# The examples in this section will use this example table filled with random data\n", - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "N = 1000\n", + "tab = kx.Table(data = {\n", + " 'x': kx.q.til(N),\n", + " 'y': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'z': kx.random.random(N, 500.0),\n", + " 'w': kx.random.random(N, 
1000),\n", + " 'v': kx.random.random(N, [kx.LongAtom.null, 0, 50, 100, 200, 250])})\n", "tab" ] }, { "cell_type": "markdown", - "id": "9bd3dada", + "id": "d356c82f", "metadata": {}, "source": [ "### Table.all()\n", @@ -416,7 +426,7 @@ { "cell_type": "code", "execution_count": null, - "id": "95aa447d", + "id": "b1c046de", "metadata": {}, "outputs": [], "source": [ @@ -425,7 +435,7 @@ }, { "cell_type": "markdown", - "id": "4ac12eb0", + "id": "e9c11a2e", "metadata": {}, "source": [ "### Table.any()\n", @@ -454,7 +464,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a43aabc4", + "id": "501721e8", "metadata": {}, "outputs": [], "source": [ @@ -463,7 +473,7 @@ }, { "cell_type": "markdown", - "id": "81a8e19f", + "id": "cb69b61a", "metadata": {}, "source": [ "### Table.at[]\n", @@ -481,7 +491,7 @@ }, { "cell_type": "markdown", - "id": "44a37aff", + "id": "8262b005", "metadata": {}, "source": [ "**Examples:**\n", @@ -492,7 +502,7 @@ { "cell_type": "code", "execution_count": null, - "id": "618fe622", + "id": "3664be9c", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +511,7 @@ }, { "cell_type": "markdown", - "id": "23203909", + "id": "043ed9ca", "metadata": {}, "source": [ "Reassign the value of the `z` column in the 997th row to `3.14159`." 
@@ -510,7 +520,7 @@ { "cell_type": "code", "execution_count": null, - "id": "978d991d", + "id": "3c7c4bc7", "metadata": {}, "outputs": [], "source": [ @@ -520,7 +530,7 @@ }, { "cell_type": "markdown", - "id": "3d62cbbc", + "id": "903c0aac", "metadata": {}, "source": [ "### Table.get()\n", @@ -547,7 +557,7 @@ }, { "cell_type": "markdown", - "id": "00c06637", + "id": "3d094b7b", "metadata": {}, "source": [ "**Examples:**\n", @@ -558,7 +568,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f950cc1e", + "id": "7809ac4a", "metadata": { "scrolled": true }, @@ -569,7 +579,7 @@ }, { "cell_type": "markdown", - "id": "78608b1c", + "id": "2ddd9659", "metadata": {}, "source": [ "Get the `y` and `z` columns from the table." @@ -578,7 +588,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02d4d586", + "id": "78c9f224", "metadata": { "scrolled": true }, @@ -589,7 +599,7 @@ }, { "cell_type": "markdown", - "id": "2a2186aa", + "id": "379219ef", "metadata": {}, "source": [ "Attempt to get the `q` column from the table and receive none as that column does not exist." @@ -598,7 +608,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a88ef7dc", + "id": "010d9d98", "metadata": {}, "outputs": [], "source": [ @@ -607,7 +617,7 @@ }, { "cell_type": "markdown", - "id": "ea3dc01a", + "id": "3ee99633", "metadata": {}, "source": [ "Attempt to get the `q` column from the table and receive the default value `not found` as that column does not exist." 
@@ -616,7 +626,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2f3abc92", + "id": "ffd3a851", "metadata": {}, "outputs": [], "source": [ @@ -625,7 +635,7 @@ }, { "cell_type": "markdown", - "id": "b2195cfe", + "id": "34016a3f", "metadata": {}, "source": [ "### Table.head()\n", @@ -651,7 +661,7 @@ }, { "cell_type": "markdown", - "id": "18a0ca1e", + "id": "d823513a", "metadata": {}, "source": [ "**Examples:**\n", @@ -662,7 +672,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5120ce1c", + "id": "5618880c", "metadata": {}, "outputs": [], "source": [ @@ -671,7 +681,7 @@ }, { "cell_type": "markdown", - "id": "08f158a8", + "id": "c5a8b2e8", "metadata": {}, "source": [ "Return the first 10 rows of the table." @@ -680,7 +690,7 @@ { "cell_type": "code", "execution_count": null, - "id": "de9c2842", + "id": "90071dcf", "metadata": {}, "outputs": [], "source": [ @@ -857,7 +867,7 @@ }, { "cell_type": "markdown", - "id": "07e31d96", + "id": "a3945130", "metadata": {}, "source": [ "**Examples:**\n", @@ -868,7 +878,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8108853", + "id": "1f83db52", "metadata": { "scrolled": true }, @@ -879,7 +889,7 @@ }, { "cell_type": "markdown", - "id": "30c429f4", + "id": "72b468a1", "metadata": {}, "source": [ "Get the first 5 rows from a table." @@ -888,7 +898,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2f817967", + "id": "5354ca81", "metadata": {}, "outputs": [], "source": [ @@ -897,7 +907,7 @@ }, { "cell_type": "markdown", - "id": "2eb41e47", + "id": "9295eddc", "metadata": {}, "source": [ "Get all rows of the table where the `y` column is equal to `AAPL`." 
@@ -906,7 +916,7 @@ { "cell_type": "code", "execution_count": null, - "id": "69e14007", + "id": "6410e870", "metadata": { "scrolled": true }, @@ -917,7 +927,7 @@ }, { "cell_type": "markdown", - "id": "7861f193", + "id": "08792c1d", "metadata": {}, "source": [ "Get all rows of the table where the `y` column is equal to `AAPL`, and only return the `y`, `z` and `w` columns." @@ -926,7 +936,7 @@ { "cell_type": "code", "execution_count": null, - "id": "323cc0f8", + "id": "d61b8396", "metadata": {}, "outputs": [], "source": [ @@ -935,7 +945,7 @@ }, { "cell_type": "markdown", - "id": "9de566f3", + "id": "4525b646", "metadata": {}, "source": [ "Replace all null values in the column `v` with the value `-100`." @@ -944,7 +954,7 @@ { "cell_type": "code", "execution_count": null, - "id": "be66947d", + "id": "b65e7a05", "metadata": {}, "outputs": [], "source": [ @@ -954,7 +964,7 @@ }, { "cell_type": "markdown", - "id": "ed37aa73", + "id": "dc97669c", "metadata": {}, "source": [ "### Table.loc[]\n", @@ -990,7 +1000,7 @@ }, { "cell_type": "markdown", - "id": "c68e21f1", + "id": "f90efe27", "metadata": {}, "source": [ "**Examples:**\n", @@ -1001,7 +1011,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e46092cc", + "id": "20974780", "metadata": { "scrolled": true }, @@ -1012,7 +1022,7 @@ }, { "cell_type": "markdown", - "id": "9e136f10", + "id": "ceccd5a9", "metadata": {}, "source": [ "Get all rows of the table where the value in the `z` column is greater than `250.0`" @@ -1021,7 +1031,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52d2f0fe", + "id": "e99478b5", "metadata": {}, "outputs": [], "source": [ @@ -1030,7 +1040,7 @@ }, { "cell_type": "markdown", - "id": "52c058a6", + "id": "5300666e", "metadata": {}, "source": [ "Replace all null values in the column `v` with the value `-100`." 
@@ -1039,19 +1049,19 @@ { "cell_type": "code", "execution_count": null, - "id": "960f1933", + "id": "889ddbd3", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab.loc[tab['v'] == kx.q('0N'), 'v'] = -100\n", + "tab.loc[tab['v'] == kx.LongAtom.null, 'v'] = -100\n", "tab" ] }, { "cell_type": "markdown", - "id": "9b262eca", + "id": "e52f569f", "metadata": {}, "source": [ "Replace all locations in column `v` where the value is `-100` with a null." @@ -1060,17 +1070,17 @@ { "cell_type": "code", "execution_count": null, - "id": "f4c974c7", + "id": "2df5ddff", "metadata": {}, "outputs": [], "source": [ - "tab[tab['v'] == -100, 'v'] = kx.q('0N')\n", + "tab[tab['v'] == -100, 'v'] = kx.LongAtom.null\n", "tab" ] }, { "cell_type": "markdown", - "id": "ddc94e12", + "id": "ca371dea", "metadata": {}, "source": [ "Usage of the `loc` functionality under the hood additionally allows users to set columns within a table for single or multiple columns. Data passed for this can be q/Python." @@ -1079,26 +1089,26 @@ { "cell_type": "code", "execution_count": null, - "id": "f9d06838", + "id": "2c5b1db2", "metadata": {}, "outputs": [], "source": [ - "tab['new_col'] = kx.q('1000?1f')" + "tab['new_col'] = kx.random.random(1000, 1.0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "1505d9bb", + "id": "87d71574", "metadata": {}, "outputs": [], "source": [ - "tab[['new_col1', 'new_col2']] = [20, kx.q('1000?0Ng')]" + "tab[['new_col1', 'new_col2']] = [20, kx.random.random(1000, kx.GUIDAtom.null)]" ] }, { "cell_type": "markdown", - "id": "05124590", + "id": "53c9631f", "metadata": {}, "source": [ "### Table.sample()\n", @@ -1131,21 +1141,26 @@ { "cell_type": "code", "execution_count": null, - "id": "8b4a10be", + "id": "845e22d6", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# The examples in this section will use this example table filled with random data\n", - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: 
N?1000; v: N?(0N 0 50 100 200 250))')\n", + "N = 1000\n", + "tab = kx.Table(data = {\n", + " 'x': kx.q.til(N),\n", + " 'y': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'z': kx.random.random(N, 500.0),\n", + " 'w': kx.random.random(N, 1000),\n", + " 'v': kx.random.random(N, [kx.LongAtom.null, 0, 50, 100, 200, 250])})\n", "tab.head()" ] }, { "cell_type": "markdown", - "id": "970c8ea4", + "id": "c9d84056", "metadata": {}, "source": [ "**Examples:**\n", @@ -1156,7 +1171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9dde77b1", + "id": "ebfeeec5", "metadata": {}, "outputs": [], "source": [ @@ -1165,7 +1180,7 @@ }, { "cell_type": "markdown", - "id": "1d14afe9", + "id": "d3150483", "metadata": {}, "source": [ "Sample 10% of the rows." @@ -1174,7 +1189,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32772c46", + "id": "67844a62", "metadata": {}, "outputs": [], "source": [ @@ -1183,7 +1198,7 @@ }, { "cell_type": "markdown", - "id": "82a7a79d", + "id": "dce42092", "metadata": {}, "source": [ "Sample 10% of the rows and allow the same row to be sampled twice." 
@@ -1192,7 +1207,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c96839b", + "id": "1a2326fd", "metadata": {}, "outputs": [], "source": [ @@ -1201,7 +1216,7 @@ }, { "cell_type": "markdown", - "id": "82b501a6", + "id": "7d42cde9", "metadata": {}, "source": [ "### Table.select_dtypes()\n", @@ -1238,7 +1253,7 @@ }, { "cell_type": "markdown", - "id": "0570165c", + "id": "bb6fc886", "metadata": {}, "source": [ "**Examples:**\n", @@ -1249,16 +1264,21 @@ { "cell_type": "code", "execution_count": null, - "id": "74ade8d1", + "id": "ca9b5532", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')" + "df = kx.Table(data = {\n", + " 'c1': kx.SymbolVector(['a', 'b', 'c']),\n", + " 'c2': kx.ShortVector([1, 2, 3]),\n", + " 'c3': kx.LongVector([1, 2, 3]),\n", + " 'c4': kx.IntVector([1, 2, 3])\n", + " })" ] }, { "cell_type": "markdown", - "id": "b889d7c7", + "id": "8eb25b29", "metadata": {}, "source": [ "Exclude columns containing symbols" @@ -1267,7 +1287,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e8a792da", + "id": "de81564b", "metadata": {}, "outputs": [], "source": [ @@ -1276,7 +1296,7 @@ }, { "cell_type": "markdown", - "id": "c87f28c4", + "id": "1e842cc3", "metadata": {}, "source": [ "Include a list of column types" @@ -1285,7 +1305,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac2af334", + "id": "ba874cb6", "metadata": {}, "outputs": [], "source": [ @@ -1294,7 +1314,7 @@ }, { "cell_type": "markdown", - "id": "ede98735", + "id": "5bb4eaa2", "metadata": {}, "source": [ "### Table.tail()\n", @@ -1320,7 +1340,7 @@ }, { "cell_type": "markdown", - "id": "a7b6bd44", + "id": "2c9de3b3", "metadata": {}, "source": [ "**Examples:**\n", @@ -1331,7 +1351,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d1f5f644", + "id": "5c31fc24", "metadata": {}, "outputs": [], "source": [ @@ -1340,7 +1360,7 @@ }, { "cell_type": "markdown", - "id": "181a4d86", + "id": "5ad81954", 
"metadata": {}, "source": [ "Return the last 10 rows of the table." @@ -1349,7 +1369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c8a0bb7b", + "id": "02974f05", "metadata": {}, "outputs": [], "source": [ @@ -1358,7 +1378,7 @@ }, { "cell_type": "markdown", - "id": "32d2194b-fe6e-4789-9437-fa8cec5f9287", + "id": "a2edb648", "metadata": {}, "source": [ "## Sorting" @@ -1366,7 +1386,7 @@ }, { "cell_type": "markdown", - "id": "38d04a7b-603d-4ecb-afb0-c7999b6d23ec", + "id": "ee65b6ab", "metadata": {}, "source": [ "### Table.sort_values()\n", @@ -1394,7 +1414,7 @@ }, { "cell_type": "markdown", - "id": "b71e942a-1247-4931-9a0f-edd2fd97b185", + "id": "6b4c5b68", "metadata": {}, "source": [ "**Examples:**" @@ -1403,17 +1423,20 @@ { "cell_type": "code", "execution_count": null, - "id": "2b8e2204-1e4e-4776-8f6a-22589ff66124", + "id": "e996a181", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data={'column_a': [20, 3, 100],'column_b': [56, 15, 42], 'column_c': [45, 80, 8]})\n", + "tab = kx.Table(data={\n", + " 'column_a': [20, 3, 100],\n", + " 'column_b': [56, 15, 42],\n", + " 'column_c': [45, 80, 8]})\n", "tab" ] }, { "cell_type": "markdown", - "id": "9494343e-34d1-4303-8007-38afe9ee6ead", + "id": "5093808f", "metadata": {}, "source": [ "Sort a Table by the second column" @@ -1422,7 +1445,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fce9c74a-ed0b-4d2f-92f4-2b9b42762d4b", + "id": "08eb698c", "metadata": {}, "outputs": [], "source": [ @@ -1431,7 +1454,7 @@ }, { "cell_type": "markdown", - "id": "6ee86878-634f-4383-bb90-af361b785f59", + "id": "4a48687d", "metadata": {}, "source": [ "Sort a Table by the third column in descending order" @@ -1440,7 +1463,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8edac0a-f6f0-4a70-ae51-7c8599ee4da9", + "id": "4ba2b42f", "metadata": {}, "outputs": [], "source": [ @@ -1449,7 +1472,7 @@ }, { "cell_type": "markdown", - "id": "2b61d8b5-52a1-4c05-9347-c205ba6934d7", + "id": "29930425", 
"metadata": {}, "source": [ "### Table.nsmallest()\n", @@ -1480,7 +1503,7 @@ }, { "cell_type": "markdown", - "id": "c2430479-e832-4c6a-8cc0-651dd6af57b4", + "id": "64976edc", "metadata": {}, "source": [ "**Examples:**\n", @@ -1491,17 +1514,20 @@ { "cell_type": "code", "execution_count": null, - "id": "768f4e97-79a4-4abb-bced-5fa99f87c4ca", + "id": "302d4b08", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data={'column_a': [2, 3, 2, 2, 1],'column_b': [56, 15, 42, 102, 32], 'column_c': [45, 80, 8, 61, 87]})\n", + "tab = kx.Table(data={\n", + " 'column_a': [2, 3, 2, 2, 1],\n", + " 'column_b': [56, 15, 42, 102, 32],\n", + " 'column_c': [45, 80, 8, 61, 87]})\n", "tab" ] }, { "cell_type": "markdown", - "id": "79600d41-ef99-478e-89e6-5e67eadb6ee7", + "id": "c687bc12", "metadata": {}, "source": [ "Get the row where the first column is the smallest" @@ -1510,7 +1536,7 @@ { "cell_type": "code", "execution_count": null, - "id": "287c6905-d508-441b-887b-b71233e1d133", + "id": "5f2e6e8b", "metadata": {}, "outputs": [], "source": [ @@ -1519,7 +1545,7 @@ }, { "cell_type": "markdown", - "id": "48f5485e-4353-4523-8cc8-8655b1b8a9c3", + "id": "580d8d06", "metadata": {}, "source": [ "Get the 4 rows where the first column is the smallest, then any equal values are sorted based on the second column" @@ -1528,7 +1554,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1772dd9b-389e-4da2-8994-245cfaa6d942", + "id": "039083ba", "metadata": {}, "outputs": [], "source": [ @@ -1537,7 +1563,7 @@ }, { "cell_type": "markdown", - "id": "7869e8c1-a303-466f-8afc-3ebdb59a379d", + "id": "db0280b3", "metadata": {}, "source": [ "Get the 2 rows with the smallest values for the first column and in case of duplicates, take the last entry in the table" @@ -1546,7 +1572,7 @@ { "cell_type": "code", "execution_count": null, - "id": "425a2841-610f-4cb2-9703-105ea14ac900", + "id": "eb02553b", "metadata": {}, "outputs": [], "source": [ @@ -1555,7 +1581,7 @@ }, { "cell_type": "markdown", 
- "id": "64ee5a21-7234-40f1-b720-e176740f4fc4", + "id": "fbb4e07f", "metadata": {}, "source": [ "### Table.nlargest()\n", @@ -1586,7 +1612,7 @@ }, { "cell_type": "markdown", - "id": "66b7c0a9-3d23-47c9-af79-8020c52d32e2", + "id": "394bdd98", "metadata": {}, "source": [ "**Examples:**\n", @@ -1597,17 +1623,20 @@ { "cell_type": "code", "execution_count": null, - "id": "1fa56308-8ede-448c-9cb6-0c232aac0dee", + "id": "ead5bfc0", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data={'column_a': [2, 3, 2, 2, 1],'column_b': [102, 15, 42, 56, 32], 'column_c': [45, 80, 8, 61, 87]})\n", + "tab = kx.Table(data={\n", + " 'column_a': [2, 3, 2, 2, 1],\n", + " 'column_b': [102, 15, 42, 56, 32],\n", + " 'column_c': [45, 80, 8, 61, 87]})\n", "tab" ] }, { "cell_type": "markdown", - "id": "2d8a45f7-a91a-41d5-854b-4bdfb7f696ef", + "id": "efc9b4c7", "metadata": {}, "source": [ "Get the row with the largest value for the first column" @@ -1616,7 +1645,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88fa3ff8-4e31-4006-aec2-c697390e2b29", + "id": "c7c6363a", "metadata": {}, "outputs": [], "source": [ @@ -1625,7 +1654,7 @@ }, { "cell_type": "markdown", - "id": "68da7ae5-e181-45dd-8fe4-ae078da131a6", + "id": "18b2a6ce", "metadata": {}, "source": [ "Get the 4 rows where the first column is the largest, then any equal values are sorted based on the third column" @@ -1634,7 +1663,7 @@ { "cell_type": "code", "execution_count": null, - "id": "81647d24-282a-48ee-bf75-d08838211e94", + "id": "9162934a", "metadata": {}, "outputs": [], "source": [ @@ -1643,7 +1672,7 @@ }, { "cell_type": "markdown", - "id": "d538d7f0-c9ff-42a0-9dd5-c95792637775", + "id": "65fce7c3", "metadata": {}, "source": [ "Get the 2 rows with the smallest values for the first column and in case of duplicates, take all rows of the same value for that column" @@ -1652,7 +1681,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c898e01e-60ef-4763-9728-2e215962f393", + "id": "f0bc8991", 
"metadata": {}, "outputs": [], "source": [ @@ -1661,7 +1690,7 @@ }, { "cell_type": "markdown", - "id": "ed1a193f-b02f-4af3-bdf2-acf46d374901", + "id": "ffc7e449", "metadata": {}, "source": [ "## Data Joins/Merging" @@ -1669,7 +1698,7 @@ }, { "cell_type": "markdown", - "id": "ef401426", + "id": "6a4c9fc9", "metadata": {}, "source": [ "### Table.merge()\n", @@ -1721,7 +1750,7 @@ }, { "cell_type": "markdown", - "id": "61d1567a", + "id": "3fbf575d", "metadata": {}, "source": [ "**Examples:**\n", @@ -1732,7 +1761,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a9acd51", + "id": "0f5f134f", "metadata": { "scrolled": true }, @@ -1745,7 +1774,7 @@ }, { "cell_type": "markdown", - "id": "7350d9db", + "id": "e9a9809e", "metadata": {}, "source": [ "Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns." @@ -1754,7 +1783,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23685dcb", + "id": "86b35497", "metadata": {}, "outputs": [], "source": [ @@ -1763,7 +1792,7 @@ }, { "cell_type": "markdown", - "id": "3b2c65d4", + "id": "c2a3ed1a", "metadata": {}, "source": [ "Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns." @@ -1772,7 +1801,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b5d16312", + "id": "a6326a4c", "metadata": { "scrolled": true }, @@ -1787,7 +1816,7 @@ { "cell_type": "code", "execution_count": null, - "id": "793df3f3", + "id": "9d56ecee", "metadata": {}, "outputs": [], "source": [ @@ -1797,7 +1826,7 @@ }, { "cell_type": "markdown", - "id": "d58a52a3", + "id": "c97d6764", "metadata": {}, "source": [ "Merge tab1 and tab2 on the `a` column using an inner join." 
@@ -1806,7 +1835,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1180e6f4", + "id": "756423a2", "metadata": { "scrolled": true }, @@ -1817,7 +1846,7 @@ }, { "cell_type": "markdown", - "id": "b14e36da", + "id": "cad8a08e", "metadata": {}, "source": [ "Merge tab1 and tab2 on the `a` column using a left join." @@ -1826,7 +1855,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b0098da", + "id": "e3511b35", "metadata": {}, "outputs": [], "source": [ @@ -1835,7 +1864,7 @@ }, { "cell_type": "markdown", - "id": "00d0ad6a", + "id": "cba56e88", "metadata": {}, "source": [ "Merge tab1 and tab2 using a cross join." @@ -1844,7 +1873,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b55be868", + "id": "3d8eb680", "metadata": { "scrolled": true }, @@ -1857,7 +1886,7 @@ }, { "cell_type": "markdown", - "id": "d552054e-883a-41ae-96b7-3e4394d6a0d9", + "id": "caa8cb07", "metadata": {}, "source": [ "Merge tab1 and tab2_keyed using a left join with `q_join` set to `True`. Inputs/Outputs will match q [lj](https://code.kx.com/q/ref/lj/) behaviour." @@ -1866,7 +1895,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4d3d70c5-9ad9-45ee-b69f-d855c3f116af", + "id": "1a7fb401", "metadata": {}, "outputs": [], "source": [ @@ -1878,7 +1907,7 @@ }, { "cell_type": "markdown", - "id": "e4e4b882-1fd9-4069-93ae-18848301a5fc", + "id": "b465b9fc", "metadata": {}, "source": [ "Inputs/Outputs will match q [ij](https://code.kx.com/q/ref/ij/) behaviour." @@ -1887,7 +1916,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bf32cdee-0b20-44f9-b0f5-db44be5e8d91", + "id": "bb0c0f70", "metadata": {}, "outputs": [], "source": [ @@ -1898,7 +1927,7 @@ }, { "cell_type": "markdown", - "id": "5e619567-b73d-4821-976e-4b5f9bdddef4", + "id": "125d8479", "metadata": {}, "source": [ "Merge using `q_join` set to `True`, and `how` set to `left`, will fail when `tab2` is not a keyed table." 
@@ -1907,7 +1936,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03a3e697-8ee8-47ee-9cf9-299e1ebfef61", + "id": "6d71a5e4", "metadata": {}, "outputs": [], "source": [ @@ -1920,7 +1949,7 @@ }, { "cell_type": "markdown", - "id": "7583c015", + "id": "42158c05", "metadata": {}, "source": [ "### Table.merge_asof()\n", @@ -1977,7 +2006,7 @@ }, { "cell_type": "markdown", - "id": "908499df", + "id": "8712f68e", "metadata": {}, "source": [ "**Examples:**\n", @@ -1988,7 +2017,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e660e496", + "id": "16fbf21a", "metadata": {}, "outputs": [], "source": [ @@ -2000,7 +2029,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e456e4ad", + "id": "c8d023aa", "metadata": {}, "outputs": [], "source": [ @@ -2010,7 +2039,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4616f6d", + "id": "b2f2766b", "metadata": {}, "outputs": [], "source": [ @@ -2019,7 +2048,7 @@ }, { "cell_type": "markdown", - "id": "496d5a72", + "id": "e10eced6", "metadata": {}, "source": [ "Perform a asof join on two tables but first merge them on the by column." 
@@ -2028,7 +2057,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f0fcc13", + "id": "943dd5b1", "metadata": {}, "outputs": [], "source": [ @@ -2074,7 +2103,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7259913", + "id": "20657aed", "metadata": {}, "outputs": [], "source": [ @@ -2084,7 +2113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32e41b85", + "id": "a858ec29", "metadata": {}, "outputs": [], "source": [ @@ -2093,7 +2122,7 @@ }, { "cell_type": "markdown", - "id": "04e022a9", + "id": "e6280a9a", "metadata": {}, "source": [ "## Analytic functionality" @@ -2102,19 +2131,24 @@ { "cell_type": "code", "execution_count": null, - "id": "c167fdc9", + "id": "b5d4844f", "metadata": {}, "outputs": [], "source": [ "# All the examples in this section will use this example table.\n", - "kx.q('N: 100')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 250f - N?500f; traded: 100 - N?200; hold: N?0b)')\n", + "N = 100\n", + "tab = kx.Table(data={\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'price': 250 - kx.random.random(N, 500.0),\n", + " 'traded': 100 - kx.random.random(N, 200),\n", + " 'hold': kx.random.random(N, False)\n", + " })\n", "tab" ] }, { "cell_type": "markdown", - "id": "be074715", + "id": "fa9c8fc5", "metadata": {}, "source": [ "### Table.abs()\n", @@ -2141,7 +2175,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52f27400", + "id": "032c6006", "metadata": { "scrolled": true }, @@ -2152,7 +2186,7 @@ }, { "cell_type": "markdown", - "id": "85d42035", + "id": "d644f8ee", "metadata": {}, "source": [ "### Table.count()\n", @@ -2180,7 +2214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a53125cb", + "id": "cd70f67c", "metadata": {}, "outputs": [], "source": [ @@ -2189,7 +2223,7 @@ }, { "cell_type": "markdown", - "id": "77a5a83f", + "id": "f8554641", "metadata": {}, "source": [ "### Table.max()\n", @@ -2218,7 +2252,7 @@ { "cell_type": "code", "execution_count": null, - "id": 
"5aea50f5", + "id": "743d7fb5", "metadata": {}, "outputs": [], "source": [ @@ -2256,7 +2290,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9f13e8a7", + "id": "d730d7e0", "metadata": {}, "outputs": [], "source": [ @@ -2427,7 +2461,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09975a7a", + "id": "4303521e", "metadata": {}, "outputs": [], "source": [ @@ -2436,7 +2470,7 @@ }, { "cell_type": "markdown", - "id": "97920009", + "id": "3fd35bc7", "metadata": {}, "source": [ "### Table.mean()\n", @@ -2463,7 +2497,7 @@ }, { "cell_type": "markdown", - "id": "dee2e8cc", + "id": "4ce8168f", "metadata": {}, "source": [ "**Examples:**\n", @@ -2474,7 +2508,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9d4c8a22", + "id": "50b58aad", "metadata": {}, "outputs": [], "source": [ @@ -2492,7 +2526,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d02c4cfd", + "id": "fc7ab777", "metadata": {}, "outputs": [], "source": [ @@ -2501,7 +2535,7 @@ }, { "cell_type": "markdown", - "id": "c6feb4ea", + "id": "f3b85934", "metadata": {}, "source": [ "Calculate the mean across the rows of a table" @@ -2510,7 +2544,7 @@ { "cell_type": "code", "execution_count": null, - "id": "506a6867", + "id": "8f85e05c", "metadata": {}, "outputs": [], "source": [ @@ -2519,7 +2553,7 @@ }, { "cell_type": "markdown", - "id": "cd714c1b", + "id": "b0eff83a", "metadata": {}, "source": [ "### Table.median()\n", @@ -2546,7 +2580,7 @@ }, { "cell_type": "markdown", - "id": "00d44518", + "id": "80f2f2a1", "metadata": {}, "source": [ "**Examples:**\n", @@ -2557,7 +2591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df20ecfc", + "id": "46ca7078", "metadata": {}, "outputs": [], "source": [ @@ -2575,7 +2609,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6e9dc5be", + "id": "0bd18f87", "metadata": {}, "outputs": [], "source": [ @@ -2584,7 +2618,7 @@ }, { "cell_type": "markdown", - "id": "585d9d01", + "id": "8312046c", "metadata": {}, "source": [ 
"Calculate the median across the rows of a table" @@ -2593,7 +2627,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6ccf50df", + "id": "6fd7558c", "metadata": {}, "outputs": [], "source": [ @@ -2602,7 +2636,7 @@ }, { "cell_type": "markdown", - "id": "aeec2045", + "id": "929fe196", "metadata": {}, "source": [ "### Table.mode()\n", @@ -2630,7 +2664,7 @@ }, { "cell_type": "markdown", - "id": "c52ffed8", + "id": "880e64c2", "metadata": {}, "source": [ "**Examples:**\n", @@ -2641,7 +2675,7 @@ { "cell_type": "code", "execution_count": null, - "id": "786fe3b6", + "id": "b0b087e3", "metadata": {}, "outputs": [], "source": [ @@ -2659,7 +2693,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58909ffa", + "id": "19d3a003", "metadata": { "scrolled": true }, @@ -2670,7 +2704,7 @@ }, { "cell_type": "markdown", - "id": "7d437b70", + "id": "85ce92d2", "metadata": {}, "source": [ "Calculate the median across the rows of a table" @@ -2679,7 +2713,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cfa17533", + "id": "3d418ed9", "metadata": {}, "outputs": [], "source": [ @@ -2688,7 +2722,7 @@ }, { "cell_type": "markdown", - "id": "4c270df3", + "id": "097ff9d9", "metadata": {}, "source": [ "Calculate the mode across columns and keep null values." 
@@ -2697,7 +2731,7 @@ { "cell_type": "code", "execution_count": null, - "id": "80afc141", + "id": "503efd21", "metadata": { "scrolled": true }, @@ -2716,7 +2750,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4e3300f5", + "id": "94f25640", "metadata": {}, "outputs": [], "source": [ @@ -2725,7 +2759,7 @@ }, { "cell_type": "markdown", - "id": "4117c73f", + "id": "7371feb5", "metadata": {}, "source": [ "### Table.prod()\n", @@ -2755,7 +2789,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a6c64b75", + "id": "7852e009", "metadata": { "scrolled": true }, @@ -2763,8 +2797,13 @@ "source": [ "# This example will use a smaller version of the above table\n", "# as the result of calculating the product quickly goes over the integer limits.\n", - "kx.q('N: 10')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: 10 - N?20; hold: N?0b)')\n", + "N = 10\n", + "tab = kx.Table(data={\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'price': 2.5 - kx.random.random(N, 5.0),\n", + " 'traded': 10 - kx.random.random(N, 20),\n", + " 'hold': kx.random.random(N, False)\n", + " })\n", "tab[tab['traded'] == 0, 'traded'] = 1\n", "tab[tab['price'] == 0, 'price'] = 1.0\n", "tab" @@ -2773,7 +2812,7 @@ { "cell_type": "code", "execution_count": null, - "id": "540297e2", + "id": "5ced8761", "metadata": {}, "outputs": [], "source": [ @@ -2999,7 +3038,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fc109f0f", + "id": "af65b9ab", "metadata": {}, "outputs": [], "source": [ @@ -3008,7 +3047,7 @@ }, { "cell_type": "markdown", - "id": "22940e03", + "id": "b054645b", "metadata": {}, "source": [ "### Table.std()\n", @@ -3038,7 +3077,7 @@ }, { "cell_type": "markdown", - "id": "292f9c39", + "id": "9a0c1a5d", "metadata": {}, "source": [ "**Examples:**\n", @@ -3049,7 +3088,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f2df159e", + "id": "42c3e6bf", "metadata": {}, "outputs": [], "source": [ @@ -3067,7 +3106,7 @@ 
{ "cell_type": "code", "execution_count": null, - "id": "63d45751", + "id": "947435db", "metadata": {}, "outputs": [], "source": [ @@ -3076,7 +3115,7 @@ }, { "cell_type": "markdown", - "id": "2e9705de", + "id": "463894f1", "metadata": {}, "source": [ "Calculate the std across the rows of a table" @@ -3085,7 +3124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8edf71a4", + "id": "7d918f6a", "metadata": {}, "outputs": [], "source": [ @@ -3094,7 +3133,7 @@ }, { "cell_type": "markdown", - "id": "1ef61cd5", + "id": "ad38071b", "metadata": {}, "source": [ "Calculate std accross columns with ddof=0:" @@ -3103,7 +3142,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0f66fe87", + "id": "77c7aaa3", "metadata": {}, "outputs": [], "source": [ @@ -3112,7 +3151,7 @@ }, { "cell_type": "markdown", - "id": "c80d90ae", + "id": "5f1e5350", "metadata": {}, "source": [ "## Group By" @@ -3120,7 +3159,7 @@ }, { "cell_type": "markdown", - "id": "2e1d05d5", + "id": "57fe61a2", "metadata": {}, "source": [ "### Table.groupby()\n", @@ -3171,7 +3210,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c0454f7d", + "id": "aa82d895", "metadata": { "scrolled": true }, @@ -3188,7 +3227,7 @@ }, { "cell_type": "markdown", - "id": "55b6b4e0", + "id": "0487cfe5", "metadata": {}, "source": [ "Group on the `Animal` column and calculate the mean of the resulting `Max Speed` and `Max Altitude` columns." @@ -3197,7 +3236,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30c55810", + "id": "db5f0dd6", "metadata": { "scrolled": true }, @@ -3208,7 +3247,7 @@ }, { "cell_type": "markdown", - "id": "0e62a99f", + "id": "361019ba", "metadata": {}, "source": [ "Example table with multiple columns to group on." 
@@ -3217,23 +3256,23 @@ { "cell_type": "code", "execution_count": null, - "id": "0ceddbbf", + "id": "c1985906", "metadata": {}, "outputs": [], "source": [ - "tab = kx.q('2!', kx.Table(\n", + "tab = kx.Table(\n", " data={\n", " 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],\n", " 'Type': ['Captive', 'Wild', 'Captive', 'Wild', 'Wild'],\n", " 'Max Speed': [390., 350., 30., 20., 25.]\n", - " }\n", - "))\n", + " })\n", + "tab = tab.set_index(2)\n", "tab" ] }, { "cell_type": "markdown", - "id": "7e43e1bc", + "id": "ae3d3244", "metadata": {}, "source": [ "Group on multiple columns using thier indexes." @@ -3242,7 +3281,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c281e305", + "id": "bb9dd53b", "metadata": {}, "outputs": [], "source": [ @@ -3251,7 +3290,7 @@ }, { "cell_type": "markdown", - "id": "e5d04220", + "id": "14dfdd2a", "metadata": {}, "source": [ "Example table with Nulls." @@ -3260,14 +3299,14 @@ { "cell_type": "code", "execution_count": null, - "id": "ae67684c", + "id": "8f389591", "metadata": {}, "outputs": [], "source": [ "tab = kx.Table(\n", " [\n", " [\"a\", 12, 12],\n", - " [kx.q('`'), 12.3, 33.],\n", + " [kx.SymbolAtom.null, 12.3, 33.],\n", " [\"b\", 12.3, 123],\n", " [\"a\", 1, 1]\n", " ],\n", @@ -3278,7 +3317,7 @@ }, { "cell_type": "markdown", - "id": "512021d7", + "id": "62e3f5f5", "metadata": {}, "source": [ "Group on column `a` and keep null groups." @@ -3287,7 +3326,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a09a6d3a", + "id": "bcca967d", "metadata": { "scrolled": true }, @@ -3298,7 +3337,7 @@ }, { "cell_type": "markdown", - "id": "4ca2006b", + "id": "2ddc596a", "metadata": {}, "source": [ "Group on column `a` keeping null groups and not using the groups as an index column." 
@@ -3307,7 +3346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "caa2576e", + "id": "c8f9a0b4", "metadata": {}, "outputs": [], "source": [ @@ -3316,7 +3355,7 @@ }, { "cell_type": "markdown", - "id": "660b3c92", + "id": "56cf152e", "metadata": {}, "source": [ "## Apply\n", @@ -3364,7 +3403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d746cddb", + "id": "0a85caee", "metadata": {}, "outputs": [], "source": [ @@ -3375,7 +3414,7 @@ }, { "cell_type": "markdown", - "id": "54c09d0c", + "id": "e4cddd7b", "metadata": {}, "source": [ "Apply square root on each item within a column" @@ -3384,7 +3423,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f8bbcac7", + "id": "0895f9c5", "metadata": {}, "outputs": [], "source": [ @@ -3393,7 +3432,7 @@ }, { "cell_type": "markdown", - "id": "09a61483", + "id": "47b6ca70", "metadata": {}, "source": [ "Apply a reducing function sum on either axis" @@ -3402,7 +3441,7 @@ { "cell_type": "code", "execution_count": null, - "id": "84b92b9b", + "id": "901a692b", "metadata": {}, "outputs": [], "source": [ @@ -3412,7 +3451,7 @@ { "cell_type": "code", "execution_count": null, - "id": "169d8ed3", + "id": "43ab33ab", "metadata": {}, "outputs": [], "source": [ @@ -3421,7 +3460,7 @@ }, { "cell_type": "markdown", - "id": "ed4d720c", + "id": "c20acb8a", "metadata": {}, "source": [ "## Aggregate\n", @@ -3465,7 +3504,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2696cf42", + "id": "0fd05e6e", "metadata": {}, "outputs": [], "source": [ @@ -3480,7 +3519,7 @@ }, { "cell_type": "markdown", - "id": "3f90677b", + "id": "cecd45f0", "metadata": {}, "source": [ "Aggregate a list of functions over rows" @@ -3489,7 +3528,7 @@ { "cell_type": "code", "execution_count": null, - "id": "861e5787", + "id": "857ff7cf", "metadata": {}, "outputs": [], "source": [ @@ -3498,7 +3537,7 @@ }, { "cell_type": "markdown", - "id": "ccdaee01", + "id": "8bc17135", "metadata": {}, "source": [ "Perform an aggregation using a user 
specified function" @@ -3507,7 +3546,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b5f9f25b", + "id": "4108f2e5", "metadata": {}, "outputs": [], "source": [ @@ -3519,7 +3558,7 @@ }, { "cell_type": "markdown", - "id": "667d9961", + "id": "ba013165", "metadata": {}, "source": [ "Apply an aggregation supplying column specification for supplied function" @@ -3528,7 +3567,7 @@ { "cell_type": "code", "execution_count": null, - "id": "60845603", + "id": "1cf2c721", "metadata": {}, "outputs": [], "source": [ @@ -3537,7 +3576,7 @@ }, { "cell_type": "markdown", - "id": "256f5496", + "id": "dc726b75", "metadata": {}, "source": [ "## Data Preprocessing" @@ -3545,7 +3584,7 @@ }, { "cell_type": "markdown", - "id": "976e633c", + "id": "d508891a", "metadata": {}, "source": [ "### Table.add_prefix()\n", @@ -3572,7 +3611,7 @@ }, { "cell_type": "markdown", - "id": "77ff0376", + "id": "4255701a", "metadata": {}, "source": [ "**Examples:**\n", @@ -3583,7 +3622,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c71b39c6", + "id": "905c810d", "metadata": {}, "outputs": [], "source": [ @@ -3592,7 +3631,7 @@ }, { "cell_type": "markdown", - "id": "8b6968da", + "id": "cd6a4005", "metadata": {}, "source": [ "Add \"col_\" to table columns:" @@ -3601,7 +3640,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aa98ca46", + "id": "11296af4", "metadata": {}, "outputs": [], "source": [ @@ -3610,7 +3649,7 @@ }, { "cell_type": "markdown", - "id": "5f87eeba", + "id": "8fb874ba", "metadata": {}, "source": [ "### Table.add_suffix()\n", @@ -3637,7 +3676,7 @@ }, { "cell_type": "markdown", - "id": "dc449e82", + "id": "47618c02", "metadata": {}, "source": [ "**Examples:**\n", @@ -3648,7 +3687,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4471a14b", + "id": "1e5c17b3", "metadata": {}, "outputs": [], "source": [ @@ -3657,7 +3696,7 @@ }, { "cell_type": "markdown", - "id": "b01dfa6c", + "id": "e93f30cb", "metadata": {}, "source": [ "Add \"_col\" to table 
columns:" @@ -3666,7 +3705,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c7c46631", + "id": "5625768b", "metadata": {}, "outputs": [], "source": [ @@ -3675,7 +3714,7 @@ }, { "cell_type": "markdown", - "id": "d56eeae9", + "id": "a5bb7631", "metadata": {}, "source": [ "### Table.astype()\n", @@ -3704,7 +3743,7 @@ }, { "cell_type": "markdown", - "id": "5d27ccde", + "id": "e0af2087", "metadata": {}, "source": [ "**Examples:**\n", @@ -3715,16 +3754,21 @@ { "cell_type": "code", "execution_count": null, - "id": "63d18dce", + "id": "deb4809e", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')" + "df = kx.Table(data = {\n", + " 'c1': kx.IntVector([1, 2, 3]),\n", + " 'c2': kx.LongVector([1, 2, 3]),\n", + " 'c3': kx.ShortVector([1, 2, 3]),\n", + " 'c4': kx.IntVector([1, 2, 3])\n", + " })" ] }, { "cell_type": "markdown", - "id": "4e6fad4f", + "id": "9126a84d", "metadata": {}, "source": [ "Cast all columns to dtype LongVector" @@ -3733,7 +3777,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0ef76c1e", + "id": "da1b75cb", "metadata": {}, "outputs": [], "source": [ @@ -3742,7 +3786,7 @@ }, { "cell_type": "markdown", - "id": "1846286e", + "id": "3799183f", "metadata": {}, "source": [ "Casting as specified in the dictionary supplied with given dtype per column" @@ -3751,7 +3795,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a4cc4bb7", + "id": "77de55f5", "metadata": {}, "outputs": [], "source": [ @@ -3760,7 +3804,7 @@ }, { "cell_type": "markdown", - "id": "c77a5800", + "id": "e73a33cd", "metadata": {}, "source": [ "The next example will use this table" @@ -3769,16 +3813,24 @@ { "cell_type": "code", "execution_count": null, - "id": "78b91d9f", + "id": "73e47ecc", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:(\"abc\";\"def\";\"ghi\");c5:\"abc\";c6:(1 2 3;4 5 6;7 8 9))')" + "df = kx.Table(data={\n", + " 'c1': 
kx.TimestampAtom('now'),\n", + " 'c2': ['abc', 'def', 'ghi'],\n", + " 'c3': [1, 2, 3],\n", + " 'c4': [b'abc', b'def', b'ghi'],\n", + " 'c5': b'abc',\n", + " 'c6': [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n", + " })\n", + "df" ] }, { "cell_type": "markdown", - "id": "e89a0596", + "id": "5eb8e9f2", "metadata": {}, "source": [ "Casting char and string columns to symbol columns" @@ -3787,7 +3839,7 @@ { "cell_type": "code", "execution_count": null, - "id": "599dca72", + "id": "b56e61ab", "metadata": {}, "outputs": [], "source": [ @@ -3796,7 +3848,7 @@ }, { "cell_type": "markdown", - "id": "92ab62d2", + "id": "c7422edd", "metadata": {}, "source": [ "### Table.drop()\n", @@ -3823,7 +3875,7 @@ }, { "cell_type": "markdown", - "id": "756e1611", + "id": "6b589694", "metadata": {}, "source": [ "**Examples:**\n", @@ -3834,20 +3886,25 @@ { "cell_type": "code", "execution_count": null, - "id": "60fb2684", + "id": "e0df894a", "metadata": {}, "outputs": [], "source": [ "# The examples in this section will use this example table filled with random data\n", - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "N = 1000\n", + "tab = kx.Table(data = {\n", + " 'x': kx.q.til(N),\n", + " 'y': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'z': kx.random.random(N, 500.0),\n", + " 'w': kx.random.random(N, 1000),\n", + " 'v': kx.random.random(N, [kx.LongAtom.null, 0, 50, 100, 200, 250])})\n", "tab.head()" ] }, { "cell_type": "code", "execution_count": null, - "id": "bc0db439", + "id": "f7553c97", "metadata": {}, "outputs": [], "source": [ @@ -3856,7 +3913,7 @@ }, { "cell_type": "markdown", - "id": "b6b79c9b", + "id": "3b68fcbf", "metadata": {}, "source": [ "Drop columns from a table." 
@@ -3865,7 +3922,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41eb79c1", + "id": "1a07c27f", "metadata": {}, "outputs": [], "source": [ @@ -3874,7 +3931,7 @@ }, { "cell_type": "markdown", - "id": "e34706ea", + "id": "d30d870b", "metadata": {}, "source": [ "### Table.drop_duplicates()\n", @@ -3894,7 +3951,7 @@ }, { "cell_type": "markdown", - "id": "e9e064d1", + "id": "3c633610", "metadata": {}, "source": [ "**Examples:**\n", @@ -3905,17 +3962,21 @@ { "cell_type": "code", "execution_count": null, - "id": "7c8be915", + "id": "672ae369", "metadata": {}, "outputs": [], "source": [ - "tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')\n", + "N = 100\n", + "tab2 = kx.Table(data ={\n", + " 'x': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'x1': kx.random.random(N, 3)\n", + " })\n", "tab2" ] }, { "cell_type": "markdown", - "id": "4af0c99d", + "id": "5912fc4e", "metadata": {}, "source": [ "Drop all duplicate rows from the table." @@ -3924,7 +3985,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5f6ec5c7", + "id": "9cc0d387", "metadata": {}, "outputs": [], "source": [ @@ -3933,7 +3994,7 @@ }, { "cell_type": "markdown", - "id": "77282b77", + "id": "6110d8d9", "metadata": {}, "source": [ "### Table.pop()\n", @@ -3959,7 +4020,7 @@ }, { "cell_type": "markdown", - "id": "6846f6a1", + "id": "70c2c22a", "metadata": {}, "source": [ "**Examples:**\n", @@ -3970,7 +4031,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40ab2931", + "id": "cc1770f6", "metadata": { "scrolled": true }, @@ -3985,7 +4046,7 @@ }, { "cell_type": "markdown", - "id": "45aca79f", + "id": "e4843e47", "metadata": {}, "source": [ "Remove the `z` and `w` columns from the table and return them." 
@@ -3994,7 +4055,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2f381911", + "id": "3c9dda2a", "metadata": {}, "outputs": [], "source": [ @@ -4007,7 +4068,7 @@ }, { "cell_type": "markdown", - "id": "2f4954bb", + "id": "68e67196", "metadata": {}, "source": [ "### Table.rename()\n", @@ -4041,7 +4102,7 @@ }, { "cell_type": "markdown", - "id": "ddd7f1f2", + "id": "08c8748e", "metadata": {}, "source": [ "**Examples:**\n", @@ -4052,7 +4113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d844c2c3", + "id": "e131bae9", "metadata": {}, "outputs": [], "source": [ @@ -4062,7 +4123,7 @@ }, { "cell_type": "markdown", - "id": "9b819386", + "id": "b5ef3e3d", "metadata": {}, "source": [ "Rename column `x` to `index` and `y` to `symbol` using the `columns` keyword." @@ -4071,7 +4132,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e352c9ba", + "id": "e03e5b8e", "metadata": {}, "outputs": [], "source": [ @@ -4080,7 +4141,7 @@ }, { "cell_type": "markdown", - "id": "4f9e2895-a82a-4f8e-ae2c-d3f898ece131", + "id": "6d25ea19", "metadata": {}, "source": [ "Rename column `x` to `index` and `y` to `symbol` by setting the `axis` keyword." @@ -4089,7 +4150,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16ae0555-9d92-4642-9671-03a2790216c8", + "id": "4a8da84c", "metadata": {}, "outputs": [], "source": [ @@ -4098,7 +4159,7 @@ }, { "cell_type": "markdown", - "id": "70e2735a-b582-47f7-9557-5f64f2238e89", + "id": "9d887f84", "metadata": {}, "source": [ "Rename index of a keyed table by using literal `index` as the `axis` parameter." 
@@ -4107,7 +4168,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7b2bcbd6-32ef-4988-ac81-3de73222face", + "id": "4619e64e", "metadata": {}, "outputs": [], "source": [ @@ -4116,7 +4177,274 @@ }, { "cell_type": "markdown", - "id": "b85d53ba", + "id": "fda14bd0-5be3-44f3-a5ba-36ab067eb384", + "metadata": {}, + "source": [ + "### Table.replace()\n", + "``` Table.replace(to_replace, value) ```\n", + "\n", + "Replace all values in a table with another given value.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :-------: | :--- | :------------------------------------------------------------------------------------------| :-----: |\n", + "| to_replace| any | Value of element in table you wish to replace. | None |\n", + "| value | any | New value to perform replace with. | None |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given elements replaced with new value. |" + ] + }, + { + "cell_type": "markdown", + "id": "d211a836-b74c-42df-9da4-b20896c6c1f7", + "metadata": {}, + "source": [ + "**Examples**\n", + "\n", + "Create an unkeyed `Table` and a `KeyedTable` with elements to be replaced." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "bbbec511-0395-4be3-b9b4-e6d3c09a21a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bcde
a
241ba1
220bb2
361bc`a
" + ], + "text/plain": [ + "pykx.KeyedTable(pykx.q('\n", + "a| b c d e \n", + "-| --------\n", + "2| 4 1 a 1 \n", + "2| 2 0 b 2 \n", + "3| 6 1 c `a\n", + "'))" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab = kx.q('([] a:2 2 3; b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))')\n", + "ktab = kx.q('([a:2 2 3]b:4 2 6; c:(1b;0b;1b); d:(`a;`b;`c); e:(1;2;`a))')\n", + "ktab" + ] + }, + { + "cell_type": "markdown", + "id": "cbfcf189-628d-45fe-ab85-2330b46fdcc9", + "metadata": {}, + "source": [ + "Replace all instances of `2` in the `KeyedTable` with `123`. Note the key column remains unchanged." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3a36a978-022a-4e49-8191-05a768d5f30e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bcde
a
241ba1
21230bb123
361bc`a
" + ], + "text/plain": [ + "pykx.KeyedTable(pykx.q('\n", + "a| b c d e \n", + "-| -----------\n", + "2| 4 1 a 1 \n", + "2| 123 0 b 123\n", + "3| 6 1 c `a \n", + "'))" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ktab.replace(2,123)" + ] + }, + { + "cell_type": "markdown", + "id": "6cc51c70-af14-4061-bdd7-d2fa7d8df20b", + "metadata": {}, + "source": [ + "Replace all `True` values with a list of strings." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "a1b87680-f2aa-4434-bcb6-2f4b384b735c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcde
024`one`two`threea1
1220bb2
236`one`two`threec`a
" + ], + "text/plain": [ + "pykx.Table(pykx.q('\n", + "a b c d e \n", + "-----------------------\n", + "2 4 `one`two`three a 1 \n", + "2 2 0b b 2 \n", + "3 6 `one`two`three c `a\n", + "'))" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab.replace(True, (\"one\", \"two\", \"three\"))" + ] + }, + { + "cell_type": "markdown", + "id": "73059996", "metadata": {}, "source": [ "### Table.reset_index()\n", @@ -4158,7 +4486,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a723d14d", + "id": "05f5d858", "metadata": {}, "outputs": [], "source": [ @@ -4176,7 +4504,7 @@ }, { "cell_type": "markdown", - "id": "089ad779", + "id": "ac9a7e94", "metadata": {}, "source": [ "Resetting the index of the table will result in original index columns being added to the table directly" @@ -4185,7 +4513,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4662c138", + "id": "35f78f09", "metadata": {}, "outputs": [], "source": [ @@ -4194,7 +4522,7 @@ }, { "cell_type": "markdown", - "id": "4e019e54", + "id": "ea62a377", "metadata": {}, "source": [ "Reset the index adding a specified named column to the table" @@ -4203,7 +4531,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a990ea29", + "id": "c136c0f7", "metadata": {}, "outputs": [], "source": [ @@ -4212,7 +4540,7 @@ }, { "cell_type": "markdown", - "id": "f186c5fb", + "id": "4a4223bb", "metadata": {}, "source": [ "Reset the index using multiple named columns" @@ -4221,7 +4549,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9c62edc4", + "id": "be677606", "metadata": {}, "outputs": [], "source": [ @@ -4230,7 +4558,7 @@ }, { "cell_type": "markdown", - "id": "c6f54a5c", + "id": "535841af", "metadata": {}, "source": [ "Reset the index specifying the column `number` which is to be added to the table" @@ -4239,7 +4567,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c52367f4", + "id": "b3e6bda0", "metadata": {}, "outputs": 
[], "source": [ @@ -4248,7 +4576,7 @@ }, { "cell_type": "markdown", - "id": "ee76fa24", + "id": "80719030", "metadata": {}, "source": [ "Reset the index specifying multiple numbered columns" @@ -4257,7 +4585,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0cf6b213", + "id": "fab2e4e7", "metadata": {}, "outputs": [], "source": [ @@ -4266,7 +4594,7 @@ }, { "cell_type": "markdown", - "id": "7fc928a5", + "id": "ed82d445", "metadata": {}, "source": [ "Drop index columns from table" @@ -4275,7 +4603,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8596e5a1", + "id": "945b8293", "metadata": {}, "outputs": [], "source": [ @@ -4284,7 +4612,7 @@ }, { "cell_type": "markdown", - "id": "e95b57dd", + "id": "db72bcbb", "metadata": {}, "source": [ "Drop specified key columns from table" @@ -4293,7 +4621,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dde1ee77", + "id": "b9646f1d", "metadata": {}, "outputs": [], "source": [ @@ -4302,7 +4630,7 @@ }, { "cell_type": "markdown", - "id": "8e19ddeb", + "id": "2201d826", "metadata": {}, "source": [ "### Table.set_index()\n", @@ -4343,18 +4671,23 @@ { "cell_type": "code", "execution_count": null, - "id": "6ede4322", + "id": "e2ef05c3", "metadata": {}, "outputs": [], "source": [ - "kx.q('N: 10')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: N?0 1; hold: N?01b)')" + "N = 10\n", + "tab = kx.Table(data={\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'price': 2.5 - kx.random.random(N, 5.0),\n", + " 'traded': 10 - kx.random.random(N, 20),\n", + " 'hold': kx.random.random(N, False)\n", + " })" ] }, { "cell_type": "code", "execution_count": null, - "id": "f6708166", + "id": "f561efd4", "metadata": {}, "outputs": [], "source": [ @@ -4365,7 +4698,7 @@ { "cell_type": "code", "execution_count": null, - "id": "abf46438", + "id": "66f9b964", "metadata": {}, "outputs": [], "source": [ @@ -4376,7 +4709,7 @@ { "cell_type": "code", "execution_count": null, - "id": 
"567ff8e9", + "id": "00dda488", "metadata": {}, "outputs": [], "source": [ @@ -4387,7 +4720,7 @@ }, { "cell_type": "markdown", - "id": "fb24895d", + "id": "965ef63a", "metadata": {}, "source": [ "Appending:" @@ -4396,7 +4729,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d080737f", + "id": "cf53a132", "metadata": {}, "outputs": [], "source": [ @@ -4407,7 +4740,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c33e779e", + "id": "19f88bde", "metadata": {}, "outputs": [], "source": [ @@ -4417,7 +4750,7 @@ }, { "cell_type": "markdown", - "id": "c7eab4a6", + "id": "7d605454", "metadata": {}, "source": [ "Verify Integrity:" @@ -4426,7 +4759,7 @@ { "cell_type": "code", "execution_count": null, - "id": "98fc7587", + "id": "63c810f0", "metadata": {}, "outputs": [], "source": [ @@ -4437,7 +4770,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b17c1a22", + "id": "266dbc68", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/user-guide/advanced/limitations.md b/docs/user-guide/advanced/limitations.md index b55cc5a..eed8a0f 100644 --- a/docs/user-guide/advanced/limitations.md +++ b/docs/user-guide/advanced/limitations.md @@ -2,7 +2,7 @@ When q is run embedded within a Python process (as opposed to over IPC), it is restricted in how it can operate. This is a result of the fact that when running embedded it does not have the main loop or timers that one would expect from a typical q process. The following are a number of examples showing these limitations in action -## IPC Interface +## IPC Interface As a result of the lack of a main loop PyKX cannot be used to respond to q IPC requests as a server. Callback functions such as [`.z.pg`](https://code.kx.com/q/ref/dotz/#zpg-get) defined within a Python process will not operate as expected. 
@@ -52,4 +52,3 @@ Attempting to use the timer callback function directly using PyKX will raise an >>> kx.q.z.ts AttributeError: ts: .z.ts is not exposed through the context interface because the main loop is inactive in PyKX. ``` - diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index 12f1cc3..1f07f44 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -88,7 +88,6 @@ The following variables can be used to enable or disable advanced features of Py | `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. | | | `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. | | | `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. | | -| `PYKX_SKIP_SIGNAL_OVERWRITE` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions with are reset by PyKX on initialisation in licensed modality. | | | `PYKX_NO_SIGNAL` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions with are reset by PyKX on initialisation in licensed modality. | | | `PYKX_4_1_ENABLED` | `False` | `1` or `true` | Load version 4.1 of `libq` when starting `PyKX` in licensed mode, this environment variable does not work without a valid `q` license. 
| | | `PYKX_NO_SIGINT` | `False` | `1` or `true` | Avoid setting `signal.signal(signal.SIGINT)` once PyKX is loaded, these are presently set to the Python default values once PyKX is loaded to ensure that PyKX licensed modality does not block their use by Python. | `DEPRECATED`, please use `PYKX_NO_SIGNAL` | diff --git a/docs/user-guide/fundamentals/conversion_considerations.md b/docs/user-guide/fundamentals/conversion_considerations.md new file mode 100644 index 0000000..1fa75f4 --- /dev/null +++ b/docs/user-guide/fundamentals/conversion_considerations.md @@ -0,0 +1,147 @@ +# PyKX Conversion Considerations + +PyKX attempts to make conversions between q and Python as seamless as possible. +However due to differences in their underlying implementations there are cases where 1 to 1 mappings are not possible. + +## Data types and conversions + +The key PyKX APIs around data types and conversions are outlined under: + +* [Convert Pythonic data to PyKX](../../api/pykx-q-data/toq.md) +* [PyKX type wrappers](../../api/pykx-q-data/wrappers.md) +* [PyKX to Pythonic data type mapping](../../api/pykx-q-data/type_conversions.md) +* [Registering Custom Conversions](../../api/pykx-q-data/register.md) + +## Text representation in PyKX + +[Text representation in PyKX](../fundamentals/text.md) requires consideration as there are some key differences between the `Symbol` and `Char` data types. + +## Nulls and Infinites + +Most q datatypes have the concepts of null, negative infinity, and infinity. Python does not have the concept of infinites and its null behaviour differs in implementation. The page [handling nulls and infinities](./nulls_and_infinities.md) details the needed considerations when dealing with these special values. 
+ +## Temporal types + +### Timestamp/Datetime types + +Particular care is needed when converting temporal types as Python and q use different [epoch](https://en.wikipedia.org/wiki/Epoch_(computing)) values: + +* q 2000 +* Python 1970 + +__Note:__ The following details focus on `NumPy` but similar considerations should be taken into account when converting Python, Pandas, and PyArrow objects. + +The 30 year epoch offset means there are times which are unreachable in one or the other language: + +| | TimestampVector | datetime64[ns] | +|---------------|---------------------------------|---------------------------------| +| Minimum value | `1707.09.22D00:12:43.145224194` | `1677-09-21T00:12:43.145224194` | +| Maximum value | `2292.04.10D23:47:16.854775806` | `2262-04-11T23:47:16.854775807` | + +As such the range of times which can be directly converted should be considered: + +* Minimum value: `1707-09-22T00:12:43.145224194` +* Maximum value: `2262-04-11T23:47:16.854775807` + +As mentioned [above](#nulls-and-infinites) most q data types have null, negative infinity, and infinity values. 
+ +| | q representation | datetime64[ns] | +|-------------------|------------------|---------------------------------| +| Null | `0Np` | `NaT` | +| Negative Infinity | `-0Wp` | `1707-09-22T00:12:43.145224193` | +| Infinity | `0Wp` | Overflow cannot be represented | + +Converting from q to NumPy using `.np()`, `0Np` and `-0Wp` convert to meaningful values but `0Wp` overflows: + +```python +>>> kx.q('0N -0W 0Wp').np() +array(['NaT', '1707-09-22T00:12:43.145224193', '1707-09-22T00:12:43.145224191'], dtype='datetime64[ns]') +``` + +When converting to q using `toq`, by default only the NumPy maximum value converts to a meaningful value: + +```python +>>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]') +>>> kx.toq(arr) +pykx.TimestampVector(pykx.q('2262.04.11D23:47:16.854775808 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807')) +``` + +To additionally handle `NaT` being converted the `handle_nulls` keyword of `toq` can be used: + +```python +>>> arr = np.array(['NaT', '1677-09-21T00:12:43.145224194', '2262-04-11T23:47:16.854775807'], dtype='datetime64[ns]') +>>> kx.toq(arr, handle_nulls=True) +pykx.TimestampVector(pykx.q('0N 2262.04.11D23:47:16.854775810 2262.04.11D23:47:16.854775807')) +``` + +Using `raw=True` we can request that the epoch offset is not applied. 
This allows for the underlying numeric values to be accessed directly: + +```python +>>> kx.q('0N -0W 0Wp').np(raw=True) +array([-9223372036854775808, -9223372036854775807, 9223372036854775807]) +``` + +Passing back to q with `toq` these are then presented as the long null, negative infinity, and infinity: + +```python +>>> kx.toq(kx.q('0N -0W 0Wp').np(raw=True)) +pykx.LongVector(pykx.q('0N -0W 0W')) +``` + +`ktype` can be passed during `toq` to specify desired types: + +```python +>>> kx.toq(pd.DataFrame(data= {'d':np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]')}), ktype={'d':kx.DateVector}) +pykx.Table(pykx.q(' +d +---------- +2020.09.08 +')) +``` + +Note that: + +* Dictionary based conversion is only supported when operating in [licensed mode](../../user-guide/advanced/modes.md). +* Data is first converted to the default type and then cast to the desired type. + +Other items of note: + +* In NumPy further data types exist `datetime64[us]`, `datetime64[ms]`, `datetime64[s]` which due to their lower precision have a wider range of dates they can represent. When converted to q using `toq` these all present as q `Timestamp` type and as such only dates within the range this data type can represent should be converted. +* Pandas 2.* changes behavior and conversions should be reviewed as part of an upgrade of this package. [PyKX to Pythonic data type mapping](../../api/pykx-q-data/type_conversions.md) includes examples showing differences seen when calling `.pd()`. + +### Duration types + +Duration types do not have the issue of epoch offsets but some range limitations exist when converting between Python and PyKX. 
+ +`kx.SecondVector` and `kx.MinuteVector` convert to `timedelta64[s]`: + +| | q representation | timedelta64[s] | +|-------------------------------------|------------------|---------------------------| +| `kx.SecondVector` Null | `0Nv` | `NaT` | +| `kx.SecondVector` Negative Infinity | `-0Wv` | `-24856 days +20:45:53` | +| `kx.SecondVector` Infinity | `0Wv` | `24855 days 03:14:07` | +| `kx.MinuteVector` Null | `0Nu` | `NaT` | +| `kx.MinuteVector` Negative Infinity | `-0Wu` | `-1491309 days +21:53:00` | +| `kx.MinuteVector` Infinity | `0Wu` | `1491308 days 02:07:00` | + +When converting Python to q using `toq` care must be taken as `timedelta64[s]` is 64 bit and converts to `kx.SecondVector` which is 32 bit: + +| | SecondVector | timedelta64[s] | +|---------------|--------------|-----------------------------------| +| Minimum value | `-**:14:06` | `-106751991167301 days +08:29:53` | +| Maximum value | `**:14:06` | `106751991167300 days 15:30:07` | + +As such the range of times which can be directly converted should be considered: + +* Minimum value: `-24856 days +20:45:54` +* Maximum value: `24855 days 03:14:06` + +q does not display values of second type over `99:59:59`, beyond this `**` is displayed in the hour field. +The data is still stored correctly and will display when converted: + +```python +>>> kx.q('99:59:59 +1') +pykx.SecondAtom(pykx.q('**:00:00')) +>>> kx.q('99:59:59 +1').pd() +Timedelta('4 days 04:00:00') +``` diff --git a/docs/user-guide/fundamentals/creating.md b/docs/user-guide/fundamentals/creating.md index 679b5cb..8b6357c 100644 --- a/docs/user-guide/fundamentals/creating.md +++ b/docs/user-guide/fundamentals/creating.md @@ -360,11 +360,13 @@ x1: double Care should be taken in particular when converting q temporal data to Python native data types. As Python temporal data types only support microsecond precision roundtrip conversions will reduce temporal granularity for q data. 
- ```python - >>> import pykx as kx - >>> qtime = kx.TimestampAtom('now') - >>> qtime - pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627552')) - >>> kx.toq(qtime.py()) - pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627000')) - ``` + ```python + >>> import pykx as kx + >>> qtime = kx.TimestampAtom('now') + >>> qtime + pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627552')) + >>> kx.toq(qtime.py()) + pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627000')) + ``` + + See [here](../fundamentals/conversion_considerations.md#temporal-types) for further details. diff --git a/docs/user-guide/fundamentals/types.md b/docs/user-guide/fundamentals/types.md deleted file mode 100644 index e69de29..0000000 diff --git a/examples/notebooks/interface_overview.ipynb b/examples/notebooks/interface_overview.ipynb index 1623b7e..1f62f6c 100644 --- a/examples/notebooks/interface_overview.ipynb +++ b/examples/notebooks/interface_overview.ipynb @@ -34,23 +34,25 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": ["hide_code"] - }, - "outputs": [], - "source": [ - "import os\n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" - ] - }, - { "cell_type": "code", "execution_count": null, "metadata": { + "tags": [ + "hide_code" + ] }, "outputs": [], + "source": [ + "import os\n", + "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import pykx as kx\n", "kx.q.system.console_size = [10, 80]" @@ -547,12 +549,10 @@ "import time\n", "\n", "try:\n", - " proc = subprocess.Popen(\n", - " ('q', '-p', '5000'),\n", - " stdin=subprocess.PIPE,\n", - " stdout=subprocess.DEVNULL,\n", - " stderr=subprocess.DEVNULL,\n", - " )\n", + " with kx.PyKXReimport():\n", + " proc = subprocess.Popen(\n", + " ('q', '-p', '5000')\n", + " )\n", " time.sleep(2)\n", "except:\n", " raise kx.QError('Unable to create q process on port 5000')" @@ -637,7 +637,6 @@ "metadata": {}, "outputs": [], "source": [ - "proc.stdin.close()\n", "proc.kill()" ] }, @@ -1083,7 +1082,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.12" }, "mimetype": "text/x-python", "name": "python", diff --git a/mkdocs.yml b/mkdocs.yml index fe1d2c0..cd5906c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,7 @@ markdown_extensions: - pymdownx.arithmatex: generic: true - pymdownx.caret + - pymdownx.inlinehilite - pymdownx.details - pymdownx.emoji: emoji_index: !!python/name:materialx.emoji.twemoji @@ -115,6 +116,12 @@ plugins: - https://pandas.pydata.org/docs/objects.inv - render_swagger - search + - exclude-search: + exclude: + - getting-started/q_magic_command.ipynb + - user-guide/advanced/Pandas_API.ipynb + - getting-started/PyKX Introduction Notebook.ipynb + - examples/db-management.ipynb - spellcheck: known_words: spelling.txt ignore_code: true # Ignore words in tags @@ -129,6 +136,7 @@ plugins: - user-guide/advanced/Pandas_API.ipynb - getting-started/PyKX Introduction Notebook.ipynb - examples/db-management.ipynb + - examples/charting.ipynb theme: @@ -144,8 +152,10 @@ theme: - content.tabs.link # Insiders - header.autohide - navigation.tabs + - navigation.footer - content.code.annotate - content.action.edit + - content.code.copy palette: - media: 
"(prefers-color-scheme: light)" scheme: kx-light @@ -183,6 +193,7 @@ nav: - Interacting with PyKX objects: user-guide/fundamentals/evaluating.md - Querying data: user-guide/fundamentals/querying.md - Indexing PyKX objects: user-guide/fundamentals/indexing.md + - Conversion considerations: user-guide/fundamentals/conversion_considerations.md - Text Representation in PyKX: user-guide/fundamentals/text.md - Handling nulls and infinities: user-guide/fundamentals/nulls_and_infinities.md - Advanced usage and performance considerations: @@ -190,7 +201,7 @@ nav: - Database interactions: user-guide/advanced/database.md - Using q functions in a Pythonic way: user-guide/advanced/context_interface.md - Modes of operation: user-guide/advanced/modes.md - - Numpy integration: user-guide/advanced/numpy.md + - NumPy integration: user-guide/advanced/numpy.md - Serialization and de-serialization: user-guide/advanced/serialization.md - Performance considerations: user-guide/advanced/performance.md - Interface limitations: user-guide/advanced/limitations.md @@ -217,10 +228,12 @@ nav: - IPC: api/ipc.md - PyKX Exceptions: api/exceptions.md - Schema generation: api/schema.md + - Streamlit Integration: api/streamlit.md - System Command Wrappers: api/system.md + - Utilities: api/util.md - File loading and saving: - - Writing PyKX data to disk: api/pykx-save-load/write.md - - Reading PyKX data from disk: api/pykx-save-load/read.md + - Writing data to disk: api/pykx-save-load/write.md + - Reading data from disk: api/pykx-save-load/read.md - Reimporter module: api/reimporting.md - Serialization: api/serialize.md - Beta Features: @@ -229,6 +242,7 @@ nav: - Compression and Encryption: beta-features/compress-encypt.md - Remote Function Execution: beta-features/remote-functions.md - Multithreading: beta-features/threading.md + - Streamlit: beta-features/streamlit.md - Python interfacing within q: - Overview: pykx-under-q/intro.md - API: pykx-under-q/api.md @@ -237,7 +251,9 @@ nav: - Examples: - 
Subscriber: examples/subscriber/readme.md - Compression and Encryption: examples/compress_and_encrypt/readme.md + - Database Creation and Management: examples/db-management.ipynb - IPC: examples/ipc/README.md + - Charting Data with PyKX: examples/charting.ipynb - PyKX as a Server: examples/server/server.md - Multithreaded Execution: examples/threaded_execution/threading.md - Extras: diff --git a/pyproject.toml b/pyproject.toml index 35d6aa7..daef722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ doc = [ "mkdocs-autorefs==0.4.1", "mkdocs-click==0.5.0", "mkdocs-exclude==1.0.2", + "mkdocs-exclude-search==0.6.6", "mkdocs-jupyter~=0.24", "mkdocs-material~=9.4.5", "mkdocs-render-swagger-plugin==0.0.3", @@ -100,6 +101,9 @@ dashboards = [ beta = [ "dill>=0.2.0", ] +streamlit = [ + "streamlit~=1.28; python_version>'3.7'" +] test = [ "coverage[toml]==6.3.2", "Cython~=3.0.0", @@ -213,6 +217,7 @@ ignore = [ "I100", # import statements are in the wrong order "I202", # additional newline in a group of imports (We use three 3: built-in, third-party, local) "W503", # depracated warning - goes against PEP8 + "W605", # Invalid escape character in comments causing issue with q examples ] diff --git a/setup.py b/setup.py index 2696be7..61d2393 100755 --- a/setup.py +++ b/setup.py @@ -177,6 +177,8 @@ def ext(name: str, '-O3', '-Wall', '-Wextra', + '-Wno-error=incompatible-pointer-types', # Warning became an error in GCC 14.x + '-Wno-error=int-conversion', # Warning became an error in GCC 14.x # It'd be nice if we could leave -Wunused-variable enabled, but when Cython's binding # option is True (which it needs to be to generate signatures for its callables) tons of # unused variables are created. This clutters the compiler output, which could hide diff --git a/src/pykx/__init__.py b/src/pykx/__init__.py index c756518..f055f88 100644 --- a/src/pykx/__init__.py +++ b/src/pykx/__init__.py @@ -231,7 +231,7 @@ def _register(self, self._call( f'{"" if name[0] == "." 
else "."}{name}:(enlist`)!enlist(::);' f'system "d {"" if name[0] == "." else "."}{name}";' - f'system "l {path.as_posix()}"', + f'.pykx.util.loadfile["{path.parent}";"{path.name}"];', wait=True, ) return name[1:] if name[0] == '.' else name @@ -277,6 +277,7 @@ def paths(self, paths: List[Union[str, Path]]): from . import exceptions from . import wrappers from . import schema +from . import streamlit from . import random from ._wrappers import _init as _wrappers_init @@ -360,7 +361,7 @@ def install_into_QHOME(overwrite_embedpy=False, to_local_folder=False) -> None: def activate_numpy_allocator() -> None: - """Sets the allocator used for Numpy array data to one optimzied for use with PyKX. + """Sets the allocator used for Numpy array data to one optimized for use with PyKX. This will only change the default allocator if the environment variable `PYKX_ALLOCATOR` is set to 1 or if the flag `--pykxalloc` is present in the QARGS environment variable. @@ -376,7 +377,7 @@ def activate_numpy_allocator() -> None: Numpy arrays created with this allocator can be converted into a q vector without copying the data. - Because q objects must have their metadata immediately preceeding the data, only a single + Because q objects must have their metadata immediately preceding the data, only a single q vector can be created using this approach. Repeated conversions of the Numpy array into a q vector will yield the same q vector with its reference count incremented by 1 each time. 
diff --git a/src/pykx/_ipc.pyx b/src/pykx/_ipc.pyx index eb66ef9..ecf5d57 100644 --- a/src/pykx/_ipc.pyx +++ b/src/pykx/_ipc.pyx @@ -74,6 +74,28 @@ def _unlicensed_call(handle: int, query: bytes, parameters: List[K], wait: bool) cpdef ssl_info(): + """View information relating to the TLS settings used by PyKX from your process + + Returns: + A dictionary outlining the TLS settings used by PyKX + + Example: + + ```python + >>> import pykx as kx + >>> kx.ssl_info() + pykx.Dictionary(pykx.q(' + SSLEAY_VERSION | OpenSSL 1.1.1q 5 Jul 2022 + SSL_CERT_FILE | /usr/local/anaconda3/ssl/server-crt.pem + SSL_CA_CERT_FILE | /usr/local/anaconda3/ssl/cacert.pem + SSL_CA_CERT_PATH | /usr/local/anaconda3/ssl + SSL_KEY_FILE | /usr/local/anaconda3/ssl/server-key.pem + SSL_CIPHER_LIST | ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:.. + SSL_VERIFY_CLIENT| NO + SSL_VERIFY_SERVER| YES + ')) + ``` + """ if licensed: return q('-26!0') cdef uintptr_t info = core.sslInfo(0) diff --git a/src/pykx/config.py b/src/pykx/config.py index 8136346..ae89596 100644 --- a/src/pykx/config.py +++ b/src/pykx/config.py @@ -96,6 +96,7 @@ def _is_set(envvar): _qlic = os.getenv('QLIC', '') _pwd = os.getcwd() license_located = False +lic_path = '' for loc in (_pwd, _qlic, qhome): if loc=='': pass @@ -126,15 +127,63 @@ def _license_install_B64(license, license_type): with open(qlic/license_type, 'wb') as binary_file: binary_file.write(lic) + return True + + +def _license_check(lic_type, lic_encoding, lic_variable): + license_content = None + lic_name = lic_type + '.lic' + lic_file = qlic / lic_name + if os.path.exists(lic_file): + with open(lic_file, 'rb') as f: + license_content = base64.encodebytes(f.read()).decode('utf-8') + license_content = license_content.replace('\n', '') + if lic_encoding == license_content: + conflict_message = 'We have been unable to update your license for PyKX using '\ + 'the following information:\n'\ + f" Environment variable: {lic_variable} \n"\ + f' License 
location: {qlic}/{lic_type}.lic\n'\ + 'Reason: License content matches supplied Environment variable' + print(conflict_message) + return False + else: + return _license_install_B64(lic_encoding, lic_name) + +def _license_install(intro=None, return_value=False, license_check=False, license_error=None): # noqa: + + if license_check: + install_success = False + kc_b64 = _get_config_value('KDB_LICENSE_B64', None) + k4_b64 = _get_config_value('KDB_K4LICENSE_B64', None) + + if kc_b64 is not None: + kx_license_env = 'KDB_LICENSE_B64' + kx_license_file = 'kc' + install_success = _license_check(kx_license_file, kc_b64, kx_license_env) + elif k4_b64 is not None: + kx_license_env = 'KDB_K4LICENSE_B64' + kx_license_file = 'k4' + install_success = _license_check(kx_license_file, k4_b64, kx_license_env) + if install_success: + if license_error is not None: + install_message = f'Initialisation failed with error: {license_error}\n'\ + 'Your license has been updated using the following '\ + 'information:\n'\ + f' Environment variable: {kx_license_env}\n'\ + f' License write location: {qlic}/{kx_license_file}.lic' + print(install_message) + return True - -def _license_install(intro=None, return_value=False): # noqa: modes_url = "https://code.kx.com/pykx/user-guide/advanced/modes.html" - lic_url = "https://kx.com/kdb-insights-personal-edition-license-download" + personal_url = "https://kx.com/kdb-insights-personal-edition-license-download" + commercial_url = "https://kx.com/book-demo" unlicensed_message = '\nPyKX unlicensed mode enabled. 
To set this as your default behavior '\ - "please set the following environment variable 'PYKX_UNLICENSED='true'"\ - '\n\nFor more information on PyKX modes of operation, please visit '\ - f'{modes_url}.\nTo apply for a PyKX license please visit {lic_url}' + "set the following environment variable PYKX_UNLICENSED='true'"\ + '\n\nFor more information on PyKX modes of operation, visit '\ + f'{modes_url}.\nTo apply for a PyKX license visit '\ + f'\n\n Personal License: {personal_url}'\ + '\n Commercial License: Contact your KX sales representative '\ + f'or sales@kx.com or apply on {commercial_url}' first_user = '\nThank you for installing PyKX!\n\n'\ 'We have been unable to locate your license for PyKX. '\ 'Running PyKX in unlicensed mode has reduced functionality.\n'\ @@ -147,10 +196,28 @@ def _license_install(intro=None, return_value=False): # noqa: return False elif continue_license in ('y', 'Y', ''): - redirect = input(f'\nTo apply for a PyKX license, please visit {lic_url}.\n' - 'Once the license application has completed, you will receive a ' - 'welcome email containing your license information.\n' - 'Would you like to open this page? [Y/n]: ') + commercial = input('\nIs the intended use of this software for:' + '\n [1] Personal use (Default)' + '\n [2] Commercial use' + '\nEnter your choice here [1/2]: ').strip().lower() + if commercial not in ('1', '2', ''): + raise Exception('User provided option was not one of [1/2]') + + personal = commercial in ('1', '') + + lic_url = personal_url if personal else commercial_url + lic_type = 'kc.lic' if personal else 'k4.lic' + + if personal: + redirect = input(f'\nTo apply for your PyKX license, navigate to {lic_url}.\n' + 'Shortly after you submit your application, you will receive a ' + 'welcome email containing your license information.\n' + 'Would you like to open this page? 
[Y/n]: ') + else: + redirect = input('\nTo apply for your PyKX license, contact your ' + 'KX sales representative or sales@kx.com.\n' + f'Alternately apply through {lic_url}.\n' + 'Would you like to open this page? [Y/n]: ') if redirect.lower() in ('y', ''): try: @@ -164,15 +231,15 @@ def _license_install(intro=None, return_value=False): # noqa: 'input the file path (Default)' '\n [2] Input the activation key (base64 encoded string) provided in ' 'your welcome email' - '\n [3] Proceed with unlicensed mode:' + '\n [3] Proceed with unlicensed mode' '\nEnter your choice here [1/2/3]: ').strip().lower() if install_type not in ('1', '2', '3', ''): raise Exception('User provided option was not one of [1/2/3]') if install_type in ('1', ''): - license = input('\nPlease provide the download location of your license ' - '(E.g., ~/path/to/kc.lic) : ').strip() + license = input('\nProvide the download location of your license ' + f'(for example, ~/path/to/{lic_type}) : ').strip() download_location = os.path.expanduser(Path(license)) if not os.path.exists(download_location): @@ -182,10 +249,10 @@ def _license_install(intro=None, return_value=False): # noqa: print('\nPyKX license successfully installed. Restart Python for this to take effect.\n') # noqa: E501 elif install_type == '2': - license = input('\nPlease provide your activation key (base64 encoded string) ' + license = input('\nProvide your activation key (base64 encoded string) ' 'provided with your welcome email : ').strip() - _license_install_B64(license, 'kc.lic') + _license_install_B64(license, lic_type) print('\nPyKX license successfully installed. 
Restart Python for this to take effect.\n') # noqa: E501 elif install_type == '3': @@ -202,14 +269,7 @@ def _license_install(intro=None, return_value=False): # noqa: if any(i in qargs for i in _arglist) or _licenvset or not hasattr(sys, 'ps1'): # noqa: C901 pass elif not license_located: - kc_b64 = _get_config_value('KDB_LICENSE_B64', None) - k4_b64 = _get_config_value('KDB_K4LICENSE_B64', None) - if kc_b64 is not None: - _license_install_B64(kc_b64, 'kc.lic') - elif k4_b64 is not None: - _license_install_B64(k4_b64, 'k4.lic') - else: - _license_install() + _license_install() licensed = False @@ -250,7 +310,6 @@ def _license_install(intro=None, return_value=False): # noqa: pykx_qdebug = _is_enabled('PYKX_QDEBUG', '--q-debug') pandas_2 = pd.__version__.split('.')[0] == '2' -disable_pandas_warning = _is_enabled('PYKX_DISABLE_PANDAS_WARNING') def find_core_lib(name: str) -> Path: diff --git a/src/pykx/core.pyx b/src/pykx/core.pyx index 21efe10..546930a 100644 --- a/src/pykx/core.pyx +++ b/src/pykx/core.pyx @@ -9,7 +9,7 @@ import sys from . import beta_features from .util import num_available_cores -from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta, _get_config_value, pykx_lib_dir, ignore_qhome +from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta, _get_config_value, pykx_lib_dir, ignore_qhome, lic_path def _normalize_qargs(user_args: List[str]) -> Tuple[bytes]: @@ -294,17 +294,21 @@ if not pykx_threading: if _qinit_check_proc.returncode: # Fallback to unlicensed mode if _qinit_output != ' ': _capout_msg = f'Captured output from initialization attempt:\n{_qinit_output}' + _lic_location = f'License location used:\n{lic_path}' else: _capout_msg = '' # nocov - this can only occur under extremely weird circumstances. + _lic_location = '' # nocov - this additional line is to ensure this code path is covered. 
if hasattr(sys, 'ps1'): if re.compile('exp').search(_capout_msg): _exp_license = 'Your PyKX license has now expired.\n\n'\ f'{_capout_msg}\n\n'\ + f'{_lic_location}\n\n'\ 'Would you like to renew your license? [Y/n]: ' - _license_message = _license_install(_exp_license, True) + _license_message = _license_install(_exp_license, True, True, 'exp') elif re.compile('embedq').search(_capout_msg): _ce_license = 'You appear to be using a non kdb Insights license.\n\n'\ f'{_capout_msg}\n\n'\ + f'{_lic_location}\n\n'\ 'Running PyKX in the absence of a kdb Insights license '\ 'has reduced functionality.\nWould you like to install '\ 'a kdb Insights personal license? [Y/n]: ' @@ -313,14 +317,16 @@ if not pykx_threading: _upd_license = 'Your installed license is out of date for this version'\ ' of PyKX and must be updated.\n\n'\ f'{_capout_msg}\n\n'\ + f'{_lic_location}\n\n'\ 'Would you like to install an updated kdb '\ 'Insights personal license? [Y/n]: ' _license_message = _license_install(_upd_license, True) if (not _license_message) and _qinit_check_proc.returncode: if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): - raise PyKXException(f'Failed to initialize embedded q.{_capout_msg}') + raise PyKXException(f'Failed to initialize embedded q.{_capout_msg}\n\n{_lic_location}') else: - warn(f'Failed to initialize PyKX successfully with the following error: {_capout_msg}', PyKXWarning) + warn('Failed to initialize PyKX successfully with ' + f'the following error: {_capout_msg}\n\n{_lic_location}', PyKXWarning) _libq_path_py = bytes(find_core_lib('e')) _libq_path = _libq_path_py _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) diff --git a/src/pykx/ctx.py b/src/pykx/ctx.py index f2c134e..6921c6f 100644 --- a/src/pykx/ctx.py +++ b/src/pykx/ctx.py @@ -132,7 +132,8 @@ def __getattr__(self, key): # noqa attr = self._q._call( 'k){x:. 
x;$[99h<@x;:`$"_pykx_fn_marker";99h~@x;if[` in!x;if[(::)~x`;:`$"_pykx_ctx_marker"]]]x}', # noqa: E501 fqn_with_key, - wait=True + wait=True, + skip_debug=True ) except QError as err: if '_' in str(key): diff --git a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py index 5101193..f962125 100644 --- a/src/pykx/embedded_q.py +++ b/src/pykx/embedded_q.py @@ -118,12 +118,28 @@ class EmbeddedQ(Q, metaclass=ABCMetaSingleton): def __init__(self): # noqa if licensed: - kxic_path = (pykx_dir/'lib'/'kxic.k').as_posix() + kxic_path = (pykx_dir/'lib').as_posix() + kxic_file = 'kxic.k' pykx_qlib_path = (pykx_dir/'pykx').as_posix() # This q code is run as a single call into q to improve startup performance: code = '' + code += ''' + .pykx.util.loadfile:{[folder;file] + cache:system"cd"; + res:.[{system"cd ",x;res:system"l ",y;(0b;res)}; + (folder;file); + {(1b;x)} + ]; + if[folder~system"cd";system"cd ",cache]; + $[res[0];'res[1];res[1]] + }; + ''' if not no_qce: - code += f'if[not `comkxic in key `;system"l {kxic_path}"];' + code += f''' + if[not `comkxic in key `; + .pykx.util.loadfile["{kxic_path}";"{kxic_file}"] + ]; + ''' if os.getenv('PYKX_UNDER_Q') is None: os.environ['PYKX_UNDER_PYTHON'] = 'true' code += 'setenv[`PYKX_UNDER_PYTHON;"true"];' @@ -165,8 +181,8 @@ def __init__(self): # noqa break else: raise err - pykx_qini_path = (Path(__file__).parent.absolute()/'pykx_init.q_') - self._call(f'\l {pykx_qini_path}', skip_debug=True) # noqa + pykx_qini_path = Path(__file__).parent.absolute().as_posix() + self._call(f'.pykx.util.loadfile["{pykx_qini_path}";"pykx_init.q_"]', skip_debug=True) # noqa pykx_q_path = (Path(__file__).parent.absolute()/'pykx.q') with open(pykx_q_path, 'r') as f: code = f.read() @@ -222,7 +238,7 @@ def __call__(self, query = wrappers.CharVector(query) if (not skip_debug) and (debug or pykx_qdebug): if 0 != len(args): - query = wrappers.List([bytes(query), *[wrappers.K(x) for x in args]]) + query = wrappers.List([query, *[wrappers.K(x) for x in args]]) 
result = _keval( b'{[pykxquery] .Q.trp[value; pykxquery; {2@"backtrace:\n",.Q.sbt y;\'x}]}', query diff --git a/src/pykx/ipc.py b/src/pykx/ipc.py index b98e1d5..72e1cae 100644 --- a/src/pykx/ipc.py +++ b/src/pykx/ipc.py @@ -638,6 +638,13 @@ def _send(self, ): if self.closed: raise RuntimeError("Attempted to use a closed IPC connection") + tquery = type(query) + debugging = (not skip_debug) and (debug or pykx_qdebug) + if not (issubclass(tquery, K) or isinstance(query, (str, bytes))): + raise ValueError('Cannot send object of passed type over IPC: ' + str(tquery)) + if debugging: + if not issubclass(tquery, Function): + query = CharVector(query) start_time = monotonic_ns() timeout = self._connection_info['timeout'] while True: @@ -646,14 +653,14 @@ def _send(self, events = self._writer.select(timeout) for key, _mask in events: callback = key.data - if (not skip_debug) and (debug or pykx_qdebug): + if debugging: return callback()( key.fileobj, bytes(CharVector( '{[pykxquery] .Q.trp[{[x] (0b; value x)}; pykxquery;' '{(1b;"backtrace:\n",.Q.sbt y;x)}]}' )), - CharVector(query) if len(params) == 0 else List((CharVector(query), *params)), + query if len(params) == 0 else List((query, *params)), wait=wait, error=error, debug=debug @@ -672,13 +679,13 @@ def _ipc_query_builder(self, query, *params): for a, b in zip(prev_types, data): if not issubclass(a, type(None))\ - and (issubclass(type(b), Function) or isinstance(b, Foreign) + and (isinstance(b, Foreign) or (isinstance(b, Composition) and q('{.pykx.util.isw x}', b)) )\ and not issubclass(a, Function)\ or issubclass(type(b), Function) and\ isinstance(b, Composition) and q('{.pykx.util.isw x}', b): - raise ValueError('Cannot send Python function over IPC') + raise ValueError('Cannot send object of passed type over IPC: ' + str(type(b))) return data def _send_sock(self, @@ -1084,6 +1091,18 @@ def __call__(self, # basis: q('{x set y+til z}', 'async_query', 10, 5, wait=True) ``` + + Call a PyKX Operator function with supplied 
parameters + + ```python + q(kx.q.sum, [1, 2, 3]) + ``` + + Call a PyKX Keyword function with supplied parameters + + ```python + q(kx.q.floor, [5.2, 10.4]) + ``` """ if wait is None: wait = self._connection_info['wait'] @@ -1462,6 +1481,18 @@ def __call__(self, # basis: await q('{x set y+til z}', 'async_query', 10, 5, wait=True) ``` + + Call a PyKX Operator function with supplied parameters + + ```python + await q(kx.q.sum, [1, 2, 3]) + ``` + + Call a PyKX Keyword function with supplied parameters + + ```python + await q(kx.q.floor, [5.2, 10.4]) + ``` """ if not reuse: conn = _DeferredQConnection(self._stored_args['host'], @@ -1532,6 +1563,7 @@ def _call(self, *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False ): try: with self._lock if self._lock is not None else nullcontext(): @@ -1666,6 +1698,7 @@ def _call(self, *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False ): return self._send(query, *args, wait=wait, debug=debug)._await() @@ -1984,6 +2017,7 @@ def _call(self, *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False, ): conn = _DeferredQConnection(self._stored_args['host'], self._stored_args['port'], @@ -2463,11 +2497,11 @@ def _licensed_call(handle: int, query: bytes, parameters: List, wait: bool) -> K # TODO: can we switch over to exclusively using this approach instead of `_licensed_call`? # It would involve making `cls._lib` be either libq or libe depending on if we're licensed. 
@classmethod - def _unlicensed_call(cls, handle: int, query: bytes, parameters: List, wait: bool) -> K: + def _unlicensed_call(cls, handle: int, query, parameters: List, wait: bool) -> K: return _ipc._unlicensed_call(handle, query, parameters, wait) def __call__(self, - query: Union[str, bytes, CharVector], + query: Union[str, bytes, CharVector, K], *args: Any, wait: Optional[bool] = None, debug: bool = False, @@ -2529,6 +2563,18 @@ def __call__(self, q('{x set y+til z}', 'async_query', 10, 5, wait=True) ``` + Call a PyKX Operator function with supplied parameters + + ```python + q(kx.q.sum, [1, 2, 3]) + ``` + + Call a PyKX Keyword function with supplied parameters + + ```python + q(kx.q.floor, [5.2, 10.4]) + ``` + Automatically reconnect to a q server after a disconnect. ```python @@ -2545,23 +2591,29 @@ def __call__(self, return self._call(query, *args, wait=wait, debug=debug) def _call(self, - query: Union[str, bytes], + query: Union[K, str, bytes], *args: Any, wait: Optional[bool] = None, debug: bool = False, + skip_debug: bool = False ) -> K: if wait is None: wait = self._connection_info['wait'] if self.closed: raise RuntimeError('Attempted to use a closed IPC connection') + tquery = type(query) + if not (issubclass(tquery, K) or isinstance(query, (str, bytes))): + raise ValueError('Cannot send object of passed type over IPC: ' + str(tquery)) + if not issubclass(tquery, Function): + if isinstance(query, CharVector): + query = bytes(query) + else: + query = normalize_to_bytes(query, 'Query') if len(args) > 8: raise TypeError('Too many parameters - q queries cannot have more than 8 parameters') prev_types = [type(x) for x in args] handle = self._handle if wait else -self._handle args = [K(x) for x in args] - for a, b in zip(prev_types, (type(x) for x in args)): - if issubclass(b, Function) and not issubclass(a, Function): - raise ValueError('Cannot send Python function over IPC') handler = self._licensed_call if licensed else self._unlicensed_call try: @@ -2574,7 
+2626,7 @@ def _call(self, '{(1b; "backtrace:\n",.Q.sbt y; x)}]}', 'Query' ), - [K(normalize_to_bytes(query, 'Query'))] if len(args) == 0 else [List([K(normalize_to_bytes(query, 'Query')), *args])], + [K(query)] if len(args) == 0 else [List((K(query), *args))], wait, ) if res._unlicensed_getitem(0).py() == True: @@ -2582,7 +2634,7 @@ def _call(self, raise QError(res._unlicensed_getitem(2).py().decode()) else: return res._unlicensed_getitem(1) - return handler(handle, normalize_to_bytes(query, 'Query'), args, wait) + return handler(handle, query, args, wait) except BaseException as e: if isinstance(e, QError) and 'snd handle' not in str(e) and 'write to handle' not in str(e) and 'close handle' not in str(e): raise e diff --git a/src/pykx/lib/4-1-libs/l64/libq.so b/src/pykx/lib/4-1-libs/l64/libq.so index b654f7e..bf837c7 100755 Binary files a/src/pykx/lib/4-1-libs/l64/libq.so and b/src/pykx/lib/4-1-libs/l64/libq.so differ diff --git a/src/pykx/lib/4-1-libs/l64arm/libq.so b/src/pykx/lib/4-1-libs/l64arm/libq.so index 8eaf7ea..bcc4b10 100755 Binary files a/src/pykx/lib/4-1-libs/l64arm/libq.so and b/src/pykx/lib/4-1-libs/l64arm/libq.so differ diff --git a/src/pykx/lib/4-1-libs/m64/libq.dylib b/src/pykx/lib/4-1-libs/m64/libq.dylib index 7d1505c..7864911 100755 Binary files a/src/pykx/lib/4-1-libs/m64/libq.dylib and b/src/pykx/lib/4-1-libs/m64/libq.dylib differ diff --git a/src/pykx/lib/4-1-libs/m64arm/libq.dylib b/src/pykx/lib/4-1-libs/m64arm/libq.dylib index ef9b5dc..953bfc2 100755 Binary files a/src/pykx/lib/4-1-libs/m64arm/libq.dylib and b/src/pykx/lib/4-1-libs/m64arm/libq.dylib differ diff --git a/src/pykx/lib/4-1-libs/q.k b/src/pykx/lib/4-1-libs/q.k index 593a7d5..06ad989 100644 --- a/src/pykx/lib/4-1-libs/q.k +++ b/src/pykx/lib/4-1-libs/q.k @@ -117,7 +117,7 @@ IN:{$[99h<@x;x in y;0]};qa:{$[qb x;0;IN[*x;a0];1;|/qa'1_x]};qb:{(2>#x)|(@x)&~11= / CAN EXIT HERE FOR SMALL Q / pt(tables) pf(date/month/year/int) pd(dirs) pv(values) pn(count) pt::0#pf::` vt:(,`)!,()!(); 
-bv:{g:$[(::)~x;max;min];x:`:.;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;-1!'`$0:`/:x,`par.txt;,x]; +bv:{g:$[(::)~x;max;min];x:.Q.d;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;jp[x]'`$0:`/:x,`par.txt;,x]; t:?,/!:'.Q.vt:{(&#:'x)(=,/. x)}'{({("DMJJ"`date`month`year`int?.Q.pf)$$last@x:`\:x}'x)!!:'x}'d; d:{`/:'x[(. y)[;0]],'(`$$(. y)[;1]),'!y}[P]@{i:y@&:x=y x:@[x;&x~\:();:;*0#`. pf];(i;x i)}[;g]'+:t#/:g''.Q.vt:t#/:.Q.vt;.Q.vt:P!.q.except[. .Q.pf]''.Q.vt; .Q.vp:t!{(+(,.Q.pf)!,0#. .Q.pf),'+(-2!'.+x)#'+|0#x:?[x;();0b;()]}'d;.Q.pt,:{.[x;();:;+.q.except[!+.Q.vp x;.Q.pf]!x];x}'.q.except[t;.Q.pt];} diff --git a/src/pykx/lib/4-1-libs/w64/q.dll b/src/pykx/lib/4-1-libs/w64/q.dll index 3c53506..c1b7ff9 100644 Binary files a/src/pykx/lib/4-1-libs/w64/q.dll and b/src/pykx/lib/4-1-libs/w64/q.dll differ diff --git a/src/pykx/lib/4-1-libs/w64/q.lib b/src/pykx/lib/4-1-libs/w64/q.lib index ea82b4b..bc1bfa3 100644 Binary files a/src/pykx/lib/4-1-libs/w64/q.lib and b/src/pykx/lib/4-1-libs/w64/q.lib differ diff --git a/src/pykx/lib/l64/libe.so b/src/pykx/lib/l64/libe.so index ebf16db..11818e9 100755 Binary files a/src/pykx/lib/l64/libe.so and b/src/pykx/lib/l64/libe.so differ diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/lib/l64/libq.so index 2a8c586..5a5f383 100755 Binary files a/src/pykx/lib/l64/libq.so and b/src/pykx/lib/l64/libq.so differ diff --git a/src/pykx/lib/l64arm/libq.so b/src/pykx/lib/l64arm/libq.so index 9eb8f21..c148bd6 100755 Binary files a/src/pykx/lib/l64arm/libq.so and b/src/pykx/lib/l64arm/libq.so differ diff --git a/src/pykx/lib/m64/libe.so b/src/pykx/lib/m64/libe.so index 9c997a4..89c1542 100755 Binary files a/src/pykx/lib/m64/libe.so and b/src/pykx/lib/m64/libe.so differ diff --git a/src/pykx/lib/m64/libq.dylib b/src/pykx/lib/m64/libq.dylib index b28aac5..74353fa 100755 Binary files a/src/pykx/lib/m64/libq.dylib and b/src/pykx/lib/m64/libq.dylib differ diff --git a/src/pykx/lib/m64arm/libe.so b/src/pykx/lib/m64arm/libe.so index 75a51fc..7c95d93 
100755 Binary files a/src/pykx/lib/m64arm/libe.so and b/src/pykx/lib/m64arm/libe.so differ diff --git a/src/pykx/lib/m64arm/libq.dylib b/src/pykx/lib/m64arm/libq.dylib index 5c4b079..52604ce 100755 Binary files a/src/pykx/lib/m64arm/libq.dylib and b/src/pykx/lib/m64arm/libq.dylib differ diff --git a/src/pykx/lib/q.k b/src/pykx/lib/q.k index 274f4c1..782f4c6 100644 --- a/src/pykx/lib/q.k +++ b/src/pykx/lib/q.k @@ -117,7 +117,7 @@ IN:{$[99h<@x;x in y;0]};qa:{$[qb x;0;IN[*x;a0];1;|/qa'1_x]};qb:{(2>#x)|(@x)&~11= / CAN EXIT HERE FOR SMALL Q / pt(tables) pf(date/month/year/int) pd(dirs) pv(values) pn(count) pt::0#pf::` vt:(,`)!,()!(); -bv:{g:$[(::)~x;max;min];x:`:.;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;-1!'`$0:`/:x,`par.txt;,x]; +bv:{g:$[(::)~x;max;min];x:.Q.d;d:{`/:'x,'d@&(d:!x)like"[0-9]*"}'P:$[`par.txt in!x;jp[x]'`$0:`/:x,`par.txt;,x]; t:?,/!:'.Q.vt:{(&#:'x)(=,/. x)}'{({("DMJJ"`date`month`year`int?.Q.pf)$$last@x:`\:x}'x)!!:'x}'d; d:{`/:'x[(. y)[;0]],'(`$$(. y)[;1]),'!y}[P]@{i:y@&:x=y x:@[x;&x~\:();:;*0#`. pf];(i;x i)}[;g]'+:t#/:g''.Q.vt:t#/:.Q.vt;.Q.vt:P!.q.except[. .Q.pf]''.Q.vt; .Q.vp:t!{(+(,.Q.pf)!,0#. 
.Q.pf),'+(-2!'.+x)#'+|0#x:?[x;();0b;()]}'d;.Q.pt,:{.[x;();:;+.q.except[!+.Q.vp x;.Q.pf]!x];x}'.q.except[t;.Q.pt];} diff --git a/src/pykx/lib/read.q b/src/pykx/lib/read.q index 41bfd1a..398b992 100644 --- a/src/pykx/lib/read.q +++ b/src/pykx/lib/read.q @@ -1,4 +1,4 @@ -system"l ", {x sv (-1 _ x vs y),enlist "csvutil.q"}[$[.z.o~`w64;"\\";"/"]; (value{})6]; +.pykx.util.loadfile[;"csvutil.q"]{x sv (-1 _ x vs y)}[$[.z.o~`w64;"\\";"/"]; (value{})6]; system"d .read"; diff --git a/src/pykx/lib/w64/q.dll b/src/pykx/lib/w64/q.dll index 312318e..442727f 100644 Binary files a/src/pykx/lib/w64/q.dll and b/src/pykx/lib/w64/q.dll differ diff --git a/src/pykx/lib/w64/q.lib b/src/pykx/lib/w64/q.lib index 2857d79..efe485c 100644 Binary files a/src/pykx/lib/w64/q.lib and b/src/pykx/lib/w64/q.lib differ diff --git a/src/pykx/pandas_api/__init__.py b/src/pykx/pandas_api/__init__.py index c884675..e8f755d 100644 --- a/src/pykx/pandas_api/__init__.py +++ b/src/pykx/pandas_api/__init__.py @@ -74,6 +74,7 @@ def return_val(*args, **kwargs): from .pandas_reset_index import _init as _reset_index_init, PandasResetIndex from .pandas_apply import _init as _apply_init, PandasApply from .pandas_sorting import _init as _sorting_init, PandasSorting +from .pandas_replace import _init as _replace_init, PandasReplace def _init(_q): @@ -87,11 +88,12 @@ def _init(_q): _apply_init(q) _sorting_init(q) _reset_index_init(q) + _replace_init(q) class PandasAPI(PandasApply, PandasMeta, PandasIndexing, PandasReindexing, PandasConversions, PandasMerge, PandasSetIndex, PandasGroupBy, - PandasSorting, PandasResetIndex): + PandasSorting, PandasReplace, PandasResetIndex): """PandasAPI mixin class""" replace_self = False prev_locs = {} diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index b74798b1..66724af 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -60,11 +60,13 @@ def preparse_computations(tab, axis=0, skipna=True, 
numeric_only=False, bool_onl if bool_only: (tab, cols) = _get_bool_only_subtable(tab) res = q( - '{[tab;skipna;axis]' - 'r:value flip tab;' - 'if[not axis~0;r:flip r];' - 'if[skipna;r:{x where not null x} each r];' - 'r}', + ''' + {[tab;skipna;axis] + r:value flip tab; + if[not axis~0;r:flip r]; + if[skipna;r:{x where not null x} each r]; + r} + ''', tab, skipna, axis @@ -149,15 +151,18 @@ def mean(self, axis: int = 0, numeric_only: bool = False): if numeric_only: tab = _get_numeric_only_subtable(tab) - key_str = '' if axis == 0 else '`$string ' - val_str = '' if axis == 0 else '"f"$value ' - query_str = 'cols tab' if axis == 0 else 'til count tab' - where_str = ' where not (::)~/:r[;1]' return q( - '{[tab]' - f'r:{{[tab; x] ({key_str}x; avg {val_str}tab[x])}}[tab;] each {query_str};' - f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}', - tab + ''' + {[tab;axis] + idx:$[axis;til count tab;cols tab]; + r:{[tab;axis;idx] + ( + $[axis;`$string@;]idx; + avg $[axis;"f"$value@;]tab idx + ) + }[tab;axis]each idx; + {x[;0]!x[;1]} r where not (::)~/:r[;1]} + ''', tab, axis ) @api_return @@ -199,15 +204,14 @@ def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): if ddof == len(tab): return q('{x!count[x]#0n}', axis_keys) - return q( - '''{[tab;axis;ddof;axis_keys] - tab:$[0~axis;(::);flip] value flip tab; - d:$[0~ddof;dev; - 1~ddof;sdev; - {[ddof;x] avg sqrt (sum xexp[x-avg x;2]) % count[x]-ddof}ddof]; - axis_keys!d each tab - }''', tab, axis, ddof, axis_keys - ) + return q(''' + {[tab;axis;ddof;axis_keys] + tab:$[0~axis;(::);flip] value flip tab; + d:$[0~ddof;dev; + 1~ddof;sdev; + {[ddof;x] avg sqrt (sum xexp[x-avg x;2]) % count[x]-ddof}ddof]; + axis_keys!d each tab + }''', tab, axis, ddof, axis_keys) @api_return def median(self, axis: int = 0, numeric_only: bool = False): @@ -217,16 +221,17 @@ def median(self, axis: int = 0, numeric_only: bool = False): if numeric_only: tab = _get_numeric_only_subtable(tab) - key_str = '' if axis == 0 else 
'`$string ' - val_str = '' if axis == 0 else '"f"$value ' - query_str = 'cols tab' if axis == 0 else 'til count tab' - where_str = ' where not (::)~/:r[;1]' - return q( - '{[tab]' - f'r:{{[tab; x] ({key_str}x; med {val_str}tab[x])}}[tab;] each {query_str};' - f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}', - tab - ) + return q(''' + {[tab;axis] + idx:$[axis;til count tab;cols tab]; + r:{[tab;axis;idx] + ( + $[axis;`$string@;]idx; + med $[axis;"f"$value@;]tab idx + ) + }[tab;axis]each idx; + raze{(enlist x 0)!enlist x 1}each r where not (::)~/:r[;1]} + ''', tab, axis) @convert_result def skew(self, axis=0, skipna=True, numeric_only=False): @@ -245,27 +250,26 @@ def mode(self, axis: int = 0, numeric_only: bool = False, dropna: bool = True): tab = q('{(keys x) _ 0!x}', tab) if numeric_only: tab = _get_numeric_only_subtable(tab) - x_str = 'x: x where not null x; ' if dropna else '' - query_str = 'cols tab' if axis == 0 else 'til count tab' - cols_str = 'tab[x]' if axis == 0 else 'value tab[x]' - maxc_str = 'x[1]' if axis ==0 else 'raze x _ 0' - cs_str = 'cols tab' if axis == 0 else '`idx,`$string each til count r[0][1]' - m_str = '{1 _ raze x}' if axis == 0 else '{x: raze x; x iasc null x}' - flip_m = 'flip ' if axis == 0 else '' - mode_query = f'{{{x_str}(x l) where d=max d:1_deltas (l:where differ x),count x:asc x}}' \ - if numeric_only else f'{{{x_str}x where f=max f:@[0*i;i:x?x;+;1]}}' - return q( - '{[tab]' - f'r:{{[tab; x] (x; {mode_query}' - f'[{cols_str}])}}[tab;] each {query_str};' - f'maxc: max {{count {maxc_str}}} each r;' - 'r:{[x; y] $[not y=t:count x 1;' - '[qq: x 1; (x 0;(y - t){[z; t]z,z[t]}[;t]/qq)];' - '(x 0; x 1)]}[;maxc] each r;' - f'cs: {cs_str};' - f'm: {m_str} each r;' - f'cs !/: {flip_m}m}}', - tab + + return q(''' + {[tab; axis; numeric; drop] + idx:$[axis;til count tab;cols tab]; + modeQuery:$[numeric; + {x[l] where d=max d:1_deltas (l:where differ x),count x:asc x}; + {x where f=max f:@[0*i;i:x?x;+;1]} + ]; + r:{[tab; axis; 
modeQuery; drop; x] + (x; modeQuery $[drop;{x where not null x};] $[axis;value;]tab x) + }[tab;axis;modeQuery;drop]each idx; + maxc: max{count x y}[$[axis;{raze x _ 0};{x 1}]]each r; + r:{[x; y] + $[not y=t:count x 1; + [qq: x 1; (x 0;(y - t){[z; t]z,z[t]}[;t]/qq)]; + (x 0; x 1)]}[;maxc] each r; + cs:$[axis;`idx,`$string each til count r[0][1];cols tab]; + m:$[axis;{x: raze x; x iasc null x};{1 _ raze x}] each r; + cs!/:$[axis;;flip]m + }''', tab, axis, numeric_only, dropna ) @api_return diff --git a/src/pykx/pandas_api/pandas_replace.py b/src/pykx/pandas_api/pandas_replace.py new file mode 100644 index 0000000..afe1b1d --- /dev/null +++ b/src/pykx/pandas_api/pandas_replace.py @@ -0,0 +1,27 @@ +from . import api_return + + +def _init(_q): + global q + q = _q + + +class PandasReplace: + + @api_return + def replace(self, to_replace, value): + return q(''' + {[t;s;r] + gt:$[-11h~type t;get;(::)] t; + cs:cols $[99h~type gt;value;(::)]gt; + map:([] c:cs; cT:type each value ?[t;();();cs!cs]); + map:update s:count[map]#enlist s,sT:type s,r:count[map]#enlist r,rT:type r from map; + map:select from map where (cT=0) or neg[sT]=cT; + map:update sOp:?[(sT>=0) or cT=0;count[map]#(~/:);count[map]#(=)] from map; + map:update rI:{[t;c;s;sOp] where sOp[s;t c]}[0!gt]'[c;s;sOp] from map; + map:delete from map where 0=count each rI; + map:update atF:?[(0=cT) or neg[cT]=rT;count[map]#(@[;;:;]);count[map]#({1_ @[(::),x;1+y;:;z]})] from map; + map:update r:(count each rI)#'enlist each r from map; + ![t;();0b;map[`c]!exec {[atF;c;rI;r](atF[;rI;r];c)}'[atF;c;rI;r] from map] + } + ''', self, to_replace, value) # noqa: E501 diff --git a/src/pykx/pykx.q b/src/pykx/pykx.q index 2edd4bb..45593a5 100644 --- a/src/pykx/pykx.q +++ b/src/pykx/pykx.q @@ -8,6 +8,17 @@ // @desc Process context prior to PyKX initialization .pykx.util.prevCtx:system"d"; +@[ + {if[not"{.pykx.pyexec x}"~string get x; + -1"Warning: Detected invalid '.p.e' function definition expected for PyKX.\n", + "Have you loaded 
another Python integration first?\n\n", + "Please consider full installation of PyKX under q following instructions at:\n", + "https://code.kx.com/pykx/pykx-under-q/intro.html#installation.\n" + ] + }; + `.p.e; + {::}] + \d .pykx // @private @@ -29,6 +40,18 @@ util.os:first string .z.o; // @type {dict} util.startup:.Q.opt .z.x + +// @private +// @overview +// @desc Load a file at an associated folder location, this is used +// to allow loading of files at folder locations containing spaces +util.loadfile:{[folder;file] + cache:system"cd"; + res:.[{system"cd ",x;res:system"l ",y;(0b;res)};(folder;file);{(1b;x)}]; + if[folder~system"cd";system"cd ",cache]; + $[res[0];'res[1];res[1]] + } + // @private // @desc Retrieval of PyKX initialization directory on first initialization if[not "true"~lower getenv`PYKX_LOADED_UNDER_Q; @@ -72,7 +95,10 @@ if["true"~getenv`PYKX_UNDER_PYTHON; if[not "true"~lower getenv`PYKX_LOADED_UNDER_Q; util.pyEnvInfo:("None"; "None"; ""); if[0=count getenv`PYKX_Q_LOADED_MARKER; - @[system"l ",;"pykx_init.q_";{system"l ",pykxDir,"/pykx_init.q_"}]; + @[system"l ",; + "pykx_init.q_"; + {[x;y] util.loadfile[x;"pykx_init.q_"]}[pykxDir] + ] ]; ]; @@ -1632,7 +1658,7 @@ console:{pyexec"from code import InteractiveConsole\n__pykx_console__ = Interact // @desc // Set the execution function used when loading files with the extension `*.p` // or when using the following syntax `p)` within a q session -.p.e:{.pykx.pyexec x} +.p.e:{.pykx.pyexec x} // If changing this line please ensure you have updated the check used at the beginning of this file to warn users about PyKX being loaded with other Python libraries // @private // @desc @@ -1708,8 +1734,8 @@ listExtensions:{-2 _/:lst where like[;"*.q"]lst:string key hsym`$pykxDir,"/exten loadExtension:{[ext] if[not 10h=type ext;'"Extension provided must be of type string"]; if[not ext in listExtensions[];'"Extension provided '",ext,"' not available"]; - @[system"l ",; - pykxDir,"/extensions/",ext,".q"; + 
.[util.loadfile; + (pykxDir,"/extensions/";ext,".q"); {'x," raised when attempting to load extension"} ]; } diff --git a/src/pykx/pykx_init.q_ b/src/pykx/pykx_init.q_ index 7db2989..c9f992f 100644 Binary files a/src/pykx/pykx_init.q_ and b/src/pykx/pykx_init.q_ differ diff --git a/src/pykx/query.py b/src/pykx/query.py index ede1b32..661fa6c 100644 --- a/src/pykx/query.py +++ b/src/pykx/query.py @@ -22,9 +22,6 @@ def __dir__(): return __all__ -consolidate = '{$[0>type x;x;(0h>v 0)&1~count v:distinct type each x;raze x;x]}' - - class QSQL: """Generates and submits functional q SQL queries. @@ -399,7 +396,7 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, modify=Fal query_char = '!' if query_type in ('delete', 'update') else '?' try: res = self._q( - f'{{{query_char}[{table_code};x;y;z]}}', + f'{{{query_char}[{table_code};value x;value y;value z]}}', where_clause, by_clause, select_clause, @@ -422,9 +419,9 @@ def _generate_clause(self, clause_value, clause_name, query_type): if clause_value is None: if clause_name in ('columns', 'where'): b = query_type == 'delete' and clause_name == 'columns' - return self._q._call('`symbol$()', wait=True) if b else [] + return [b'{`symbol$()}', None] if b else [b'{x}', []] elif clause_name == 'by': - return [] if query_type == 'exec' else False + return [b'{x}', []] if query_type == 'exec' else [b'{x}', False] else: if clause_name in ('columns', 'by'): return self._generate_clause_columns_by(clause_value, clause_name, query_type) @@ -439,23 +436,18 @@ def _generate_clause_columns_by(self, clause_value, clause_name, query_type): if isinstance(clause_value, str): if clause_value == '': raise ValueError('q query specifying column cannot be empty') - clause_value = [clause_value] - return self._q._call('raze', - [self._q._call('parse', - k.CharVector(x), - wait=True) for x in clause_value], - wait=True, - ) + clause_value = [k.CharVector(clause_value)] + else: + clause_value = [k.CharVector(x) for x in 
clause_value] + return [b'{parse each x}', clause_value] elif (query_type in ['select', 'exec']) and (clause_name in ['columns', 'by']): if isinstance(clause_value, list): - return self._q._call('{x!x}', - self._q._call(consolidate, clause_value, wait=True), - wait=True) + return [b'{v!v:{$[0>type x;x;(0h>v 0)&1~count v:distinct type each x;raze x;x]}x}', clause_value] # noqa: E501 elif isinstance(clause_value, str) and query_type == 'select': - return self._q._call('{x!x}enlist@', clause_value, wait=True) - return k.K(clause_value) + return [b'{x!x}enlist@', clause_value] + return [b'{x}', k.K(clause_value)] elif isinstance(clause_value, k.K): - return clause_value + return [b'{x}', clause_value] raise TypeError(f"Unsupported type for '{clause_name}' clause") def _generate_clause_columns_by_dict(self, clause_value): @@ -464,22 +456,21 @@ def _generate_clause_columns_by_dict(self, clause_value): if isinstance(val, str): if val == '': raise ValueError(f'q query specifying column for key {key!r} cannot be empty') - clause_dict[key] = self._q._call('parse', k.CharVector(val), wait=True) + clause_dict[key] = [True, k.CharVector(val)] else: - clause_dict[key] = self._q._call(consolidate, val, wait=True) - return k.K(clause_dict) + clause_dict[key] = [False, val] + return [b'{key[x]!{$[x 0;parse;{$[0>type x;x;(0h>v 0)&1~count v:distinct type each x;raze x;x]}]x 1}each value x}', clause_dict] # noqa: E501 def _generate_clause_where(self, clause_value) -> k.List: if isinstance(clause_value, k.List): - return clause_value # clause value is a parse tree + return [b'{x}', clause_value] if isinstance(clause_value, k.BooleanVector): - return self._q._call('enlist', clause_value, wait=True) + return [b'{enlist x}', clause_value] if isinstance(clause_value, str): - clause_value = [clause_value] - try: - return k.K([self._q._call('parse', k.CharVector(x), wait=True) for x in clause_value]) - except Exception as ex: - raise TypeError("Unsupported type for 'where' clause") from ex + 
clause_value = [k.CharVector(clause_value)] + else: + clause_value = [k.CharVector(x) for x in clause_value] + return [b'{parse each x}', clause_value] class SQL: diff --git a/src/pykx/read.py b/src/pykx/read.py index 0fd5f25..3196a02 100644 --- a/src/pykx/read.py +++ b/src/pykx/read.py @@ -80,7 +80,8 @@ def csv(self, path: The path to the CSV file. types: Can be a dictionary of columns and their types or a `str`-like object of uppercase characters representing the types. Space is used to drop a column. - If `None`, the types will be guessed using `.csvutil.info`. + If `None`, the types will be guessed using [csvutil.q](https://github.com/KxSystems/kdb/blob/master/utils/csvutil.q). + A breakdown of this process is illustrated in the table below. delimiter: A single character representing the delimiter between values. as_table: `True` if the first line of the CSV file should be treated as column names, in which case a `pykx.Table` is returned. If `False` a `pykx.List` of @@ -92,6 +93,29 @@ def csv(self, See Also: [`q.write.csv`][pykx.write.QWriter.csv] + + CSV Type Guessing Table: + | Type Character | Type | Condition(s) | + |---|---|---| + | * | List |- Any type of width greater than 30.
- Remaining unknown types. | + | B | BooleanAtom |- Matching Byte or Char, maxwidth 1, no decimal points, at least 1 of `[0fFnN]` and 1 of `[1tTyY]` in columns.
- Matching Byte or Char, maxwidth 1, no decimal points, all elements in `[01tTfFyYnN]`. | + | G | GUIDAtom |- Matches GUID-like structure.
- Matches structure wrapped in `{ }`. | + | X | ByteAtom |- Maxwidth of 2, comprised of `[0-9]` AND `[abcdefABCDEF]`. | + | H | ShortAtom |- Matches Integer with maxwidth less than 7. | + | I | IntAtom |- Numerical of size between 7 and 15 with exactly 3 decimal points (IP Address).
- Matches Long with maxwidth less than 12. | + | J | LongAtom |- Numerical, no decimal points, all elements `+-` or `0-9`. | + | E | RealAtom |- Matches float with maxwidth less than 9. | + | F | FloatAtom |- Numerical, maxwidth greater than 2, fewer than 2 decimal points, `/` present.
- Numerical, fewer than 2 decimal points, maxwidth greater than 1. | + | C | CharAtom |- Empty columns. Remaining unknown types of size 1. | + | S | SymbolAtom |- Remaining unknown types of maxwidth 2-11 and granularity of less than 10. | + | P | TimestampAtom |- Numerical, maxwidth 11-29, fewer than 4 decimals matching `YYYY[./-]MM[./-]DD` | + | M | MonthAtom |- Matching either numerical, Int, Byte, Real or Float, fewer than 2 decimal points, maxwidth 4-7 | + | D | DateAtom |- Matching Integer, maxwidth 6 or 8.
- Numerical, 0 decimal points, maxwidth 8-10.
- Numerical, 2 decimal points, maxwidth 8-10.
- No decimal points, maxwidth 5-9, matching date with 3-letter month code, e.g. `9nov1989`. | + | N | TimespanAtom |- Numerical, maxwidth 15, no decimal points, all values `0-9`.
- Numerical, maxwidth 3-29, 1 decimal point, matching `*[0-9]D[0-9]*`.
- Numerical, maxwidth 3-28, 1 decimal point. | + | U | MinuteAtom |- Matching Byte, maxwidth 4, matching `[012][0-9][0-5][0-9]`.
- Numerical, maxwidth 4 or 5, no decimal points, matching `*[0-9]:[0-5][0-9]`. | + | V | SecondAtom |- Matching Integer, maxwidth 6, matching `[012][0-9][0-5][0-9][0-5][0-9]`.
- Matching Time, maxwidth 7 or 8, no decimal points. | + | T | TimeAtom |- Numerical, maxwidth 9, no decimal points, all values numeric.
- Numerical, maxwidth 7 - 12, fewer than 2 decimal points, matching `[0-9]:[0-5][0-9]:[0-5][0-9]`.
- Matching Real or Float, maxwidth 7-12, 1 decimal point, matching `[0-9][0-5][0-9][0-5][0-9]`. | + Examples: Read a comma seperated CSV file into a `pykx.Table` guessing the datatypes of each @@ -121,7 +145,7 @@ def csv(self, ```python table = q.read.csv('example.csv', {'x1':kx.IntAtom,'x2':kx.GUIDAtom,'x3':kx.TimestampAtom}) ``` - """ + """ # noqa: E501 as_table = 'enlist' if as_table else '' dict_conversion = None if types is None or isinstance(types, dict): diff --git a/src/pykx/reimporter.py b/src/pykx/reimporter.py index e7d710a..629e952 100644 --- a/src/pykx/reimporter.py +++ b/src/pykx/reimporter.py @@ -42,7 +42,7 @@ def __init__(self): 'QHOME', 'PYKX_EXECUTABLE', 'PYKX_DIR') - self.envvals = [str(os.getenv(x)) for x in self.envlist] + self.envvals = [os.getenv(x) for x in self.envlist] def __enter__(self): self.reset() @@ -54,7 +54,10 @@ def reset(self): Note: It is not recommended to use this function directly instead use the `with` syntax. This will automatically manage setting and restoring the environment variables for you. """ - [os.unsetenv(x) for x in self.envlist] + for x, y in zip(self.envlist, self.envvals): + os.unsetenv(x) + if y is not None: + del os.environ[x] os.environ['QHOME'] = original_qhome def restore(self): @@ -64,7 +67,8 @@ def restore(self): This will automatically manage setting and restoring the environment variables for you. """ for x, y in zip(self.envlist, self.envvals): - os.environ[x] = y + if y is not None: + os.environ[x] = y def __exit__(self, exc_type, exc_value, exc_tb): self.restore() diff --git a/src/pykx/streamlit.py b/src/pykx/streamlit.py new file mode 100644 index 0000000..0636bc5 --- /dev/null +++ b/src/pykx/streamlit.py @@ -0,0 +1,252 @@ +import warnings + +from . 
import beta_features +from .config import _check_beta, pykx_threading, system +from .exceptions import QError +from .ipc import SyncQConnection + +beta_features.append('Streamlit Integration') + + +# This class is required to ensure that in the absence +# of the streamlit dependency PyKX can be imported +class _dummy_class(object): + def __getattr__(self, item): + return self + + def __call__(self, *args, **kwargs): + return self + + +try: + from streamlit.connections import BaseConnection + _streamlit_unavailable = False +except ImportError: + # This base connection object is to ensure the streamlit + # class can be initialized correctly + BaseConnection = {SyncQConnection: _dummy_class} + _streamlit_unavailable = True + + +def _check_streamlit(): + if _streamlit_unavailable: + raise QError('Use of streamlit functionality requires access to ' + 'of streamlit as a dependency, this can be installed ' + ' using:\n\npip install pykx[streamlit]') + + +class PyKXConnection(BaseConnection[SyncQConnection]): + """ + A connection to q/kdb+ processes from streamlit. Initialize using: + + ```python + st.connection("", type = pykx.streamlit.PyKXConnection, *args) + ``` + + PyKX Connection supports the application of queries using Syncronous IPC + connections to q/kdb+ processes or Python processes running PyKX as a + server. + + This is supported through the ``query()`` method, this method allows + users to run `sql`, `qsql` or `q` queries against these processes returning + PyKX data. + + !!! Warning + Streamlit integration is not presently supported for Windows as for + full utilization it requires use of `PYKX_THREADING` functionality + + Parameters: + host: The host name to which a connection is to be established. + port: The port to which a connection is to be established. + username: Username for q connection authorization. + password: Password for q connection authorization. + timeout: Timeout for blocking socket operations in seconds. 
If set to `0`, the socket + will be non-blocking. + large_messages: Whether support for messages >2GB should be enabled. + tls: Whether TLS should be used. + unix: Whether a Unix domain socket should be used instead of TCP. If set to `True`, the + host parameter is ignored. Does not work on Windows. + wait: Whether the q server should send a response to the query (which this connection + will wait to receive). Can be overridden on a per-call basis. If `True`, Python will + wait for the q server to execute the query, and respond with the results. If + `False`, the q server will respond immediately to every query with generic null + (`::`), then execute them at some point in the future. + + Note: The `username` and `password` parameters are not required. + The `username` and `password` parameters are only required if the q server requires + authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more + information. + + Note: The `timeout` argument may not always be enforced when making successive queries. + When making successive queries if one query times out the next query will wait until a + response has been received from the previous query before starting the timer for its own + timeout. This can be avoided by using a separate `SyncQConnection` instance for each + query. + + Examples: + + Connect to a q process at `localhost` on port `5050` as a streamlit connection, + querying using q + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ...
host = 'localhost', port = 5050) + >>> df = conn.query('select from tab').pd() + >>> st.dataframe(df) + ``` + """ + _connection = None + _connection_kwargs = {} + + def _connect(self, **kwargs) -> None: + _check_beta('Streamlit Integration') + _check_streamlit() + if system == 'Windows': + raise QError('Streamlit integration currently unsupported for Windows') + if not pykx_threading: + warnings.warn("Streamlit caching requires execution on secondary threads, " + "to utilize this fully please consider setting PYKX_THREADING " + "= 'True'") + self._connection = SyncQConnection(no_ctx=True, **kwargs) + self._connection_kwargs = kwargs + + def reset(self, **kwargs) -> None: + """ + Reset an existing Streamlit Connection object, this can be used to manually + reconnect to a datasource which was disconnected. This will use the connection + details provided at initialisation of the original class. + + Example: + + Reset a connection if deemed to no longer be valid + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ... host = 'localhost', port = 5050) + >>> if not conn.is_healthy(): + ... conn.reset() + >>> + ``` + """ + _check_beta('Streamlit Integration') + _check_streamlit() + if not isinstance(self._connection, SyncQConnection): + raise QError('Unable to reset uninitialized connection') + self._connection.close() + self._connect(**self._connection_kwargs) + + def is_healthy(self) -> bool: + """ + Check if an existing streamlit connection is 'healthy' and + available for query. + + Returns: + A boolean indicating if the connection being used is in a + 'healthy' state + + Example: + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ... 
host = 'localhost', port = 5050) + >>> conn.is_healthy() + True + ``` + """ + _check_beta('Streamlit Integration') + _check_streamlit() + if not isinstance(self._connection, SyncQConnection): + raise QError('Unable to validate uninitialized connection') + if self._connection.closed: + warnings.warn('Connection closed') + return False + try: + self.query('::') + return True + except BaseException as err: + warnings.warn('Unhealthy connection detected with error: ' + str(err)) + return False + + def query(self, query: str, *args, format='q', **kwargs): + """ + Evaluate a query on the connected q process over IPC. + + Parameters: + query: A q expression to be evaluated. + *args: Arguments to the q query. Each argument will be converted into a `pykx.K` + object. Up to 8 arguments can be provided, as that is the maximum + supported by q. + format: What execution format is to be used, should the function use the `qsql` + interface, execute a `sql` query or run `q` code. + + Raises: + RuntimeError: A closed IPC connection was used. + QError: Query timed out, may be raised if the time taken to make or receive a query + goes over the timeout limit. + TypeError: Too many arguments were provided - q queries cannot have more than 8 + parameters. + ValueError: Attempted to send a Python function over IPC. + + Examples: + + Connect to a q process at `localhost` on port `5050` as a streamlit connection, + querying using q + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ... host = 'localhost', port = 5050) + >>> df = conn.query('select from tab').pd() + >>> st.dataframe(df) + ``` + + Connect to a q process at `localhost` on port `5050` as a streamlit connection, + querying using qsql + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ... 
host = 'localhost', port = 5050) + >>> df = conn.query('tab', where='x>0.5', format='qsql').pd() + >>> st.dataframe(df) + ``` + + Connect to a q process at `localhost` on port `5050` as a streamlit connection, + querying using sql + + ```python + >>> import streamlit as st + >>> import pykx as kx + >>> conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + ... host = 'localhost', port = 5050) + >>> df = conn.query('select * from tab where x>0.5', format='sql').pd() + >>> st.dataframe(df) + ``` + """ + _check_beta('Streamlit Integration') + _check_streamlit() + + def _query(query: str, format, args, kwargs): + if format == 'sql': + try: + res = self._connection.sql(query, *args) + except QError as err: + if '.s.sp' in str(err): + raise QError('SQL functionality not loaded on connected server, error: ' + str(err)) # noqa: E501 + raise QError(err) + return res + elif format == 'q': + return self._connection(query, *args, **kwargs) + if format == 'qsql': + return self._connection.qsql.select(query, *args, **kwargs) + else: + raise QError("Unsupported format provided for query, must be one of 'q', 'qsql' or 'sql'") # noqa: E501 + return _query(query, format, args, kwargs) diff --git a/src/pykx/system.py b/src/pykx/system.py index bdef11b..5bfcbfd 100644 --- a/src/pykx/system.py +++ b/src/pykx/system.py @@ -1,7 +1,10 @@ """System command wrappers for PyKX.""" +import os +from pathlib import Path +from warnings import warn from . 
import Q, wrappers as k -from .exceptions import QError +from .exceptions import PyKXWarning, QError __all__ = ['SystemCommands'] @@ -25,11 +28,24 @@ def __call__(self, x): return self._q('{system x}', k.CharVector(x)) def tables(self, namespace=None): - """Lists the tables in the current namespace or in the provided namespace.""" + """Lists the tables associated with a namespace/dictionary + + Examples: + + Retrieve the tables within a provided namespace: + + ```python + kx.system.tables('.foo') + ``` + + Retrieve the tables within a provided dictionary: + + ```python + kx.system.tables('foo') + ``` + """ if namespace is not None: namespace = str(namespace) - if namespace[0] != '.': - namespace = '.' + namespace return self._q._call(f'\\a {namespace}', wait=True) return self._q._call('\\a', wait=True) @@ -47,13 +63,13 @@ def console_size(self): Get the maximum console_size size of output for EmbeddedQ to 10 columns and 10 rows. - ``` + ```python kx.q.system.console_size ``` Set the maximum console size of output for EmbeddedQ to 10 columns and 10 rows. - ``` + ```python kx.q.system.console_size = [10, 10] ``` """ @@ -81,13 +97,13 @@ def display_size(self): Get the maximum display size of output for EmbeddedQ to 10 columns and 10 rows. - ``` + ```python kx.q.system.display_size ``` Set the maximum display size of output for EmbeddedQ to 10 columns and 10 rows. - ``` + ```python kx.q.system.display_size = [10, 10] ``` """ @@ -108,13 +124,13 @@ def cd(self, directory=None): Get the current working directory. - ``` + ```python kx.q.system.cd() ``` Change the current working directory to the root directory on a `UNIX` like machine. - ``` + ```python kx.q.system.cd('/') ``` """ @@ -129,19 +145,19 @@ def namespace(self, ns=None): Get the current namespace. - ``` + ```python kx.q.system.namespace() ``` Change the current namespace to `.foo`, note the leading `.` may be ommited. - ``` + ```python kx.q.system.namespace('foo') ``` Return to the default namespace. 
- ``` + ```python kx.q.system.namespace('') ``` """ @@ -159,26 +175,30 @@ def namespace(self, ns=None): def functions(self, ns=None): """Get the functions available in the current namespace or functions in a - provided namespace. + provided namespace or dictionary. Examples: Get the functions within the current namespace. - ``` + ```python kx.q.system.functions() ``` - Get the functions within the `.foo` namespace, note the leading `.` may be ommited. + Get the functions within the `.foo` namespace. + ```python + kx.q.system.functions('.foo') ``` - kx.q.system.functions('foo') + + Get the functions within a dictionary. + + ```python + kx.q.system.functions('foo') ``` """ if ns is not None: ns = str(ns) - if ns[0] != '.': - ns = '.' + ns return self._q._call(f'\\f {ns}', wait=True) return self._q._call('\\f', wait=True) @@ -193,13 +213,13 @@ def garbage_collection(self): Get the current garbage collection mode. - ``` + ```python kx.q.system.garbage_collection ``` Set the current garbage collection mode to immediate collection. - ``` + ```python kx.q.system.garbage_collection = 1 ``` """ @@ -212,18 +232,43 @@ def garbage_collection(self, value): return self._q._call(f'\\g {value}', wait=True) raise ValueError('Garbage collection mode can only be set to either 0 or 1.') - def load(self, fd): + def load(self, path): """Loads a q script or a directory of a splayed table. Examples: Load a q script named `foo.q`.
- ``` + ```python kx.q.system.load('foo.q') ``` """ - return self._q._call(f'\\l {fd}', wait=True) + if isinstance(path, k.CharAtom) or isinstance(path, k.CharVector): + path = path.py().decode() + elif isinstance(path, k.SymbolAtom): + path = path.py() + if path[0] == ':': + path = path[1:] + elif isinstance(path, Path): + path = str(path) + if ' ' not in path: + if path[-1] == '/': + path = path[:-1] + print(path) + return self._q._call(f'\\l {path}', wait=True) + warn('Detected a space in supplied path\n' + f' Path: \'{path}\'\n' + 'q system loading does not support spaces, attempting load ' + 'using alternative load operation', PyKXWarning) + full_path = os.path.abspath(path) + load_path = Path(full_path) + folder = load_path.parent.as_posix() + file = load_path.name + + if not (load_path.is_dir() or load_path.is_file()): + raise ValueError(f'Provided user path \'{str(load_path)} \'is not a file/directory') + return self._q._call('.pykx.util.loadfile', k.CharVector(folder), + k.CharVector(file), wait=True) @property def utc_offset(self): @@ -235,13 +280,13 @@ def utc_offset(self): Get the current local time offset. - ``` + ```python kx.q.system.utc_offset ``` Set the current local time offset to be -4:00 from UTC. - ``` + ```python kx.q.system.utc_offset = -4 ``` """ @@ -262,13 +307,13 @@ def precision(self): Get the current level of float precision. - ``` + ```python kx.q.system.precision ``` Set the Level of float precision to 2. - ``` + ```python kx.q.system.precision = 2 ``` """ @@ -292,7 +337,7 @@ def rename(self, src, dest): Rename a file `foo.q` to `bar.q`. - ``` + ```python kx.q.system.rename('foo.q', 'bar.q') ``` """ @@ -326,13 +371,13 @@ def num_threads(self): Set the number of threads for embedded q to use to 8. - ``` + ```python kx.q.num_threads = 8 ``` Set the number of threads for a q process being connected to over IPC to 8. 
- ``` + ```python q = kx.SyncQConnection('localhost', 5001) q.num_threads = 8 ``` @@ -356,13 +401,13 @@ def random_seed(self): Get the current seed value. - ``` + ```python kx.q.system.random_seed ``` Set the random seed value to 23. - ``` + ```python kx.q.system.random_seed = 23 ``` """ @@ -379,13 +424,13 @@ def variables(self, ns=None): Get the variables defined in the current namespace. - ``` + ```python kx.q.system.variables() ``` Get the variables associated with a q namespace/dictionary - ``` + ```python kx.q.system.variables('.foo') kx.q.system.variables('foo') ``` @@ -403,7 +448,7 @@ def workspace(self): Get the memory usage of `EmbeddedQ`. - ``` + ```python kx.q.system.workspace ``` """ @@ -417,14 +462,15 @@ def week_offset(self): Get the current week offset. - ``` + ```python kx.q.system.week_offset ``` Set the current week offset so Monday is the first day of the week. - ``` + ```python kx.q.system.week_offset = 2 + ``` """ return self._q._call('\\W', wait=True) @@ -443,13 +489,13 @@ def date_parsing(self): Get the current value for date parsing. - ``` + ```python kx.q.system.date_parsing ``` Get the current value for date parsing so the format is `dd/mm/yyyy`. - ``` + ```python kx.q.system.date_parsing = 1 ``` """ diff --git a/src/pykx/toq.pyx b/src/pykx/toq.pyx index c436fb2..8db005a 100644 --- a/src/pykx/toq.pyx +++ b/src/pykx/toq.pyx @@ -104,7 +104,7 @@ from . import wrappers as k from ._pyarrow import pyarrow as pa from .cast import * from . 
import config -from .config import disable_pandas_warning, find_core_lib, k_allocator, licensed, pandas_2, system +from .config import find_core_lib, k_allocator, licensed, pandas_2, system from .constants import INF_INT16, INF_INT32, INF_INT64, NULL_INT16, NULL_INT32, NULL_INT64 from .exceptions import LicenseException, PyArrowUnavailable, PyKXException, QError from .util import df_from_arrays, slice_to_range @@ -1303,20 +1303,28 @@ def from_numpy_ndarray(x: np.ndarray, elif ktype in supported_np_temporal_types: if ktype is k.TimestampVector or ktype is k.TimespanVector: offset = TIMESTAMP_OFFSET if ktype is k.TimestampVector else 0 - if x.dtype == np.dtype('kx, False) +_timedelta_resolution_str_map = { + 'timedelta64[ns]': k.TimespanAtom, + 'timedelta64[ms]': k.TimeAtom, + 'timedelta64[s]': k.SecondAtom, +} + +def from_pandas_timedelta( + x: Any, + ktype: Optional[KType] = None, + *, + cast: bool = False, + handle_nulls: bool = False, +) -> k.K: + x = x.to_numpy() + if ktype is None: + ktype = _timedelta_resolution_str_map[str(x.dtype)] + return from_numpy_timedelta64(x, ktype=ktype, cast=cast, handle_nulls=handle_nulls) + def from_arrow(x: Union['pa.Array', 'pa.Table'], ktype: Optional[KType] = None, @@ -2601,7 +2626,8 @@ _converter_from_python_type = { if not pandas_2: _converter_from_python_type[pd.core.indexes.numeric.Int64Index] = from_pandas_index _converter_from_python_type[pd.core.indexes.numeric.Float64Index] = from_pandas_index - +else: + _converter_from_python_type[pd._libs.tslibs.timedeltas.Timedelta] = from_pandas_timedelta class ToqModule(ModuleType): # TODO: `cast` should be set to False at the next major release (KXI-12945) diff --git a/src/pykx/util.py b/src/pykx/util.py index da99cb7..9234a76 100644 --- a/src/pykx/util.py +++ b/src/pykx/util.py @@ -11,6 +11,7 @@ from .config import qargs, qhome, qlic from ._version import version as __version__ from .exceptions import PyKXException +from .reimporter import PyKXReimport __all__ = [ @@ -256,8 
+257,101 @@ def get_default_args(f: Callable) -> Dict[str, Any]: } -def debug_environment(detailed=False, return_info=False): - """Displays information about your environment to help debug issues.""" +def debug_environment(detailed: bool = False, return_info: bool = False) -> Union[str, None]: + """ + Functionality for the retrieval of information about a users environment + + Parameters: + detailed: When returning information about a users license print the content of both + `QHOME` and `QLIC` directories + return_info: Should the information returned from the function be printed to console + (default) or provided as a str + + Returns: + Returns `None` if return information is printed to console otherwise + returns a `str` representation + + Examples: + + ```python + >>> import pykx as kx + >>> kx.util.debug_environment() + **** PyKX information **** + pykx.args: () + pykx.qhome: /usr/local/anaconda3/envs/qenv/q + pykx.qlic: /usr/local/anaconda3/envs/qenv/q + pykx.licensed: True + pykx.__version__: 2.4.3 + pykx.file: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/util.py + + **** Python information **** + sys.version: 3.8.3 (default, Jul 2 2020, 11:26:31) + [Clang 10.0.0 ] + pandas: 2.0.3 + numpy: 1.24.4 + pytz: 2023.3.post1 + which python: /usr/local/bin/python + which python3: /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 + find_libpython: /usr/local/anaconda3/lib/libpython3.8.dylib + + **** Platform information **** + platform.platform: macOS-10.16-x86_64-i386-64bit + + **** PyKX Environment Variables **** + PYKX_IGNORE_QHOME: + PYKX_KEEP_LOCAL_TIMES: + PYKX_ALLOCATOR: + PYKX_GC: + PYKX_LOAD_PYARROW_UNSAFE: + PYKX_MAX_ERROR_LENGTH: + PYKX_NOQCE: + PYKX_Q_LIB_LOCATION: + PYKX_RELEASE_GIL: + PYKX_Q_LOCK: + PYKX_DEFAULT_CONVERSION: + PYKX_SKIP_UNDERQ: + PYKX_UNSET_GLOBALS: + PYKX_DEBUG_INSIGHTS_LIBRARIES: + PYKX_EXECUTABLE: /usr/local/anaconda3/bin/python + PYKX_PYTHON_LIB_PATH: + PYKX_PYTHON_BASE_PATH: + PYKX_PYTHON_HOME_PATH: + PYKX_DIR: 
/usr/local/anaconda3/lib/python3.8/site-packages/pykx + PYKX_QDEBUG: + PYKX_THREADING: + PYKX_4_1_ENABLED: + + **** PyKX Deprecated Environment Variables **** + SKIP_UNDERQ: + UNSET_PYKX_GLOBALS: + KEEP_LOCAL_TIMES: + IGNORE_QHOME: + UNDER_PYTHON: + PYKX_NO_SIGINT: + + **** q Environment Variables **** + QARGS: + QHOME: /usr/local/anaconda3/lib/python3.8/site-packages/pykx/lib + QLIC: /usr/local/anaconda3/envs/qenv/q + QINIT: + + **** License information **** + pykx.qlic directory: True + pykx.qhome writable: True + pykx.qhome lics: ['k4.lic'] + pykx.qlic lics: ['k4.lic'] + + **** q information **** + which q: /usr/local/anaconda3/envs/qenv/q/q + q info: + (`m64;4f;2020.05.04) + "insights.lib.embedq insights.lib.pykx.. + ``` + + + + + """ debug_info = "" debug_info += pykx_information() debug_info += python_information() @@ -376,10 +470,16 @@ def q_information(): q_info += f"which q: {whichq}\n" if whichq is not None: q_info += ('q info: \n') - if platform.system() == 'Windows': # nocov: - q_info += subprocess.check_output("powershell -NoProfile -ExecutionPolicy ByPass \"echo \\\"-1 .Q.s1 (.z.o;.z.K;.z.k);-1 .Q.s1 .z.l 4;\\\" | q -c 200 200\"", shell=True).decode(encoding='utf-8') # noqa: E501 - else: # nocov: - q_info += subprocess.check_output("echo \"-1 .Q.s1 (.z.o;.z.K;.z.k);-1 .Q.s1 .z.l 4;\" | q -c 200 200", shell=True).decode(encoding='utf-8') # noqa: E501 - except Exception: - pass + if platform.system() == 'Windows': + cmd = "powershell -NoProfile -ExecutionPolicy ByPass \"echo \\\"-1 .Q.s1 (.z.o;.z.K;.z.k);-1 .Q.s1 .z.l 4;\\\" | q -c 200 200\"" # noqa: E501 + else: + cmd = "echo \"-1 .Q.s1 (.z.o;.z.K;.z.k);-1 .Q.s1 .z.l 4;\" | q -c 200 200" + with PyKXReimport(): + out = subprocess.run(cmd, shell=True, capture_output=True) + if out.returncode == 0: + q_info += (out.stdout).decode(encoding='utf-8') + else: + q_info += "Failed to gather q information: " + (out.stderr).decode(encoding='utf-8') + except Exception as e: + q_info += f"Failed to gather q 
information: {e}" return q_info diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py index d10df40..af4e55e 100644 --- a/src/pykx/wrappers.py +++ b/src/pykx/wrappers.py @@ -502,7 +502,8 @@ def pd( self, *, raw: bool = False, - has_nulls: Optional[bool] = None + has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): return self.np(raw=raw) @@ -639,6 +640,7 @@ def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ) -> Union[pd.Timedelta, int]: if raw: return self.np(raw=True) @@ -673,6 +675,7 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): if raw: return self.np(raw=True) @@ -1476,6 +1479,51 @@ def __getitem__(self, key): return q('@', self, _idx_to_k(key, _wrappers.k_n(self))) +if pandas_2 and pa is not None: + _as_arrow_map = { + 'List': 'object', + 'BooleanVector': 'bool[pyarrow]', + 'GUIDVector': 'object', + 'ByteVector': 'uint8[pyarrow]', + 'ShortVector': 'int16[pyarrow]', + 'IntVector': 'int32[pyarrow]', + 'LongVector': 'int64[pyarrow]', + 'RealVector': 'float[pyarrow]', + 'FloatVector': 'double[pyarrow]', + 'CharVector': pd.ArrowDtype(pa.binary(1)), + 'SymbolVector': 'string[pyarrow]', + 'TimestampVector': 'timestamp[ns][pyarrow]', + 'MonthVector': 'timestamp[s][pyarrow]', + 'DateVector': 'timestamp[s][pyarrow]', + 'TimespanVector': 'duration[ns][pyarrow]', + 'MinuteVector': 'duration[s][pyarrow]', + 'SecondVector': 'duration[s][pyarrow]', + 'TimeVector': 'duration[ms][pyarrow]' + } + + _as_arrow_raw_map = { + 'List': 'object', + 'BooleanVector': 'bool[pyarrow]', + 'GUIDVector': 'object', + 'ByteVector': 'uint8[pyarrow]', + 'ShortVector': 'int16[pyarrow]', + 'IntVector': 'int32[pyarrow]', + 'LongVector': 'int64[pyarrow]', + 'RealVector': 'float[pyarrow]', + 'FloatVector': 'double[pyarrow]', + 'CharVector': pd.ArrowDtype(pa.binary(1)), + 'SymbolVector': pd.ArrowDtype(pa.binary()), + 'TimestampVector': 'int64[pyarrow]', + 
'DatetimeVector': 'double[pyarrow]', + 'MonthVector': 'int32[pyarrow]', + 'DateVector': 'int32[pyarrow]', + 'TimespanVector': 'int64[pyarrow]', + 'MinuteVector': 'int32[pyarrow]', + 'SecondVector': 'int32[pyarrow]', + 'TimeVector': 'int32[pyarrow]', + } + + class Vector(Collection, abc.Sequence): """Base type for all q vectors, which are ordered collections of a particular type.""" @property @@ -1490,7 +1538,7 @@ def has_infs(self) -> bool: type_char = ' bg xhijefcspmdznuvts'[self.t] except IndexError: return False - return q(f'{{any any -0W 0W{type_char}=\\:x}}')(self).py() + return q(f'{{any -0W 0W{type_char}=\\:x}}')(self).py() def __len__(self): return _wrappers.k_n(self) @@ -1535,8 +1583,19 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): res = pd.Series(self.np(raw=raw, has_nulls=has_nulls), copy=False) + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov + if raw: + if type(self).__name__ != 'GUIDVector': + res = res.astype(_as_arrow_raw_map[type(self).__name__]) + else: + res = res.astype(_as_arrow_map[type(self).__name__]) return res def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): @@ -1959,7 +2018,7 @@ def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: boo def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): """Provides a Numpy representation of the list.""" - return _wrappers.list_np(self, raw, has_nulls) + return _wrappers.list_np(self, False, has_nulls) class NumericVector(Vector): @@ -2006,11 +2065,24 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov arr = self.np(raw=raw, has_nulls=has_nulls) - if 
isinstance(arr, np.ma.MaskedArray): - arr = pd.arrays.IntegerArray(arr, mask=arr.mask, copy=False) - res = pd.Series(arr, copy=False) + if as_arrow: + arr = pa.array(arr) + if raw: + res = pd.Series(arr, copy=False, dtype=_as_arrow_raw_map[type(self).__name__]) + else: + res = pd.Series(arr, copy=False, dtype=_as_arrow_map[type(self).__name__]) + else: + if isinstance(arr, np.ma.MaskedArray): + arr = pd.arrays.IntegerArray(arr, mask=arr.mask, copy=False) + res = pd.Series(arr, copy=False) return res @@ -2172,6 +2244,7 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): if raw: return PandasUUIDArray(self.np(raw=raw)) @@ -2596,9 +2669,17 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): if raw: - return super(self).pd(raw=raw, has_nulls=has_nulls) + res = super().pd(raw=raw, has_nulls=has_nulls) + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov + res = res.astype('int64[pyarrow]') + return res res = pd.Series(self.np(raw=raw, has_nulls=has_nulls), dtype='category') return res @@ -2621,8 +2702,9 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): - res = self._as_list().pd(raw=raw, has_nulls=has_nulls) + res = self._as_list().pd(raw=raw, has_nulls=has_nulls, as_arrow=as_arrow) return res def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): @@ -2786,7 +2868,10 @@ def pd( raw: bool = False, has_nulls: Optional[bool] = None, raw_guids=False, + as_arrow: Optional[bool] = False, ): + if raw_guids: + warnings.warn("Keyword 'raw_guids' is deprecated", DeprecationWarning) if raw_guids and not raw: v = [x.np(raw=isinstance(x, GUIDVector), has_nulls=has_nulls) for x in self._values] v = [PandasUUIDArray(x) if x.dtype == complex else x for x in v] @@ -2805,8 +2890,19 @@ def pd( for i, 
v in enumerate(self._values): if not raw and isinstance(v, EnumVector): df = df.astype({self._keys.py()[i]: 'category'}) - _pykx_base_types[self._keys.py()[i]] = str(type(v)).split('.')[-1] + _pykx_base_types[self._keys.py()[i]] = str(type(v).__name__) df.attrs['_PyKX_base_types'] = _pykx_base_types + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov + if raw: + t_dict = dict(filter(lambda i: i[1] != 'GUIDVector', _pykx_base_types.items())) + df = df.astype(dict([(k, _as_arrow_raw_map[v]) + for k, v in t_dict.items()])) + else: + df = df.astype(dict([(k, _as_arrow_map[v]) for k, v in _pykx_base_types.items()])) return df def pa(self, *, raw: bool = False, has_nulls: Optional[bool] = None): @@ -2988,6 +3084,116 @@ def grouped(self, cols: Union[List, str] = ''): else: raise e + def xbar(self, values): + """ + Apply `xbar` round down operations on the column(s) of a table to a specified + value + + Parameters: + values: Provide a dictionary mapping the column to apply rounding to with + the rounding value as follows `{column: value}`. + + Returns: + A table with rounding applied to the specified columns. + + Example: + + ```python + >>> import pykx as kx + >>> N = 5 + >>> kx.random.seed(42) + >>> tab = kx.Table(data = { + ... 'x': kx.random.random(N, 100.0), + ... 
'y': kx.random.random(N, 10.0)}) + >>> tab + pykx.Table(pykx.q(' + x y + ----------------- + 77.42128 8.200469 + 70.49724 9.857311 + 52.12126 4.629496 + 99.96985 8.518719 + 1.196618 9.572477 + ')) + >>> tab.xbar({'x': 10}) + pykx.Table(pykx.q(' + x y + ----------- + 70 8.200469 + 70 9.857311 + 50 4.629496 + 90 8.518719 + 0 9.572477 + ')) + >>> tab.xbar({'x': 10, 'y': 2}) + pykx.Table(pykx.q(' + x y + ---- + 70 8 + 70 8 + 50 4 + 90 8 + 0 8 + ')) + ``` + """ + return q("{if[11h<>type key y;" + " '\"Column(s) supplied must convert to type pykx.SymbolAtom\"];" + " ![x;();0b;key[y]!{(xbar;x;y)}'[value y;key y]]}", self, values) + + def window_join(self, table, windows, cols, aggs): + """ + Window joins provide the ability to analyse the behaviour of data + in one table in the neighborhood of another. + + Parameters: + table: A `pykx.Table` or Python table equivalent containing a `['sym' and 'time']` + column (or equivalent) with a `parted` attribute on `'sym'`. + windows: A pair of lists containing times/timestamps denoting the beginning and + end of the windows + cols: The names of the common columns `['sym' and 'time']` within each table + aggs: A dictionary mapping the name of a new derived column to a list + specifying the function to be applied as the first element and the columns + which should be passed from the `table` to this function. These are mapped + {'new_col0': [f0, 'c0'], 'new_col1': [f1, 'c0', 'c1']}. + + Returns: + For each record of the original table, a record with additional columns + denoted by the `new_col0` entries in the `aggs` argument are added which is + the result of applying the function `f0` with the content of column `c0` over + the matching intervals in the `table`. + + Example: + + ```python + >>> trades = kx.Table(data={ + ... 'sym': ['ibm', 'ibm', 'ibm'], + ... 'time': kx.q('10:01:01 10:01:04 10:01:08'), + ... 'price': [100, 101, 105]}) + >>> quotes = kx.Table(data={ + ... 'sym': 'ibm', + ... 'time': kx.q('10:01:01+til 9'), + ... 
'ask': [101, 103, 103, 104, 104, 107, 108, 107, 108], + ... 'bid': [98, 99, 102, 103, 103, 104, 106, 106, 107]}) + >>> windows = kx.q('{-2 1+\:x}', trades['time']) + >>> trades.window_join(quotes, + ... windows, + ... ['sym', 'time'], + ... {'ask_max': [lambda x: max(x), 'ask'], + ... 'ask_minus_bid': [lambda x, y: x - y, 'ask', 'bid']}) + pykx.Table(pykx.q(' + sym time price ask_minus_bid ask_max + ---------------------------------------- + ibm 10:01:01 100 3 4 103 + ibm 10:01:04 101 4 1 1 1 104 + ibm 10:01:08 105 3 2 1 1 108 + ')) + ``` + """ + return q("{[t;q;w;c;a]" + "(cols[t], key a) xcol wj[w; c; t;enlist[q],value a]}", + self, table, windows, cols, aggs) + def _repr_html_(self): if not licensed: return self.__repr__() @@ -3417,6 +3623,7 @@ def pd( *, raw: bool = False, has_nulls: Optional[bool] = None, + as_arrow: Optional[bool] = False, ): kk = self._keys._keys vk = self._values._keys @@ -3425,6 +3632,12 @@ def pd( if len(self) == 0: df = pd.DataFrame(columns=kk.py() + vk.py()) df = df.set_index(kk.py()) + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov + df = df.convert_dtypes(dtype_backend='pyarrow') return df idx = [kvg(i).np(raw=raw, has_nulls=has_nulls).reshape(-1) for i in range(len(kk))] @@ -3439,11 +3652,22 @@ def pd( for i, col in enumerate(kk.py()): if not raw and isinstance(kvg(i), EnumVector): df[col] = df[col].astype('category') - _pykx_base_types[col] = str(type(kvg(i))).split('.')[-1] + _pykx_base_types[col] = str(type(kvg(i)).__name__) for i, col in enumerate(vk.py()): if not raw and isinstance(vvg(i), EnumVector): df[col] = df[col].astype('category') - _pykx_base_types[col] = str(type(vvg(i))).split('.')[-1] + _pykx_base_types[col] = str(type(vvg(i)).__name__) + if as_arrow: + if not pandas_2: + raise RuntimeError('Pandas Version must be at least 2.0 to use as_arrow=True') + if pa is None: + raise PyArrowUnavailable # nocov 
+ if raw: + t_dict = dict(filter(lambda i: i[1] != 'GUIDVector', _pykx_base_types.items())) + df = df.astype(dict([(k, _as_arrow_raw_map[v]) + for k, v in t_dict.items()])) + else: + df = df.astype(dict([(k, _as_arrow_map[v]) for k, v in _pykx_base_types.items()])) df.set_index(kk.py(), inplace=True) df.attrs['_PyKX_base_types'] = _pykx_base_types return df diff --git a/tests/test_ipc.py b/tests/test_ipc.py index 2c4685c..ef563d4 100644 --- a/tests/test_ipc.py +++ b/tests/test_ipc.py @@ -657,6 +657,60 @@ def test_large_IPC(kx, q_port): assert size == len(res) +@pytest.mark.unlicensed +def test_func_parameter(kx, q_port): + # The below tests are formatted as follows + # to allow operation both in licensed and + # unlicensed mode, the initial call retrieves + # the function and the assertion passes the + # tested functions to the server as the query arg + with kx.SyncQConnection(port=q_port) as q: + fn = q('{sum}', None) + assert q(fn, [1, 2, 3]).py() == 6 + + fn = q('{floor}', None) + assert q(fn, 5.2).py() == 5 + + fn = q('{mins}', None) + assert q(fn, [1, 2, 3]).py() == [1, 1, 1] + + fn = q('{cut}', None) + assert q(fn, 2, [1, 2, 3]).py() == [[1, 2], [3]] + + fn = q('{min x}') + assert q(fn, [1, 2, 3]).py() == 1 + + if kx.licensed: + with kx.SecureQConnection(port=q_port) as q: + fn = q('{sum}', None) + assert q(fn, [1, 2, 3]).py() == 6 + + fn = q('{floor}', None) + assert q(fn, 5.2).py() == 5 + + fn = q('{mins}', None) + assert q(fn, [1, 2, 3]).py() == [1, 1, 1] + + fn = q('{cut}', None) + assert q(fn, 2, [1, 2, 3]).py() == [[1, 2], [3]] + + fn = q('{min x}') + assert q(fn, [1, 2, 3]).py() == 1 + + +@pytest.mark.unlicensed +def test_func_errors(kx, q_port): + with kx.SyncQConnection(port=q_port) as q: + with pytest.raises(ValueError) as err: + q(sum, [1, 2, 3]) + assert 'builtin_function_or_method' in str(err) + + with kx.SecureQConnection(port=q_port) as q: + with pytest.raises(ValueError) as err: + q(sum, [1, 2, 3]) + assert 'builtin_function_or_method' in 
str(err) + + @pytest.mark.unlicensed def test_debug_kwarg(kx, q_port): with kx.SyncQConnection(port=q_port) as q: @@ -712,6 +766,7 @@ def test_debug_kwarg_global(q_port): with kx.SyncQConnection(port=q_port) as q: q('.pykx_test.cache_sbt:.Q.sbt') q('.Q.sbt:{.pykx_test.cache:y;x y}[.Q.sbt]') + assert q('=', b'z', b'z').py() assert q('til 10').py() == list(range(10)) with pytest.raises(kx.QError) as e: q('til "asd"') @@ -857,6 +912,7 @@ def test_SyncQConnection_reconnect(kx): @pytest.mark.unlicensed +@pytest.mark.xfail(reason='Flaky on several platforms') def test_SecureQConnection_reconnect(kx): q_exe_path = subprocess.run(['which', 'q'], stdout=subprocess.PIPE).stdout.decode().strip() proc = subprocess.Popen( diff --git a/tests/test_license.py b/tests/test_license.py index d57d930..7399b33 100644 --- a/tests/test_license.py +++ b/tests/test_license.py @@ -1,7 +1,6 @@ import base64 from io import StringIO import os -import shutil import re # Do not import pykx here - use the `kx` fixture instead! 
@@ -56,13 +55,27 @@ def test_invalid_lic_continue(tmp_path, monkeypatch): assert str(e) == 'Invalid input provided please try again' +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_invalid_commercial_input(tmp_path, monkeypatch): + os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) + inputs = iter(['Y', 'F']) + monkeypatch.setattr('builtins.input', lambda _: next(inputs)) + try: + import pykx as kx # noqa: F401 + except Exception as e: + assert str(e) == 'User provided option was not one of [1/2]' + + @pytest.mark.skipif( os.getenv('PYKX_THREADING') is not None, reason='Not supported with PYKX_THREADING' ) def test_licensed_signup_no_file(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) - inputs = iter(['Y', 'n', '1', '/test/test.blah']) + inputs = iter(['Y', '1', 'n', '1', '/test/test.blah']) monkeypatch.setattr('builtins.input', lambda _: next(inputs)) try: import pykx as kx # noqa: F401 @@ -76,7 +89,7 @@ def test_licensed_signup_no_file(tmp_path, monkeypatch): ) def test_licensed_signup_invalid_b64(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) - inputs = iter(['Y', 'n', '2', 'data:image/png;test']) + inputs = iter(['Y', '1', 'n', '2', 'data:image/png;test']) monkeypatch.setattr('builtins.input', lambda _: next(inputs)) try: import pykx as kx # noqa: F401 @@ -94,7 +107,7 @@ def test_licensed_success_file(monkeypatch): qhome_path = os.environ['QHOME'] os.unsetenv('QLIC') os.unsetenv('QHOME') - inputs = iter(['Y', 'n', '1', qhome_path + '/kc.lic']) + inputs = iter(['Y', '1', 'n', '1', qhome_path + '/kc.lic']) monkeypatch.setattr('builtins.input', lambda _: next(inputs)) import pykx as kx @@ -112,7 +125,7 @@ def test_licensed_success_b64(monkeypatch): os.unsetenv('QHOME') with open(qhome_path + '/kc.lic', 'rb') as f: license_content = base64.encodebytes(f.read()) - inputs = iter(['Y', 
'n', '2', str(license_content)]) + inputs = iter(['Y', '1', 'n', '2', str(license_content)]) monkeypatch.setattr('builtins.input', lambda _: next(inputs)) import pykx as kx @@ -195,28 +208,6 @@ def test_check_license_success_b64(kx): assert kx.license.check(license, format='STRING') -@pytest.mark.xfail(reason="Manual testing works correctly, seems to be a persistance issue") -@pytest.mark.skipif('KDB_LICENSE_EXPIRED' not in os.environ, - reason='Test required KDB_LICENSE_EXPIRED environment variable to be set') -def test_exp_license(kx): - exp_lic = os.environ['KDB_LICENSE_EXPIRED'] - lic_folder = '/tmp/license' - os.makedirs(lic_folder, exist_ok=True) - with open(lic_folder + '/k4.lic', 'wb') as binary_file: - binary_file.write(base64.b64decode(exp_lic)) - qhome_loc = os.environ['QHOME'] - os.environ['QLIC'] = os.environ['QHOME'] = lic_folder - pattern = re.compile('Your PyKX license has now.*') - with patch('sys.stdout', new=StringIO()) as test_out: - try: - import pykx # noqa: F401 - except Exception as e: - assert str(e) == "EOF when reading a line" - shutil.rmtree(lic_folder) - os.environ['QLIC'] = os.environ['QHOME'] = qhome_loc - assert pattern.match(test_out.getvalue()) - - def test_check_license_invalid(kx): pattern = re.compile("Supplied license information does not match.*") with patch('sys.stdout', new=StringIO()) as test_out: diff --git a/tests/test_pandas_replace.py b/tests/test_pandas_replace.py new file mode 100644 index 0000000..b2bcc37 --- /dev/null +++ b/tests/test_pandas_replace.py @@ -0,0 +1,25 @@ +# Do not import pykx here - use the `kx` fixture instead! 
+ + +def test_unkeyed_replace(kx, q): + tab = kx.q('([] a:2 2 3; b:4 2 6; c:7 2 9; d:(`a;`b;`c); e:(1;2;`a))') + assert all((tab.replace(2, 10).pd() == tab.pd().replace(2, 10))) + assert all((tab.replace(1000, 1).pd() == tab.pd().replace(1000, 1))) + assert all((tab.replace('a', 100).pd() == tab.pd().replace('a', 100))) + assert all((tab.replace(2, 'a').pd() == tab.pd().replace(2, 'a'))) + assert all((tab.replace(3, "test").pd() == tab.pd().replace(3, "test"))) + + replaced_tab = kx.q('([] a:2 2 3; b:((`a,2);2;6); c:7 2 9; d:(`a;`b;`c); e:(1;2;`a))') + assert all((tab.replace(4, ('a', 2)) == replaced_tab)) + + +def test_keyed_replace(kx, q): + ktab = kx.q('([a:2 2 3]b:4 2 6; c:7 2 9; d:(`a;`b;`c); e:(1;2;`a))') + assert all((ktab.replace(2, 10).pd() == ktab.pd().replace(2, 10))) + assert all((ktab.replace(1000, 1).pd() == ktab.pd().replace(1000, 1))) + assert all((ktab.replace('a', 100).pd() == ktab.pd().replace('a', 100))) + assert all((ktab.replace(2, 'a').pd() == ktab.pd().replace(2, 'a'))) + assert all((ktab.replace(3, "test").pd() == ktab.pd().replace(3, "test"))) + + replaced_ktab = kx.q('([a:2 2 3]b:((`a,2);2;6); c:7 2 9; d:(`a;`b;`c); e:(1;2;`a))') + assert all(ktab.replace(4, ('a', 2)).values() == replaced_ktab.values()) diff --git a/tests/test_pykx.py b/tests/test_pykx.py index 22738b8..271eec1 100644 --- a/tests/test_pykx.py +++ b/tests/test_pykx.py @@ -357,3 +357,9 @@ def test_PYKX_Q_LIB_LOCATION(): import pykx as kx kx.q('\\l PYKX_Q_LIB_LOCATION.q') assert 42 == kx.q('.pytest.a').py() + + +@pytest.mark.unlicensed +def test_subnormals(kx): + import numpy as np + assert '5e-324' == str(np.finfo(np.float64).smallest_subnormal + 0.) 
diff --git a/tests/test_q.py b/tests/test_q.py index 288aef7..20fa8be 100644 --- a/tests/test_q.py +++ b/tests/test_q.py @@ -244,6 +244,8 @@ def test_debug_global(): assert kx.q('til 10').py() == list(range(10)) cache_sbt = kx.q('.Q.sbt') kx.q('.Q.sbt:{.pykx_test.cache:x}') + + assert kx.q('=', kx.q('"z"'), b'z').py() try: kx.q('til "asd"') except Exception as e: @@ -276,3 +278,14 @@ def test_41(): kx.q('(`a;):(`b;1.2)') assert 'match' in str(err) os.unsetenv('PYKX_4_1_ENABLED') + + +@pytest.mark.isolate +def test_load_spacefile(tmp_path): + test_location = tmp_path/'test directory' + os.makedirs(test_location, exist_ok=True) + with open(test_location/'file.q', 'w') as f: + f.write('.pykx_test.tmp.variable:1b') + import pykx as kx + kx.q('{.pykx.util.loadfile[1_string x;y]}', test_location, b'file.q') + assert kx.q('.pykx_test.tmp.variable') diff --git a/tests/test_streamlit.py b/tests/test_streamlit.py new file mode 100644 index 0000000..b4f9448 --- /dev/null +++ b/tests/test_streamlit.py @@ -0,0 +1,41 @@ +import os +import sys + +# Do not import pykx here - use the `kx` fixture instead! 
+import pytest + +if not sys.version_info < (3, 8): + import streamlit as st + + +@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher") +def test_streamlit(kx, q_port): + conn = st.connection('pykx', type=kx.streamlit.PyKXConnection, + host='localhost', port=q_port) + assert kx.q('~', conn.query('til 5'), [0, 1, 2, 3, 4]) + + conn.query('tab:([]10?1f;10?1f)') + sql_loaded = conn.query('@[{system"l ",x;1b};"s.k_";{0b}]') + if sql_loaded: + assert kx.q('~', conn.query('tab'), conn.query('select * from tab', format='sql')) + assert kx.q('~', conn.query('select from tab where x>0.5'), conn.query('tab', where='x>0.5', format='qsql')) # noqa: E501 + assert conn.is_healthy() + + with pytest.raises(kx.QError) as err: + conn.query('tab', format='unsupported') + assert 'Unsupported format provided for query' in str(err.value) + + +@pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Threading only works when beta features enabled so this will pass in threading tests' +) +@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher") +def test_beta(): + import pykx as kx + + with pytest.raises(kx.QError) as err: + st.connection('pykx', type=kx.streamlit.PyKXConnection, + host='localhost', port=5050) + assert 'Attempting to use a beta feature "Streamlit' in str(err.value) diff --git a/tests/test_system.py b/tests/test_system.py index b9d47cb..620d583 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -76,6 +76,10 @@ def test_system_tables(): kx.q('qtab: ([] til 10; 2 + til 10)') kx.q('r: ([] til 10; 2 + til 10)') assert kx.q.system.tables().py() == ['qtab', 'r'] + kx.q('.foo.tab:([]10?1f;10?1f)') + kx.q('foo.bar:([]10?1f)') + assert kx.q.system.tables('.foo').py() == ['tab'] + assert kx.q.system.tables('foo').py() == ['bar'] @pytest.mark.isolate @@ -95,7 +99,8 @@ def test_system_functions(): kx.q('\\d .foo') kx.q('func: {x + 3}') kx.q('\\d .') - assert 
all(kx.q.system.functions('foo') == kx.q('enlist `func')) + kx.q('foo.bar: {x+1}') + assert all(kx.q.system.functions('foo') == kx.q('enlist `bar')) assert all(kx.q.system.functions('.foo') == kx.q('enlist `func')) @@ -201,6 +206,51 @@ def test_system_load(): pass +@pytest.mark.isolate +def test_system_space_load(tmp_path): + test_location = tmp_path/'test directory' + os.makedirs(test_location, exist_ok=True) + cache_dir = os.getcwd() + file_location = test_location/'load_file.q' + with open(file_location, 'w') as f: + f.write('.pykx_test.system.variable:1b') + import pykx as kx + kx.q.system.load(file_location) + assert kx.q('.pykx_test.system.variable') + assert cache_dir == os.getcwd() + + test_splay = test_location/'splay/' + kx.q('{x set ([]10?1f;10?1f)}', test_splay) + + def test_load_splay(test_splay): + loaded = kx.q.system.load(test_splay) + assert loaded.py() == 'splay' + assert isinstance(kx.q['splay'], kx.Table) + kx.q('delete splay from `.') + assert cache_dir == os.getcwd() + + test_load_splay(test_splay) # Path + test_load_splay(str(test_splay)) # String + test_load_splay(kx.toq(test_splay)) # Symbol with leading : + test_load_splay(kx.toq(str(test_splay))) # Symbol without leading : + test_load_splay(kx.CharVector(str(test_splay))) # CharVector + test_load_splay(str(test_splay) + '/') # String with trailing / + # Symbol with leading : with trailing / + test_load_splay(kx.q('{`$string[x],"/"}', kx.toq(test_splay))) + # Symbol without leading : with trailing / + test_load_splay(kx.q('{`$string[x],"/"}', kx.toq(str(test_splay)))) + # CharVector with trailing / + test_load_splay(kx.q('{x,"/"}', kx.CharVector(str(test_splay)))) + + file_move_location = test_location/'move_file.q' + with open(file_move_location, 'w') as f: + f.write('.pykx_test.move.variable:1b;system"cd .."') + kx.q.system.load(file_move_location) + assert kx.q('.pykx_test.move.variable') + assert cache_dir != os.getcwd() + os.chdir(cache_dir) + + @pytest.mark.isolate def 
test_system_namespace(): import pykx as kx @@ -287,7 +337,8 @@ def test_system_functions_ipc(q_port): q('print: {til x}') assert all(q.system.functions() == q('enlist `print')) q('.foo.func: {x + 3}') - assert all(q.system.functions('foo') == q('enlist `func')) + q('foo.bar:{x+2}') + assert all(q.system.functions('foo') == q('enlist `bar')) assert all(q.system.functions('.foo') == q('enlist `func')) diff --git a/tests/test_toq.py b/tests/test_toq.py index 51b6e68..a90b52a 100644 --- a/tests/test_toq.py +++ b/tests/test_toq.py @@ -384,24 +384,94 @@ def test_from_datetime64(kx): @pytest.mark.unlicensed @pytest.mark.nep49 -def test_from_datetime64_smsus(kx): - d = np.array(['2020-09-08T07:06:05.000004'], dtype='datetime64[us]') +def test_from_datetime64_smsusns(kx): + d = np.array(['2020-09-08T07:06:05.000004', '2020-09-08T07:06:05.000004'], + dtype='datetime64[ns]') + dn = np.array(['', ''], dtype='datetime64[ns]') + dnm = np.array(['', '2020-09-08T07:06:05.000004'], dtype='datetime64[ns]') + df = pd.DataFrame(data={'d': d, 'dn': dn, 'dnm': dnm}) kd = kx.K(d) - assert isinstance(kd, kx.TimestampVector) + kd_hn = kx.K(d, handle_nulls=True) + kdn = kx.K(dn) + kdn_hn = kx.K(dn, handle_nulls=True) + assert all([isinstance(x, kx.TimestampVector) for x in [kd, kd_hn, kdn, kdn_hn]]) assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() - - d = np.array(['2020-09-08T07:06:05.004'], dtype='datetime64[ms]') + if kx.licensed: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd())).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(), handle_nulls=True)).all().all() + if kx.config.pandas_2: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd(as_arrow=True))).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(as_arrow=True), + handle_nulls=True)).all().all() + + d = np.array(['2020-09-08T07:06:05.000004', '2020-09-08T07:06:05.000004'], + dtype='datetime64[us]') + dn = np.array(['', ''], 
dtype='datetime64[us]') + dnm = np.array(['', '2020-09-08T07:06:05.000004'], dtype='datetime64[us]') + df = pd.DataFrame(data={'d': d, 'dn': dn, 'dnm': dnm}) kd = kx.K(d) - assert isinstance(kd, kx.TimestampVector) + kd_hn = kx.K(d, handle_nulls=True) + kdn = kx.K(dn) + kdn_hn = kx.K(dn, handle_nulls=True) + assert all([isinstance(x, kx.TimestampVector) for x in [kd, kd_hn, kdn, kdn_hn]]) assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + if kx.licensed: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd())).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(), handle_nulls=True)).all().all() + if kx.config.pandas_2: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd(as_arrow=True))).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(as_arrow=True), + handle_nulls=True)).all().all() + + d = np.array(['2020-09-08T07:06:05.000004', '2020-09-08T07:06:05.000004'], + dtype='datetime64[ms]') + dn = np.array(['', ''], dtype='datetime64[ms]') + dnm = np.array(['', '2020-09-08T07:06:05.000004'], dtype='datetime64[ms]') + df = pd.DataFrame(data={'d': d, 'dn': dn, 'dnm': dnm}) - d = np.array(['2020-09-08T07:06:05'], dtype='datetime64[s]') + kd = kx.K(d) + kd_hn = kx.K(d, handle_nulls=True) + kdn = kx.K(dn) + kdn_hn = kx.K(dn, handle_nulls=True) + assert all([isinstance(x, kx.TimestampVector) for x in [kd, kd_hn, kdn, kdn_hn]]) + assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + if kx.licensed: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd())).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(), handle_nulls=True)).all().all() + if kx.config.pandas_2: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd(as_arrow=True))).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(as_arrow=True), + handle_nulls=True)).all().all() + + d = np.array(['2020-09-08T07:06:05.000004', 
'2020-09-08T07:06:05.000004'], + dtype='datetime64[s]') + dn = np.array(['', ''], dtype='datetime64[s]') + dnm = np.array(['', '2020-09-08T07:06:05.000004'], dtype='datetime64[s]') + df = pd.DataFrame(data={'d': d, 'dn': dn, 'dnm': dnm}) kd = kx.K(d) - assert isinstance(kd, kx.TimestampVector) + kd_hn = kx.K(d, handle_nulls=True) + kdn = kx.K(dn) + kdn_hn = kx.K(dn, handle_nulls=True) + assert all([isinstance(x, kx.TimestampVector) for x in [kd, kd_hn, kdn, kdn_hn]]) assert (kd.np() == d.astype(np.dtype('datetime64[ns]'))).all() + if kx.licensed: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd())).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(), handle_nulls=True)).all().all() + if kx.config.pandas_2: + assert (kx.toq(df) == kx.toq(kx.toq(df).pd(as_arrow=True))).all().all() + assert (kx.toq(df, handle_nulls=True) + == kx.toq(kx.toq(df, handle_nulls=True).pd(as_arrow=True), + handle_nulls=True)).all().all() @pytest.mark.unlicensed @@ -926,19 +996,6 @@ def test_from_pandas_dataframe_licensed(q, kx): assert time_tab.equals(kx.K(time_tab).pd()) -@pytest.mark.nep49 -def test_from_pandas_dataframe_licensed_warning(q, kx): - if pd.__version__.split('.')[0] == '2': - q('N:100') - gen_q_datatypes_table(q, 'dset_1D', int(q('N'))) - q('gen_names:{"dset_",/:x,/:string til count y}') - type_tab = q('flip (`$gen_names["tab";dset_1D])!N#\'dset_1D') - df = type_tab.pd() - del df.attrs['_PyKX_base_types'] - with pytest.warns(RuntimeWarning): - kx.K(df) - - @pytest.mark.unlicensed @pytest.mark.nep49 def test_from_complex_pandas_dataframe(kx, pd): diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index 0587c04..040209f 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -1525,7 +1525,6 @@ def test_py(self, q, kx): @pytest.mark.nep49 def test_np(self, q, kx): qv = q(self.v) - assert qv.np(raw=True).dtype == np.uintp assert qv.np().dtype == object assert qv.np()[1] == UUID(int=1) assert isinstance(qv.np()[-1], 
float) @@ -1565,7 +1564,6 @@ def test_contains(self, q): def test_empty_vector(self, q): assert q('0h$()').np().dtype == object - assert q('0h$()').np(raw=True).dtype == np.uint64 # NaN is tricky to compare, so we generate GUID vectors until we get one whose complex form has no @@ -2227,6 +2225,15 @@ def test_bool(self, q): with pytest.raises(TypeError): bool(q('0#', q(self.q_vec_str))) + def test_pd(self, q, kx): + assert all(q(self.q_vec_str).pd(raw=True).to_numpy() == [0, 1, 2, 0, 1, 2]) + assert all(q(self.q_vec_str).pd().to_numpy() == ['abc', 'xyz', 'hmm', 'abc', 'xyz', 'hmm']) + + if kx.config.pandas_2: + assert all(q(self.q_vec_str).pd(raw=True, as_arrow=True) == [0, 1, 2, 0, 1, 2]) + assert all( + q(self.q_vec_str).pd(as_arrow=True) == ['abc', 'xyz', 'hmm', 'abc', 'xyz', 'hmm']) + class Test_Anymap: def test_anymap(self, kx, q, tmp_path): @@ -2501,6 +2508,65 @@ def test_table_negative_indexing(self, q): with pytest.raises(IndexError): tab[-6] + def test_xbar(self, kx, q): + tab = q('([]10?100f;10?10f;10?1f)') + assert q('~', + tab.xbar({'x': 10}), + q('{[tab]update 10 xbar x from tab}', tab)) + assert q('~', + tab.xbar({'x': 10, 'x1': 2}), + q('{[tab]update 10 xbar x, 2 xbar x1 from tab}', tab)) + + with pytest.raises(kx.QError) as err: + tab.xbar({10: 10}) + assert 'Column(s) supplied' in str(err) + + @pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' + ) + def test_window_join(self, kx, q): + trades = kx.Table(data={ + 'sym': ['ibm', 'ibm', 'ibm'], + 'time': q('10:01:01 10:01:04 10:01:08'), + 'price': [100, 101, 105]}) + q['trades'] = trades + quotes = kx.Table(data={ + 'sym': 'ibm', + 'time': q('10:01:01+til 9'), + 'ask': [101, 103, 103, 104, 104, 107, 108, 107, 108], + 'bid': [98, 99, 102, 103, 103, 104, 106, 106, 107]}) + q['quotes'] = quotes + windows = q('{-2 1+\\:x}', trades['time']) + columns = ['sym', 'time'] + q['columns'] = columns + q['windows'] = windows + py_join = 
trades.window_join(quotes, + windows, + columns, + {'ask': [lambda x: max(x), 'ask'], + 'bid': [lambda x: min(x), 'bid']}) + q_join = trades.window_join(quotes, + windows, + columns, + {'ask': [kx.q('max'), 'ask'], + 'bid': [kx.q('min'), 'bid']}) + + only_q = kx.q('wj[windows; columns;trades;(quotes;(max;`ask);(min;`bid))]') + assert q('~', py_join, q_join) + assert q('~', py_join, only_q) + + py_multi_join = trades.window_join(quotes, + windows, + columns, + {'ask_min_bid': [lambda x, y: x - y, 'ask', 'bid']}) + + q_multi_join = trades.window_join(quotes, + windows, + columns, + {'ask_min_bid': [kx.q('{x - y}'), 'ask', 'bid']}) + assert q('~', py_multi_join, q_multi_join) + @pytest.mark.filterwarnings('ignore:Splayed tables are not yet implemented') class Test_SplayedTable: @@ -4101,19 +4167,6 @@ def test_repr_html(kx, q): @pytest.mark.unlicensed -@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) -def test_pyarrow_pandas_ci_only(q): - if os.getenv('CI'): - with pytest.raises(NotImplementedError): - q('get`:a set (' - '(1 2;3 4);' - '`time`price`vol!(2022.03.29D16:45:14.880819;1.;100i);' - '([]a:1 2;b:("ab";"cd")))' - ).pd(as_arrow=True) - - -@pytest.mark.unlicensed -@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) @pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") def test_pyarrow_pandas_all_ipc(kx, q_port): with kx.QConnection(port=q_port) as q: @@ -4129,27 +4182,17 @@ def gen_q_datatypes_table(q, table_name: str, num_rows: int = 100) -> str: gen_q_datatypes_table(q, 'tab', 100) for vec in q('tab'): - assert 'pyarrow' in str(vec.pd(as_arrow=True)) + assert 'pyarrow' in vec.pd(as_arrow=True).dtype.__repr__() q('tab: flip (`a`b`c`d`e`f`g`h`i`j`k`l`m`n)!(tab)') cols = q('cols tab').py() dfa = q('tab').pd(as_arrow=True) for c in cols: - assert 'pyarrow' in str(dfa[c].dtype) + assert 'pyarrow' in dfa[c].dtype.__repr__() q('tab: 
@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards")
def test_pyarrow_pandas_all_with_null_inf(kx):
    """Check PyArrow-backed pandas conversions against the default conversions.

    For an unkeyed and then a keyed table, the table (and each of its columns
    individually) is converted with ``.pd()`` and with ``.pd(as_arrow=True)``,
    both results are passed back through ``kx.toq`` and the two round-tripped
    values must match (q ``~``). Every combination of ``raw`` and
    ``handle_nulls`` is exercised, first on all rows and then on rows 0, 2
    and 3 only.
    """

    def build_table(keyed):
        # One column per type character in "hijefpmdnuvt"; the q expression
        # joins the casts of `1 0N` with the parsed "-0W"/"0W" literals.
        tab = kx.q('{d:"hijefpmdnuvt";flip (`$/:d)!(d$\\:1 0N),\'value each\'("-0W";"0W"),\\:/:d}',
                   None)
        # Add boolean, byte, guid, char, symbol and string-list columns.
        tab = kx.q('''
            {update b:0101b,x:0x00112233,g:{0Ng,3?0Ng}[],
            c:"0 24",s:`a``bb`cc,C:("aa";"";enlist "b";"cc") from x}
        ''', tab)
        tab = kx.q.xcol({'i': 'ii'}, tab)
        if not keyed:
            return tab
        return kx.q('{`keycol xkey update keycol:i from x}', tab)

    def assert_arrow_matches(tab, handle_nulls, raw):
        # Whole-table comparison: values and column types must agree.
        plain = kx.toq(tab.pd(raw=raw), handle_nulls=handle_nulls)
        arrow = kx.toq(tab.pd(raw=raw, as_arrow=True), handle_nulls=handle_nulls)
        assert kx.q('~', plain, arrow)
        assert kx.q('~', plain.dtypes, arrow.dtypes)

        # KXI-44586 g guids cannot convert
        tab = tab.drop(columns=['g'])
        names = tab.columns.py()
        plain_cols = kx.toq([tab[name].pd(raw=raw) for name in names],
                            handle_nulls=handle_nulls)
        arrow_cols = kx.toq([tab[name].pd(raw=raw, as_arrow=True) for name in names],
                            handle_nulls=handle_nulls)

        # Column-by-column comparison: same value and same wrapper class.
        for expected, actual in zip(plain_cols, arrow_cols):
            assert kx.q('~', expected, actual)
            assert type(expected) == type(actual)

    null_settings = (False, True)

    for keyed in (False, True):
        # Pass 1: every row, nulls included.
        tab = build_table(keyed)
        for handle_nulls in null_settings:
            assert_arrow_matches(tab, handle_nulls, False)

        # KXI-44569 C List return is junk
        tab = tab.drop(columns=['C'])
        for handle_nulls in null_settings:
            assert_arrow_matches(tab, handle_nulls, True)

        # Pass 2: fresh table without the minute column.
        # Minute overflows Seconds when roundtripping
        tab = build_table(keyed).drop(columns=['u'])

        # Exclude nulls to test non masked array logic
        for handle_nulls in null_settings:
            assert_arrow_matches(tab.iloc[[0, 2, 3]], handle_nulls, False)

        tab = tab.drop(columns=['C'])
        for handle_nulls in null_settings:
            assert_arrow_matches(tab.iloc[[0, 2, 3]], handle_nulls, True)
test_pyarrow_pandas_table_roundtrip(kx): assert (tab[x]._values == tab2[x]._values).all() -@pytest.mark.embedded -@pytest.mark.skipif(pd.__version__[0] == '1', reason="Only supported from Pandas 2.* onwards") -@pytest.mark.xfail(reason="as_arrow functionality currently awaiting introduction", strict=False) -def test_pyarrow_pandas_timedeltas(kx): - tds = kx.toq(kx.q(''' - ([] a:1D 1D01 1D01:02 1D01:01:01 1D01:01:01.001 1D01:01:01.001001 1D01:01:01.001001001) - ''').pd(as_arrow=True)['a']) - assert ([-17, -17, -17, -18, -19, -16, -16] == kx.q('{type each x}', tds)).all() +@pytest.mark.unlicensed +def test_all_timetypes(kx, q_port): + with kx.QConnection(port=q_port) as q: + # timestamp + td = q(''' + ([] a:2000.01.01D 2000.01.01D01 2000.01.01D01:02 2000.01.01D01:01:01 + 2000.01.01D01:01:01.001 2000.01.01D01:01:01.001001 + 2000.01.01D01:01:01.001001001) + ''') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'timestamp[ns][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + assert 'datetime64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # month + td = q('''([] a:2000.01 2000.12m)''') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'timestamp[s][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert 'kx.TimestampAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + if kx.config.pandas_2: + assert 
'datetime64[s]' == str(df.dtypes['a']) + else: + assert 'datetime64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + assert 'kx.TimestampAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # date + td = q('([] a:2000.01.01 2000.01.02)') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'timestamp[s][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert 'kx.TimestampAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + if kx.config.pandas_2: + assert 'datetime64[s]' == str(df.dtypes['a']) + else: + assert 'datetime64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + assert 'kx.TimestampAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # timespan + td = q(''' + ([] a:1D 1D01 1D01:02 1D01:01:01 1D01:01:01.001 1D01:01:01.001001 + 1D01:01:01.001001001) + ''') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'duration[ns][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + assert 'timedelta64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # minute + td = q('([] a:00:00 00:01 00:10 01:00 24:00)') + if kx.config.pandas_2: + df 
= td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'duration[s][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert 'kx.SecondAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + if kx.config.pandas_2: + assert 'timedelta64[s]' == str(df.dtypes['a']) + else: + assert 'timedelta64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + if kx.config.pandas_2: + assert 'kx.SecondAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + else: + assert 'kx.TimespanAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # second + td = q('([] a:00:00:00 00:00:01 00:00:10 00:01:00 00:10:00 01:00:00 24:00:00)') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 'duration[s][pyarrow]' == str(df.dtypes['a']) + if kx.licensed: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd(as_arrow=True)) + assert all(td['a'] == td_a_roundtrip) + df = td.pd() + if kx.config.pandas_2: + assert 'timedelta64[s]' == str(df.dtypes['a']) + else: + assert 'timedelta64[ns]' == str(df.dtypes['a']) + td_roundtrip = kx.toq(df) + if kx.licensed: + if kx.config.pandas_2: + assert str(td.dtypes['datatypes'][0]) == str(td_roundtrip.dtypes['datatypes'][0]) + else: + assert 'kx.TimespanAtom' == str(td_roundtrip.dtypes['datatypes'][0]) + assert all(td == td_roundtrip) + td_a_roundtrip = kx.toq(td['a'].pd()) + assert all(td['a'] == td_a_roundtrip) + + # time + td = q(''' + ([] a:00:00:00.000 00:00:00.001 00:00:01.000 00:00:10.000 + 00:01:00.000 00:10:00.000 01:00:00.000 24:00:00.000) + ''') + if kx.config.pandas_2: + df = td.pd(as_arrow=True) + td_roundtrip = kx.toq(df) + assert 
@pytest.mark.unlicensed
def test_datetime64(kx):
    """Round-trip a second-resolution ``datetime64`` column through PyKX.

    A one-column DataFrame of ``datetime64[s]`` values is converted with
    ``kx.toq`` and back with ``.pd()``; every value must equal the original.
    """
    df = pd.DataFrame(data={'a': np.array([9999, 1577899899], dtype='datetime64[s]')})
    roundtrip = kx.toq(df).pd()
    # The comparison result was previously discarded, so the test could never
    # fail; assert it so a broken round-trip is reported.
    assert all(df['a'] == roundtrip['a'])