From d2d13d983f6c365daeed36930291f167f60a5fef Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 25 Mar 2023 13:59:59 -0400 Subject: [PATCH] Support TIMESTAMP WITH TIMEZONE and DECIMAL types with DuckDB connection (#283) * Handle duckdb's TIMESTAMP WITH TIME ZONE type * Handle duckdb's DECIMAL type * Bump version to 1.1.1 --- Cargo.lock | 18 +++--- automation/bump_version.py | 1 - javascript/vegafusion-embed/package-lock.json | 2 +- javascript/vegafusion-embed/package.json | 2 +- python/vegafusion-jupyter/package-lock.json | 2 +- python/vegafusion-jupyter/package.json | 2 +- .../vegafusion_jupyter/_frontend.py | 2 +- .../vegafusion_jupyter/_version.py | 2 +- python/vegafusion/setup.cfg | 4 +- python/vegafusion/tests/test_pretransform.py | 63 +++++++++++++++++++ python/vegafusion/vegafusion/_version.py | 2 +- .../vegafusion/connection/duckdb.py | 10 ++- vegafusion-common/Cargo.toml | 2 +- vegafusion-core/Cargo.toml | 4 +- vegafusion-dataframe/Cargo.toml | 4 +- vegafusion-datafusion-udfs/Cargo.toml | 4 +- vegafusion-python-embed/Cargo.toml | 10 +-- vegafusion-runtime/Cargo.toml | 12 ++-- vegafusion-server/Cargo.toml | 10 +-- vegafusion-sql/Cargo.toml | 12 ++-- vegafusion-wasm/Cargo.toml | 6 +- vegafusion-wasm/package-lock.json | 2 +- vegafusion-wasm/package.json | 2 +- 23 files changed, 123 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2ca90591..f564cfa72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3893,7 +3893,7 @@ dependencies = [ [[package]] name = "vegafusion-common" -version = "1.1.0" +version = "1.1.1" dependencies = [ "arrow", "chrono", @@ -3907,7 +3907,7 @@ dependencies = [ [[package]] name = "vegafusion-core" -version = "1.1.0" +version = "1.1.1" dependencies = [ "bytes", "chrono", @@ -3936,7 +3936,7 @@ dependencies = [ [[package]] name = "vegafusion-dataframe" -version = "1.1.0" +version = "1.1.1" dependencies = [ "arrow", "async-trait", @@ -3947,7 +3947,7 @@ dependencies = [ [[package]] name = "vegafusion-datafusion-udfs" 
-version = "1.1.0" +version = "1.1.1" dependencies = [ "chrono", "chrono-tz", @@ -3960,7 +3960,7 @@ dependencies = [ [[package]] name = "vegafusion-python-embed" -version = "1.1.0" +version = "1.1.1" dependencies = [ "arrow", "async-trait", @@ -3981,7 +3981,7 @@ dependencies = [ [[package]] name = "vegafusion-runtime" -version = "1.1.0" +version = "1.1.1" dependencies = [ "async-lock", "async-recursion", @@ -4032,7 +4032,7 @@ dependencies = [ [[package]] name = "vegafusion-server" -version = "1.1.0" +version = "1.1.1" dependencies = [ "assert_cmd", "clap 3.2.22", @@ -4056,7 +4056,7 @@ dependencies = [ [[package]] name = "vegafusion-sql" -version = "1.1.0" +version = "1.1.1" dependencies = [ "arrow", "async-std", @@ -4086,7 +4086,7 @@ dependencies = [ [[package]] name = "vegafusion-wasm" -version = "1.1.0" +version = "1.1.1" dependencies = [ "chrono", "console_error_panic_hook", diff --git a/automation/bump_version.py b/automation/bump_version.py index 823737969..d3bcf3156 100644 --- a/automation/bump_version.py +++ b/automation/bump_version.py @@ -53,7 +53,6 @@ def bump_version(version): package_json_dirs = [ root / "vegafusion-wasm", root / "javascript" / "vegafusion-embed", - root / "javascript" / "vegafusion-chart-editor", root / "python" / "vegafusion-jupyter" ] for package_json_dir in package_json_dirs: diff --git a/javascript/vegafusion-embed/package-lock.json b/javascript/vegafusion-embed/package-lock.json index e9c03e212..0aa780ac1 100644 --- a/javascript/vegafusion-embed/package-lock.json +++ b/javascript/vegafusion-embed/package-lock.json @@ -1,6 +1,6 @@ { "name": "vegafusion-embed", - "version": "1.1.0", + "version": "1.1.1", "lockfileVersion": 2, "requires": true, "packages": { diff --git a/javascript/vegafusion-embed/package.json b/javascript/vegafusion-embed/package.json index 0529b27aa..b376aecaf 100644 --- a/javascript/vegafusion-embed/package.json +++ b/javascript/vegafusion-embed/package.json @@ -1,6 +1,6 @@ { "name": "vegafusion-embed", - 
"version": "1.1.0", + "version": "1.1.1", "description": "Library to embed vegafusion visualizations", "keywords": [ "vega", diff --git a/python/vegafusion-jupyter/package-lock.json b/python/vegafusion-jupyter/package-lock.json index 217770eef..38f440a46 100644 --- a/python/vegafusion-jupyter/package-lock.json +++ b/python/vegafusion-jupyter/package-lock.json @@ -1,6 +1,6 @@ { "name": "vegafusion-jupyter", - "version": "1.1.0", + "version": "1.1.1", "lockfileVersion": 2, "requires": true, "packages": { diff --git a/python/vegafusion-jupyter/package.json b/python/vegafusion-jupyter/package.json index 0d2a7f8a6..9c228ef38 100644 --- a/python/vegafusion-jupyter/package.json +++ b/python/vegafusion-jupyter/package.json @@ -1,6 +1,6 @@ { "name": "vegafusion-jupyter", - "version": "1.1.0", + "version": "1.1.1", "description": "Altair Jupyter Widget library that relies on VegaFusion for serverside calculations", "keywords": [ "jupyter", diff --git a/python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py b/python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py index 98c2b0be5..4e8c887e6 100644 --- a/python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py +++ b/python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py @@ -2,4 +2,4 @@ Information about the frontend package of the widgets. 
""" module_name = "vegafusion-jupyter" -module_version = "^1.1.0" +module_version = "^1.1.1" diff --git a/python/vegafusion-jupyter/vegafusion_jupyter/_version.py b/python/vegafusion-jupyter/vegafusion_jupyter/_version.py index 1a72d32e5..b3ddbc41f 100644 --- a/python/vegafusion-jupyter/vegafusion_jupyter/_version.py +++ b/python/vegafusion-jupyter/vegafusion_jupyter/_version.py @@ -1 +1 @@ -__version__ = '1.1.0' +__version__ = '1.1.1' diff --git a/python/vegafusion/setup.cfg b/python/vegafusion/setup.cfg index 8cf58e7c8..9898ae114 100644 --- a/python/vegafusion/setup.cfg +++ b/python/vegafusion/setup.cfg @@ -4,7 +4,7 @@ universal = 0 [metadata] name = vegafusion description = Core tools for using VegaFusion from Python -version = 1.1.0 +version = 1.1.1 long_description = file: README.md long_description_content_type = text/markdown keywords = vega, altair, vegafusion, arrow @@ -34,6 +34,6 @@ install_requires = [options.extras_require] embed = - vegafusion-python-embed==1.1.0 + vegafusion-python-embed==1.1.1 vl-convert-python>=0.7.0 diff --git a/python/vegafusion/tests/test_pretransform.py b/python/vegafusion/tests/test_pretransform.py index 8a3951bde..e0f12a8bc 100644 --- a/python/vegafusion/tests/test_pretransform.py +++ b/python/vegafusion/tests/test_pretransform.py @@ -5,6 +5,7 @@ import json import polars as pl from datetime import date +import decimal def order_items_spec(): @@ -1045,6 +1046,68 @@ def test_pre_transform_dataset_duckdb_conn(): vf.runtime.set_connection("datafusion") +def test_pre_transform_dataset_duckdb_with_decimal_conn(): + import duckdb + + n = 4050 + # Input a pandas DataFrame to be registered with the DuckDB connection + order_items = pd.DataFrame({ + "menu_item_int": [0] * n + [1] * (2 * n) + [2] * (3 * n) + }) + + try: + # Create duckdb connection and register order_items with duckdb + conn = duckdb.connect() + conn.register("order_items_int", order_items) + conn.query( + "SELECT menu_item_int::DECIMAL(12,2) as menu_item from 
order_items_int" + ).to_view("order_items") + + # Set this as the active connection + vf.runtime.set_connection(conn) + + # order_items includes a table://order_items data url + vega_spec = order_items_spec() + datasets, warnings = vf.runtime.pre_transform_datasets( + vega_spec, + ["data_0"], + "UTC", + ) + assert len(warnings) == 0 + assert len(datasets) == 1 + + result = datasets[0] + expected = pd.DataFrame({ + "menu_item": [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)], + "__count": [n, 2 * n, 3 * n] + }) + pd.testing.assert_frame_equal(result, expected) + finally: + vf.runtime.set_connection("datafusion") + + +def test_duckdb_timestamp_with_timezone(): + try: + vf.runtime.set_connection("duckdb") + dates_df = pd.DataFrame({ + "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], + }) + dates_df["date_col"] = pd.to_datetime(dates_df.date_col).dt.tz_localize("UTC") + spec = date_column_spec() + + (output_ds,), warnings = vf.runtime.pre_transform_datasets( + spec, ["data_0"], "America/New_York", default_input_tz="UTC", inline_datasets=dict(dates=dates_df) + ) + + # Input timestamps are tz-aware UTC, so the expected values are midnight UTC + assert list(output_ds.date_col) == [ + pd.Timestamp('2022-01-01 00:00:00', tz='UTC'), + pd.Timestamp('2022-01-02 00:00:00', tz='UTC'), + pd.Timestamp('2022-01-03 00:00:00', tz='UTC') + ] + finally: + vf.runtime.set_connection("datafusion") + def test_gh_268_hang(): """ Tests for hang reported in https://github.com/hex-inc/vegafusion/issues/268 diff --git a/python/vegafusion/vegafusion/_version.py b/python/vegafusion/vegafusion/_version.py index 1a72d32e5..b3ddbc41f 100644 --- a/python/vegafusion/vegafusion/_version.py +++ b/python/vegafusion/vegafusion/_version.py @@ -1 +1 @@ -__version__ = '1.1.0' +__version__ = '1.1.1' diff --git a/python/vegafusion/vegafusion/connection/duckdb.py b/python/vegafusion/vegafusion/connection/duckdb.py index 23a0853dd..65b0883aa 100644 --- 
a/python/vegafusion/vegafusion/connection/duckdb.py +++ b/python/vegafusion/vegafusion/connection/duckdb.py @@ -1,3 +1,4 @@ +import re import warnings from . import SqlConnection, CsvReadOptions @@ -26,6 +27,11 @@ def duckdb_type_name_to_pyarrow_type(duckdb_type: str) -> pa.DataType: return pa.int32() elif duckdb_type in ("BIGINT", "INT8", "LONG"): return pa.int64() + elif duckdb_type.startswith("DECIMAL"): + matches = re.findall(r"\d+", duckdb_type) + precision = int(matches[0]) + scale = int(matches[1]) + return pa.decimal128(precision, scale) elif duckdb_type == "UTINYINT": return pa.uint8() elif duckdb_type == "USMALLINT": @@ -34,14 +40,14 @@ def duckdb_type_name_to_pyarrow_type(duckdb_type: str) -> pa.DataType: return pa.uint32() elif duckdb_type == "UBIGINT": return pa.uint64() - elif duckdb_type == "DOUBLE": - return pa.float64() elif duckdb_type == "BOOLEAN": return pa.bool_() elif duckdb_type == "DATE": return pa.date32() elif duckdb_type == "TIMESTAMP": return pa.timestamp("ms") + elif duckdb_type == "TIMESTAMP WITH TIME ZONE": + return pa.timestamp("ms", tz="UTC") else: raise ValueError(f"Unexpected DuckDB type: {duckdb_type}") diff --git a/vegafusion-common/Cargo.toml b/vegafusion-common/Cargo.toml index 11dda0781..3faf9a6b5 100644 --- a/vegafusion-common/Cargo.toml +++ b/vegafusion-common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vegafusion-common" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "Common components required by multiple VegaFusion crates" license = "BSD-3-Clause" diff --git a/vegafusion-core/Cargo.toml b/vegafusion-core/Cargo.toml index fd5e8ae53..aa0e0113f 100644 --- a/vegafusion-core/Cargo.toml +++ b/vegafusion-core/Cargo.toml @@ -2,7 +2,7 @@ name = "vegafusion-core" license = "BSD-3-Clause" edition = "2021" -version = "1.1.0" +version = "1.1.1" description = "Core components required by multiple VegaFusion crates, with WASM compatibility" [features] @@ -35,7 +35,7 @@ features = [ "preserve_order",] 
[dependencies.vegafusion-common] path = "../vegafusion-common" features = [ "json", "sqlparser",] -version = "1.1.0" +version = "1.1.1" [dependencies.datafusion-common] version = "18.0.0" diff --git a/vegafusion-dataframe/Cargo.toml b/vegafusion-dataframe/Cargo.toml index a6d40074e..37e670871 100644 --- a/vegafusion-dataframe/Cargo.toml +++ b/vegafusion-dataframe/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vegafusion-dataframe" license = "BSD-3-Clause" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "VegaFusion's DataFrame and Connection traits" @@ -10,7 +10,7 @@ async-trait = "0.1.53" [dependencies.vegafusion-common] path = "../vegafusion-common" -version = "1.1.0" +version = "1.1.1" [dependencies.datafusion-common] version = "18.0.0" diff --git a/vegafusion-datafusion-udfs/Cargo.toml b/vegafusion-datafusion-udfs/Cargo.toml index dd693573f..2b8c8ee91 100644 --- a/vegafusion-datafusion-udfs/Cargo.toml +++ b/vegafusion-datafusion-udfs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vegafusion-datafusion-udfs" license = "BSD-3-Clause" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "Custom DataFusion UDFs used by VegaFusion" @@ -14,7 +14,7 @@ regex = "^1.5.5" [dependencies.vegafusion-common] path = "../vegafusion-common" -version = "1.1.0" +version = "1.1.1" [dependencies.datafusion-physical-expr] version = "18.0.0" diff --git a/vegafusion-python-embed/Cargo.toml b/vegafusion-python-embed/Cargo.toml index 7b9a11098..06abf9ac6 100644 --- a/vegafusion-python-embed/Cargo.toml +++ b/vegafusion-python-embed/Cargo.toml @@ -2,7 +2,7 @@ name = "vegafusion-python-embed" license = "BSD-3-Clause" edition = "2021" -version = "1.1.0" +version = "1.1.1" description = "vegafusion-python-embed PyO3 Python Package" [lib] @@ -36,21 +36,21 @@ features = [ "pyarrow",] [dependencies.vegafusion-common] path = "../vegafusion-common" features = [ "pyo3",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-core] path = "../vegafusion-core" 
features = [ "pyarrow",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-runtime] path = "../vegafusion-runtime" features = [ "pyarrow",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-sql] path = "../vegafusion-sql" -version = "1.1.0" +version = "1.1.1" features = [ "datafusion-conn",] [dependencies.tokio] diff --git a/vegafusion-runtime/Cargo.toml b/vegafusion-runtime/Cargo.toml index bce669ecb..27a06c88a 100644 --- a/vegafusion-runtime/Cargo.toml +++ b/vegafusion-runtime/Cargo.toml @@ -6,7 +6,7 @@ harness = false name = "vegafusion-runtime" license = "BSD-3-Clause" edition = "2021" -version = "1.1.0" +version = "1.1.1" description = "VegaFusion Runtime" [features] @@ -59,20 +59,20 @@ version = "0.30.0" [dependencies.vegafusion-common] path = "../vegafusion-common" features = [ "json", "sqlparser", "prettyprint",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-core] path = "../vegafusion-core" features = [ "sqlparser",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-datafusion-udfs] path = "../vegafusion-datafusion-udfs" -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-dataframe] path = "../vegafusion-dataframe" -version = "1.1.0" +version = "1.1.1" [dependencies.serde] version = "1.0.137" @@ -110,5 +110,5 @@ features = [ "async_tokio",] [dev-dependencies.vegafusion-sql] path = "../vegafusion-sql" -version = "1.1.0" +version = "1.1.1" features = [ "datafusion-conn",] diff --git a/vegafusion-server/Cargo.toml b/vegafusion-server/Cargo.toml index ebbbda8fb..f85115b2f 100644 --- a/vegafusion-server/Cargo.toml +++ b/vegafusion-server/Cargo.toml @@ -5,7 +5,7 @@ path = "src/main.rs" [package] name = "vegafusion-server" license = "BSD-3-Clause" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "VegaFusion Server" @@ -29,20 +29,20 @@ prost-build = "0.11.4" [dependencies.vegafusion-common] path = "../vegafusion-common" -version = "1.1.0" +version = "1.1.1" 
[dependencies.vegafusion-core] path = "../vegafusion-core" features = [ "tonic_support",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-runtime] path = "../vegafusion-runtime" -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-sql] path = "../vegafusion-sql" -version = "1.1.0" +version = "1.1.1" features = [ "datafusion-conn",] [dependencies.tokio] diff --git a/vegafusion-sql/Cargo.toml b/vegafusion-sql/Cargo.toml index d7ea34ebd..79829732e 100644 --- a/vegafusion-sql/Cargo.toml +++ b/vegafusion-sql/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vegafusion-sql" license = "BSD-3-Clause" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "VegaFusion SQL dialect generation and connection implementations" @@ -24,16 +24,16 @@ version = "0.30.0" [dependencies.vegafusion-common] path = "../vegafusion-common" -version = "1.1.0" +version = "1.1.1" features = [ "sqlparser",] [dependencies.vegafusion-dataframe] path = "../vegafusion-dataframe" -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-datafusion-udfs] path = "../vegafusion-datafusion-udfs" -version = "1.1.0" +version = "1.1.1" optional = true [dependencies.arrow] @@ -83,11 +83,11 @@ features = [ "preserve_order",] [dev-dependencies.vegafusion-datafusion-udfs] path = "../vegafusion-datafusion-udfs" -version = "1.1.0" +version = "1.1.1" [dev-dependencies.vegafusion-common] path = "../vegafusion-common" -version = "1.1.0" +version = "1.1.1" features = [ "sqlparser", "json", "prettyprint",] [dev-dependencies.tokio] diff --git a/vegafusion-wasm/Cargo.toml b/vegafusion-wasm/Cargo.toml index 4a695e25c..a6b81e2c3 100644 --- a/vegafusion-wasm/Cargo.toml +++ b/vegafusion-wasm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vegafusion-wasm" license = "BSD-3-Clause" -version = "1.1.0" +version = "1.1.1" edition = "2021" description = "VegaFusion WASM package for embedding Vega charts in the browser with a connection to a VegaFusion Runtime\n" @@ -26,11 +26,11 @@ 
wasm-bindgen-test = "0.3.13" [dependencies.vegafusion-common] path = "../vegafusion-common" features = [ "json",] -version = "1.1.0" +version = "1.1.1" [dependencies.vegafusion-core] path = "../vegafusion-core" -version = "1.1.0" +version = "1.1.1" [dependencies.serde] version = "1.0.137" diff --git a/vegafusion-wasm/package-lock.json b/vegafusion-wasm/package-lock.json index 7919b2698..2bcf45ec0 100644 --- a/vegafusion-wasm/package-lock.json +++ b/vegafusion-wasm/package-lock.json @@ -1,6 +1,6 @@ { "name": "vegafusion-wasm", - "version": "1.1.0", + "version": "1.1.1", "lockfileVersion": 2, "requires": true, "packages": { diff --git a/vegafusion-wasm/package.json b/vegafusion-wasm/package.json index d1283030b..e9bf5204a 100644 --- a/vegafusion-wasm/package.json +++ b/vegafusion-wasm/package.json @@ -1,6 +1,6 @@ { "name": "vegafusion-wasm", - "version": "1.1.0", + "version": "1.1.1", "author": { "name": "Jon Mease", "email": "jon@vegafusion.io",