Skip to content

Commit

Permalink
Update DataFusion 28, Arrow to 43, sqlparser to 0.35, and other dep u…
Browse files Browse the repository at this point in the history
…pdates (#388)

* DataFusion 28, Arrow 43, sqlparser 0.35.0

* update tokio

* update Python protobuf to DataFusion 28
  • Loading branch information
jonmmease authored Sep 10, 2023
1 parent 7127d04 commit 2ed4925
Show file tree
Hide file tree
Showing 17 changed files with 489 additions and 408 deletions.
287 changes: 159 additions & 128 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]

resolver = "2"
members = [
"vegafusion-common",
"vegafusion-core",
Expand Down
26 changes: 25 additions & 1 deletion python/vegafusion/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,10 @@ enum ScalarFunction {
ArrayToString = 97;
Cardinality = 98;
TrimArray = 99;
ArrayContains = 100;
Encode = 101;
Decode = 102;
Cot = 103;
}

message ScalarFunctionNode {
Expand Down Expand Up @@ -639,7 +643,8 @@ message WindowExprNode {
oneof window_function {
AggregateFunction aggr_function = 1;
BuiltInWindowFunction built_in_function = 2;
// udaf = 3
string udaf = 3;
string udwf = 9;
}
LogicalExprNode expr = 4;
repeated LogicalExprNode partition_by = 5;
Expand Down Expand Up @@ -736,6 +741,7 @@ message WindowFrameBound {

message Schema {
repeated Field columns = 1;
map<string, string> metadata = 2;
}

message Field {
Expand All @@ -745,6 +751,7 @@ message Field {
bool nullable = 3;
// for complex data types like structs, unions
repeated Field children = 4;
map<string, string> metadata = 5;
}

message FixedSizeBinary{
Expand Down Expand Up @@ -902,6 +909,12 @@ message ScalarValue{
int64 date_64_value = 21;
int32 interval_yearmonth_value = 24;
int64 interval_daytime_value = 25;

int64 duration_second_value = 35;
int64 duration_millisecond_value = 36;
int64 duration_microsecond_value = 37;
int64 duration_nanosecond_value = 38;

ScalarTimestampValue timestamp_value = 26;
ScalarDictionaryValue dictionary_value = 27;
bytes binary_value = 28;
Expand Down Expand Up @@ -1047,6 +1060,7 @@ message PhysicalPlanNode {
UnionExecNode union = 19;
ExplainExecNode explain = 20;
SortPreservingMergeExecNode sort_preserving_merge = 21;
NestedLoopJoinExecNode nested_loop_join = 22;
}
}

Expand Down Expand Up @@ -1308,6 +1322,7 @@ enum AggregateMode {
FINAL = 1;
FINAL_PARTITIONED = 2;
SINGLE = 3;
SINGLE_PARTITIONED = 4;
}

message WindowAggExecNode {
Expand Down Expand Up @@ -1364,6 +1379,15 @@ message SortExecNode {
message SortPreservingMergeExecNode {
PhysicalPlanNode input = 1;
repeated PhysicalExprNode expr = 2;
// Maximum number of highest/lowest rows to fetch; negative means no limit
int64 fetch = 3;
}

message NestedLoopJoinExecNode {
PhysicalPlanNode left = 1;
PhysicalPlanNode right = 2;
JoinType join_type = 3;
JoinFilter filter = 4;
}

message CoalesceBatchesExecNode {
Expand Down
470 changes: 240 additions & 230 deletions python/vegafusion/vegafusion/proto/datafusion_pb2.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions vegafusion-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ version = "0.4.23"
optional = true

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"
optional = true

[dependencies.serde_json]
Expand All @@ -27,18 +27,18 @@ default_features = false
optional = true

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
features = [ "ipc",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-proto]
version = "27.0.0"
version = "28.0.0"
optional = true

[dependencies.pyo3]
Expand Down
4 changes: 2 additions & 2 deletions vegafusion-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ rand = "0.8.5"
json-patch = "1.0.0"

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"
optional = true

[dependencies.serde_json]
Expand All @@ -39,7 +39,7 @@ features = [ "json", "sqlparser",]
version = "1.4.0"

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.pyo3]
version = "0.19"
Expand Down
8 changes: 4 additions & 4 deletions vegafusion-dataframe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ edition = "2021"
description = "VegaFusion's DataFrame and Connection traits"

[dependencies]
async-trait = "0.1.53"
async-trait = "0.1.73"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
version = "1.4.0"

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
2 changes: 1 addition & 1 deletion vegafusion-datafusion-udfs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ path = "../vegafusion-common"
version = "1.4.0"

[dependencies.datafusion-physical-expr]
version = "27.0.0"
version = "28.0.0"
2 changes: 1 addition & 1 deletion vegafusion-jni/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ version = "1.4.0"
features = [ "datafusion-conn",]

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]
8 changes: 4 additions & 4 deletions vegafusion-python-embed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ protobuf-src = [ "vegafusion-core/protobuf-src",]
log = "0.4.17"
env_logger = "0.10.0"
pythonize = "0.19.0"
async-trait = "0.1.53"
async-trait = "0.1.73"
uuid = "1.3.0"
prost = "0.11.3"

Expand All @@ -31,7 +31,7 @@ features = [ "derive",]
version = "1.0.79"

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
features = [ "pyarrow",]

[dependencies.vegafusion-common]
Expand Down Expand Up @@ -59,10 +59,10 @@ path = "../vegafusion-dataframe"
version = "1.4.0"

[dependencies.datafusion-proto]
version = "27.0.0"
version = "28.0.0"

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]

[dependencies.pyo3]
Expand Down
24 changes: 12 additions & 12 deletions vegafusion-runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,27 @@ chrono-tz = "0.8.1"
prost = "0.11.3"
prost-types = "0.11.2"
num-traits = "0.2.15"
itertools = "0.10.3"
itertools = "0.11.0"
float-cmp = "0.9.0"
lru = "0.10.0"
lru = "0.11.1"
futures = "0.3.21"
async-trait = "0.1.53"
async-recursion = "1.0.4"
async-lock = "2.5.0"
async-trait = "0.1.73"
async-recursion = "1.0.5"
async-lock = "2.8.0"
tempfile = "3.3.0"
futures-util = "0.3.21"
bytes = "1.1.0"
deterministic-hash = "1.0.1"
log = "0.4.17"
env_logger = "0.10.0"
ordered-float = "3.6.0"
reqwest-retry = "0.2.1"
reqwest-retry = "0.3.0"
reqwest-middleware = "0.2.0"

[dev-dependencies]
futures = "0.3.21"
futures-util = "0.3.21"
rstest = "0.17.0"
rstest = "0.18.2"
test-case = "3.1.0"
base64 = "0.21.0"
dssim = "3.1.0"
Expand Down Expand Up @@ -79,19 +79,19 @@ version = "1.0.137"
features = [ "derive",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-physical-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-optimizer]
version = "27.0.0"
version = "28.0.0"

[dependencies.tokio]
version = "1.29.1"
version = "1.32.0"
features = [ "macros", "rt-multi-thread", "fs",]

[dependencies.reqwest]
Expand Down
2 changes: 1 addition & 1 deletion vegafusion-runtime/src/data/tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ async fn process_datetimes(
_ => flat_col(field.name()),
};

Ok(if matches!(expr, Expr::Alias(_, _)) {
Ok(if matches!(expr, Expr::Alias(_)) {
expr
} else {
expr.alias(field.name())
Expand Down
2 changes: 1 addition & 1 deletion vegafusion-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ version = "1.4.0"
features = [ "datafusion-conn",]

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "rt-multi-thread", "macros",]

[dependencies.tonic-web]
Expand Down
20 changes: 10 additions & 10 deletions vegafusion-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ description = "VegaFusion SQL dialect generation and connection implementations"
datafusion-conn = [ "datafusion", "tempfile", "reqwest", "reqwest-retry", "reqwest-middleware", "vegafusion-datafusion-udfs",]

[dependencies]
async-trait = "0.1.53"
async-trait = "0.1.73"
deterministic-hash = "1.0.1"
log = "0.4.17"
chrono = "0.4.23"

[dev-dependencies]
rstest = "0.17.0"
rstest_reuse = "0.5.0"
rstest = "0.18.2"
rstest_reuse = "0.6.0"
lazy_static = "^1.4.0"
toml = "0.7.2"

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
Expand All @@ -38,18 +38,18 @@ version = "1.4.0"
optional = true

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
features = [ "ipc",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion]
version = "27.0.0"
version = "28.0.0"
optional = true

[dependencies.tempfile]
Expand All @@ -63,7 +63,7 @@ features = [ "rustls-tls",]
optional = true

[dependencies.reqwest-retry]
version = "0.2.1"
version = "0.3.0"
optional = true

[dependencies.reqwest-middleware]
Expand Down Expand Up @@ -92,7 +92,7 @@ version = "1.4.0"
features = [ "sqlparser", "json", "prettyprint",]

[dev-dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]

[dev-dependencies.serde]
Expand Down
11 changes: 6 additions & 5 deletions vegafusion-sql/src/compile/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub trait ToSqlExpr {
impl ToSqlExpr for Expr {
fn to_sql(&self, dialect: &Dialect, schema: &DFSchema) -> Result<SqlExpr> {
match self {
Expr::Alias(_, _) => {
Expr::Alias(_) => {
// Alias expressions need to be handled at a higher level
Err(VegaFusionError::internal(format!(
"Alias cannot be converted to SQL: {self:?}"
Expand Down Expand Up @@ -348,7 +348,6 @@ impl ToSqlExpr for Expr {
BuiltinScalarFunction::ArrayAppend => "array_append",
BuiltinScalarFunction::ArrayConcat => "array_concat",
BuiltinScalarFunction::ArrayDims => "array_dims",
BuiltinScalarFunction::ArrayFill => "array_fill",
BuiltinScalarFunction::ArrayLength => "array_length",
BuiltinScalarFunction::ArrayNdims => "array_ndims",
BuiltinScalarFunction::ArrayPosition => "array_position",
Expand All @@ -358,6 +357,11 @@ impl ToSqlExpr for Expr {
BuiltinScalarFunction::ArrayReplace => "array_replace",
BuiltinScalarFunction::ArrayToString => "array_to_string",
BuiltinScalarFunction::Cardinality => "array_cardinality",
BuiltinScalarFunction::Decode => "decode",
BuiltinScalarFunction::Encode => "encode",
BuiltinScalarFunction::Cot => "cot",
BuiltinScalarFunction::ArrayContains => "array_contains",
BuiltinScalarFunction::ArrayFill => "array_fill",
BuiltinScalarFunction::TrimArray => "trim_array",
};
translate_scalar_function(fun_name, args, dialect, schema)
Expand Down Expand Up @@ -573,9 +577,6 @@ impl ToSqlExpr for Expr {
"GroupingSet cannot be converted to SQL",
)),
Expr::Like { .. } => Err(VegaFusionError::internal("Like cannot be converted to SQL")),
Expr::ILike { .. } => Err(VegaFusionError::internal(
"ILike cannot be converted to SQL",
)),
Expr::SimilarTo { .. } => Err(VegaFusionError::internal(
"SimilarTo cannot be converted to SQL",
)),
Expand Down
Loading

0 comments on commit 2ed4925

Please sign in to comment.