Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update DataFusion 28, Arrow to 43, sqlparser to 0.35, and other dep updates #388

Merged
merged 8 commits into from
Sep 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
287 changes: 159 additions & 128 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]

resolver = "2"
members = [
"vegafusion-common",
"vegafusion-core",
Expand Down
26 changes: 25 additions & 1 deletion python/vegafusion/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,10 @@ enum ScalarFunction {
ArrayToString = 97;
Cardinality = 98;
TrimArray = 99;
ArrayContains = 100;
Encode = 101;
Decode = 102;
Cot = 103;
}

message ScalarFunctionNode {
Expand Down Expand Up @@ -639,7 +643,8 @@ message WindowExprNode {
oneof window_function {
AggregateFunction aggr_function = 1;
BuiltInWindowFunction built_in_function = 2;
// udaf = 3
string udaf = 3;
string udwf = 9;
}
LogicalExprNode expr = 4;
repeated LogicalExprNode partition_by = 5;
Expand Down Expand Up @@ -736,6 +741,7 @@ message WindowFrameBound {

message Schema {
repeated Field columns = 1;
map<string, string> metadata = 2;
}

message Field {
Expand All @@ -745,6 +751,7 @@ message Field {
bool nullable = 3;
// for complex data types like structs, unions
repeated Field children = 4;
map<string, string> metadata = 5;
}

message FixedSizeBinary{
Expand Down Expand Up @@ -902,6 +909,12 @@ message ScalarValue{
int64 date_64_value = 21;
int32 interval_yearmonth_value = 24;
int64 interval_daytime_value = 25;

int64 duration_second_value = 35;
int64 duration_millisecond_value = 36;
int64 duration_microsecond_value = 37;
int64 duration_nanosecond_value = 38;

ScalarTimestampValue timestamp_value = 26;
ScalarDictionaryValue dictionary_value = 27;
bytes binary_value = 28;
Expand Down Expand Up @@ -1047,6 +1060,7 @@ message PhysicalPlanNode {
UnionExecNode union = 19;
ExplainExecNode explain = 20;
SortPreservingMergeExecNode sort_preserving_merge = 21;
NestedLoopJoinExecNode nested_loop_join = 22;
}
}

Expand Down Expand Up @@ -1308,6 +1322,7 @@ enum AggregateMode {
FINAL = 1;
FINAL_PARTITIONED = 2;
SINGLE = 3;
SINGLE_PARTITIONED = 4;
}

message WindowAggExecNode {
Expand Down Expand Up @@ -1364,6 +1379,15 @@ message SortExecNode {
message SortPreservingMergeExecNode {
PhysicalPlanNode input = 1;
repeated PhysicalExprNode expr = 2;
// Maximum number of highest/lowest rows to fetch; negative means no limit
int64 fetch = 3;
}

message NestedLoopJoinExecNode {
PhysicalPlanNode left = 1;
PhysicalPlanNode right = 2;
JoinType join_type = 3;
JoinFilter filter = 4;
}

message CoalesceBatchesExecNode {
Expand Down
470 changes: 240 additions & 230 deletions python/vegafusion/vegafusion/proto/datafusion_pb2.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions vegafusion-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ version = "0.4.23"
optional = true

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"
optional = true

[dependencies.serde_json]
Expand All @@ -27,18 +27,18 @@ default_features = false
optional = true

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
features = [ "ipc",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-proto]
version = "27.0.0"
version = "28.0.0"
optional = true

[dependencies.pyo3]
Expand Down
4 changes: 2 additions & 2 deletions vegafusion-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ rand = "0.8.5"
json-patch = "1.0.0"

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"
optional = true

[dependencies.serde_json]
Expand All @@ -39,7 +39,7 @@ features = [ "json", "sqlparser",]
version = "1.4.0"

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.pyo3]
version = "0.19"
Expand Down
8 changes: 4 additions & 4 deletions vegafusion-dataframe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ edition = "2021"
description = "VegaFusion's DataFrame and Connection traits"

[dependencies]
async-trait = "0.1.53"
async-trait = "0.1.73"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
version = "1.4.0"

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
2 changes: 1 addition & 1 deletion vegafusion-datafusion-udfs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ path = "../vegafusion-common"
version = "1.4.0"

[dependencies.datafusion-physical-expr]
version = "27.0.0"
version = "28.0.0"
2 changes: 1 addition & 1 deletion vegafusion-jni/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ version = "1.4.0"
features = [ "datafusion-conn",]

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]
8 changes: 4 additions & 4 deletions vegafusion-python-embed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ protobuf-src = [ "vegafusion-core/protobuf-src",]
log = "0.4.17"
env_logger = "0.10.0"
pythonize = "0.19.0"
async-trait = "0.1.53"
async-trait = "0.1.73"
uuid = "1.3.0"
prost = "0.11.3"

Expand All @@ -31,7 +31,7 @@ features = [ "derive",]
version = "1.0.79"

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
features = [ "pyarrow",]

[dependencies.vegafusion-common]
Expand Down Expand Up @@ -59,10 +59,10 @@ path = "../vegafusion-dataframe"
version = "1.4.0"

[dependencies.datafusion-proto]
version = "27.0.0"
version = "28.0.0"

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]

[dependencies.pyo3]
Expand Down
24 changes: 12 additions & 12 deletions vegafusion-runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,27 @@ chrono-tz = "0.8.1"
prost = "0.11.3"
prost-types = "0.11.2"
num-traits = "0.2.15"
itertools = "0.10.3"
itertools = "0.11.0"
float-cmp = "0.9.0"
lru = "0.10.0"
lru = "0.11.1"
futures = "0.3.21"
async-trait = "0.1.53"
async-recursion = "1.0.4"
async-lock = "2.5.0"
async-trait = "0.1.73"
async-recursion = "1.0.5"
async-lock = "2.8.0"
tempfile = "3.3.0"
futures-util = "0.3.21"
bytes = "1.1.0"
deterministic-hash = "1.0.1"
log = "0.4.17"
env_logger = "0.10.0"
ordered-float = "3.6.0"
reqwest-retry = "0.2.1"
reqwest-retry = "0.3.0"
reqwest-middleware = "0.2.0"

[dev-dependencies]
futures = "0.3.21"
futures-util = "0.3.21"
rstest = "0.17.0"
rstest = "0.18.2"
test-case = "3.1.0"
base64 = "0.21.0"
dssim = "3.1.0"
Expand Down Expand Up @@ -79,19 +79,19 @@ version = "1.0.137"
features = [ "derive",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-physical-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-optimizer]
version = "27.0.0"
version = "28.0.0"

[dependencies.tokio]
version = "1.29.1"
version = "1.32.0"
features = [ "macros", "rt-multi-thread", "fs",]

[dependencies.reqwest]
Expand Down
2 changes: 1 addition & 1 deletion vegafusion-runtime/src/data/tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ async fn process_datetimes(
_ => flat_col(field.name()),
};

Ok(if matches!(expr, Expr::Alias(_, _)) {
Ok(if matches!(expr, Expr::Alias(_)) {
expr
} else {
expr.alias(field.name())
Expand Down
2 changes: 1 addition & 1 deletion vegafusion-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ version = "1.4.0"
features = [ "datafusion-conn",]

[dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "rt-multi-thread", "macros",]

[dependencies.tonic-web]
Expand Down
20 changes: 10 additions & 10 deletions vegafusion-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ description = "VegaFusion SQL dialect generation and connection implementations"
datafusion-conn = [ "datafusion", "tempfile", "reqwest", "reqwest-retry", "reqwest-middleware", "vegafusion-datafusion-udfs",]

[dependencies]
async-trait = "0.1.53"
async-trait = "0.1.73"
deterministic-hash = "1.0.1"
log = "0.4.17"
chrono = "0.4.23"

[dev-dependencies]
rstest = "0.17.0"
rstest_reuse = "0.5.0"
rstest = "0.18.2"
rstest_reuse = "0.6.0"
lazy_static = "^1.4.0"
toml = "0.7.2"

[dependencies.sqlparser]
version = "0.34.0"
version = "0.35.0"

[dependencies.vegafusion-common]
path = "../vegafusion-common"
Expand All @@ -38,18 +38,18 @@ version = "1.4.0"
optional = true

[dependencies.arrow]
version = "42.0.0"
version = "43.0.0"
default_features = false
features = [ "ipc",]

[dependencies.datafusion-common]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion-expr]
version = "27.0.0"
version = "28.0.0"

[dependencies.datafusion]
version = "27.0.0"
version = "28.0.0"
optional = true

[dependencies.tempfile]
Expand All @@ -63,7 +63,7 @@ features = [ "rustls-tls",]
optional = true

[dependencies.reqwest-retry]
version = "0.2.1"
version = "0.3.0"
optional = true

[dependencies.reqwest-middleware]
Expand Down Expand Up @@ -92,7 +92,7 @@ version = "1.4.0"
features = [ "sqlparser", "json", "prettyprint",]

[dev-dependencies.tokio]
version = "1.27.0"
version = "1.32.0"
features = [ "macros", "rt-multi-thread",]

[dev-dependencies.serde]
Expand Down
11 changes: 6 additions & 5 deletions vegafusion-sql/src/compile/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub trait ToSqlExpr {
impl ToSqlExpr for Expr {
fn to_sql(&self, dialect: &Dialect, schema: &DFSchema) -> Result<SqlExpr> {
match self {
Expr::Alias(_, _) => {
Expr::Alias(_) => {
// Alias expressions need to be handled at a higher level
Err(VegaFusionError::internal(format!(
"Alias cannot be converted to SQL: {self:?}"
Expand Down Expand Up @@ -348,7 +348,6 @@ impl ToSqlExpr for Expr {
BuiltinScalarFunction::ArrayAppend => "array_append",
BuiltinScalarFunction::ArrayConcat => "array_concat",
BuiltinScalarFunction::ArrayDims => "array_dims",
BuiltinScalarFunction::ArrayFill => "array_fill",
BuiltinScalarFunction::ArrayLength => "array_length",
BuiltinScalarFunction::ArrayNdims => "array_ndims",
BuiltinScalarFunction::ArrayPosition => "array_position",
Expand All @@ -358,6 +357,11 @@ impl ToSqlExpr for Expr {
BuiltinScalarFunction::ArrayReplace => "array_replace",
BuiltinScalarFunction::ArrayToString => "array_to_string",
BuiltinScalarFunction::Cardinality => "array_cardinality",
BuiltinScalarFunction::Decode => "decode",
BuiltinScalarFunction::Encode => "encode",
BuiltinScalarFunction::Cot => "cot",
BuiltinScalarFunction::ArrayContains => "array_contains",
BuiltinScalarFunction::ArrayFill => "array_fill",
BuiltinScalarFunction::TrimArray => "trim_array",
};
translate_scalar_function(fun_name, args, dialect, schema)
Expand Down Expand Up @@ -573,9 +577,6 @@ impl ToSqlExpr for Expr {
"GroupingSet cannot be converted to SQL",
)),
Expr::Like { .. } => Err(VegaFusionError::internal("Like cannot be converted to SQL")),
Expr::ILike { .. } => Err(VegaFusionError::internal(
"ILike cannot be converted to SQL",
)),
Expr::SimilarTo { .. } => Err(VegaFusionError::internal(
"SimilarTo cannot be converted to SQL",
)),
Expand Down
Loading