Skip to content

Commit

Permalink
Fix coalesce type mismatch in duckdb 1.0
Browse files: browse the repository at this point in the history
  • Loading branch information
jonmmease committed Aug 10, 2024
1 parent 0975881 commit 0a5073a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 169 deletions.
200 changes: 46 additions & 154 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ scikit-image = "0.21.0.*"
toml = "0.10.2.*"
pytest = ">=4.6"
click = "8.1.6.*"
python-duckdb = "0.8.1.*"
python-duckdb = "1.0"
jupyter-packaging = "0.12.3.*"
pip = "23.2.1.*"
voila = "0.5.0.*"
Expand Down
4 changes: 3 additions & 1 deletion vegafusion-common/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub fn is_numeric_datatype(dtype: &DataType) -> bool {
| DataType::Float16
| DataType::Float32
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
)
}

Expand All @@ -38,7 +40,7 @@ pub fn is_integer_datatype(dtype: &DataType) -> bool {
/// Reports whether `dtype` matches one of the data types this module treats as floating point.
///
/// NOTE(review): this span is a rendered diff hunk with no +/- markers, so the two
/// consecutive pattern lines below are presumably the pre-change and post-change versions
/// of the same line. The first lists `Float16`, `Float32` and `Float64`; the second lists
/// the same variants plus `Decimal128` and `Decimal256` (any precision/scale).
pub fn is_float_datatype(dtype: &DataType) -> bool {
matches!(
dtype,
DataType::Float16 | DataType::Float32 | DataType::Float64
DataType::Float16 | DataType::Float32 | DataType::Float64 | DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
)
}

Expand Down
33 changes: 20 additions & 13 deletions vegafusion-sql/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -891,8 +891,6 @@ impl SqlDataFrame {
.map(|f| f.name().clone())
.collect();

// let dialect = self.dialect();

// Build partitioning column expressions
let partition_by: Vec<_> = groupby.iter().map(|group| flat_col(group)).collect();
let numeric_field = coalesce(vec![
Expand Down Expand Up @@ -1140,20 +1138,23 @@ impl SqlDataFrame {
if groupby.is_empty() {
// Value replacement for field with no groupby fields specified is equivalent to replacing
// null values of that column with the fill value
let select_columns: Vec<_> = self
let select_columns = self
.schema()
.fields()
.iter()
.map(|f| {
let col_name = f.name();

if col_name == field {
coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
Ok(if col_name == field {
coalesce(vec![
flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
lit(value.clone())
]).alias(col_name)
} else {
flat_col(col_name)
}
})
})
.collect();
.collect::<Result<Vec<_>>>()?;

self.select(select_columns).await
} else {
Expand Down Expand Up @@ -1185,16 +1186,19 @@ impl SqlDataFrame {
// Build final selection
// Finally, select all of the original DataFrame columns, filling in missing values
// of the `field` columns
let select_columns: Vec<_> = original_columns
let select_columns = original_columns
.iter()
.map(|col_name| {
if col_name == field {
coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
Ok(if col_name == field {
coalesce(vec![
flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
lit(value.clone())
]).alias(col_name)
} else {
flat_col(col_name)
}
})
})
.collect();
.collect::<Result<Vec<_>>>()?;

let select_column_strs: Vec<_> = if self.dialect().impute_fully_qualified {
// Some dialects (e.g. Clickhouse) require that references to columns in nested
Expand All @@ -1203,7 +1207,10 @@ impl SqlDataFrame {
.iter()
.map(|col_name| {
let expr = if col_name == field {
coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
coalesce(vec![
flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
lit(value.clone())
]).alias(col_name)
} else if col_name == key {
Expr::Column(Column {
relation: Some(TableReference::bare("_key")),
Expand Down

0 comments on commit 0a5073a

Please sign in to comment.