Fix coalesce type mismatch in duckdb 1.0

vega · Aug 10, 2024 · 0a5073a · 0a5073a
1 parent 0975881
commit 0a5073a
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 169 deletions.
diff --git a/pixi.lock b/pixi.lock
diff --git a/pixi.toml b/pixi.toml
@@ -111,7 +111,7 @@ scikit-image = "0.21.0.*"
 toml = "0.10.2.*"
 pytest = ">=4.6"
 click = "8.1.6.*"
-python-duckdb = "0.8.1.*"
+python-duckdb = "1.0"
 jupyter-packaging = "0.12.3.*"
 pip = "23.2.1.*"
 voila = "0.5.0.*"

diff --git a/vegafusion-common/src/datatypes.rs b/vegafusion-common/src/datatypes.rs
@@ -18,6 +18,8 @@ pub fn is_numeric_datatype(dtype: &DataType) -> bool {
             | DataType::Float16
             | DataType::Float32
             | DataType::Float64
+            | DataType::Decimal128(_, _)
+            | DataType::Decimal256(_, _)
     )
 }
 
@@ -38,7 +40,7 @@ pub fn is_integer_datatype(dtype: &DataType) -> bool {
 pub fn is_float_datatype(dtype: &DataType) -> bool {
     matches!(
         dtype,
-        DataType::Float16 | DataType::Float32 | DataType::Float64
+        DataType::Float16 | DataType::Float32 | DataType::Float64 | DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
     )
 }
 

diff --git a/vegafusion-sql/src/dataframe/mod.rs b/vegafusion-sql/src/dataframe/mod.rs
@@ -891,8 +891,6 @@ impl SqlDataFrame {
             .map(|f| f.name().clone())
             .collect();
 
-        // let dialect = self.dialect();
-
         // Build partitioning column expressions
         let partition_by: Vec<_> = groupby.iter().map(|group| flat_col(group)).collect();
         let numeric_field = coalesce(vec![
@@ -1140,20 +1138,23 @@ impl SqlDataFrame {
         if groupby.is_empty() {
             // Value replacement for field with no groupby fields specified is equivalent to replacing
             // null values of that column with the fill value
-            let select_columns: Vec<_> = self
+            let select_columns = self
                 .schema()
                 .fields()
                 .iter()
                 .map(|f| {
                     let col_name = f.name();
 
-                    if col_name == field {
-                        coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
+                    Ok(if col_name == field {
+                        coalesce(vec![
+                            flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
+                            lit(value.clone())
+                        ]).alias(col_name)
                     } else {
                         flat_col(col_name)
-                    }
+                    })
                 })
-                .collect();
+                .collect::<Result<Vec<_>>>()?;
 
             self.select(select_columns).await
         } else {
@@ -1185,16 +1186,19 @@ impl SqlDataFrame {
             // Build final selection
             // Finally, select all of the original DataFrame columns, filling in missing values
             // of the `field` columns
-            let select_columns: Vec<_> = original_columns
+            let select_columns = original_columns
                 .iter()
                 .map(|col_name| {
-                    if col_name == field {
-                        coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
+                    Ok(if col_name == field {
+                        coalesce(vec![
+                            flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
+                            lit(value.clone())
+                        ]).alias(col_name)
                     } else {
                         flat_col(col_name)
-                    }
+                    })
                 })
-                .collect();
+                .collect::<Result<Vec<_>>>()?;
 
             let select_column_strs: Vec<_> = if self.dialect().impute_fully_qualified {
                 // Some dialects (e.g. Clickhouse) require that references to columns in nested
@@ -1203,7 +1207,10 @@ impl SqlDataFrame {
                     .iter()
                     .map(|col_name| {
                         let expr = if col_name == field {
-                            coalesce(vec![flat_col(field), lit(value.clone())]).alias(col_name)
+                            coalesce(vec![
+                                flat_col(field).cast_to(&value.data_type(), &self.schema_df()?)?,
+                                lit(value.clone())
+                            ]).alias(col_name)
                         } else if col_name == key {
                             Expr::Column(Column {
                                 relation: Some(TableReference::bare("_key")),