From e4f837005514a79864de50b097a1f26ab3daaba0 Mon Sep 17 00:00:00 2001
From: Jon Mease <jonmmease@gmail.com>
Date: Mon, 25 Sep 2023 14:14:41 -0400
Subject: [PATCH] Fix bin transform crash on empty input table (#395)

---
 python/vegafusion/tests/test_pretransform.py | 160 +++++++++++++++++++
 vegafusion-runtime/src/transform/bin.rs      |   2 +-
 2 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/python/vegafusion/tests/test_pretransform.py b/python/vegafusion/tests/test_pretransform.py
index 97879e4f4..3e1777094 100644
--- a/python/vegafusion/tests/test_pretransform.py
+++ b/python/vegafusion/tests/test_pretransform.py
@@ -1217,6 +1217,156 @@ def manual_histogram_spec():
     """)
 
 
+def empty_histogram_spec():  # Vega spec: extent -> bin -> aggregate(count) -> formula -> filter over the "empty_df" table
+    return json.loads(r"""
+{
+  "$schema": "https://vega.github.io/schema/vega/v5.json",
+  "background": "white",
+  "padding": 5,
+  "width": 200,
+  "height": 200,
+  "style": "cell",
+  "data": [
+    {"name": "empty_df", "url": "table://empty_df"},
+    {
+      "name": "data_0",
+      "source": "empty_df",
+      "transform": [
+        {
+          "type": "extent",
+          "field": "col",
+          "signal": "layer_0_layer_0_bin_maxbins_10_col_extent"
+        },
+        {
+          "type": "bin",
+          "field": "col",
+          "as": ["__bin_field_name", "__bin_field_name_end"],
+          "signal": "layer_0_layer_0_bin_maxbins_10_col_bins",
+          "extent": {"signal": "layer_0_layer_0_bin_maxbins_10_col_extent"},
+          "maxbins": 10
+        },
+        {
+          "type": "aggregate",
+          "groupby": ["__bin_field_name", "__bin_field_name_end"],
+          "ops": ["count"],
+          "fields": [null],
+          "as": ["__count"]
+        },
+        {
+          "type": "formula",
+          "expr": "'[' + toString(datum[\"__bin_field_name\"]) + ', ' + toString(datum[\"__bin_field_name_end\"]) + ')'",
+          "as": "__bin_range"
+        },
+        {
+          "type": "filter",
+          "expr": "isValid(datum[\"__bin_field_name\"]) && isFinite(+datum[\"__bin_field_name\"]) && isValid(datum[\"__count\"]) && isFinite(+datum[\"__count\"])"
+        }
+      ]
+    }
+  ],
+  "marks": [
+    {
+      "name": "layer_0_layer_0_marks",
+      "type": "rect",
+      "clip": true,
+      "style": ["bar"],
+      "from": {"data": "data_0"},
+      "encode": {
+        "update": {
+          "cursor": {"value": "pointer"},
+          "fill": {"value": "#3e277a"},
+          "opacity": {"value": 1},
+          "ariaRoleDescription": {"value": "bar"},
+          "description": {
+            "signal": "\"col (start): \" + (format(datum[\"__bin_field_name\"], \"\")) + \"; Count of Records: \" + (format(datum[\"__count\"], \"\")) + \"; __bin_field_name_end: \" + (format(datum[\"__bin_field_name_end\"], \"\"))"
+          },
+          "x": {"scale": "x", "field": "__bin_field_name"},
+          "x2": {"scale": "x", "field": "__bin_field_name_end", "offset": -1},
+          "y": {"scale": "y", "field": "__count"},
+          "y2": {"scale": "y", "value": 0}
+        }
+      }
+    }
+  ],
+  "scales": [
+    {
+      "name": "x",
+      "type": "linear",
+      "domain": {
+        "data": "data_0",
+        "fields": ["__bin_field_name", "__bin_field_name_end"]
+      },
+      "range": [0, {"signal": "width"}],
+      "nice": true,
+      "zero": true
+    },
+    {
+      "name": "y",
+      "type": "linear",
+      "domain": {"fields": [{"data": "data_0", "field": "__count"}, [0]]},
+      "range": [{"signal": "height"}, 0],
+      "nice": true,
+      "zero": true
+    }
+  ],
+  "axes": [
+    {
+      "scale": "x",
+      "orient": "bottom",
+      "grid": true,
+      "tickCount": 10,
+      "gridScale": "y",
+      "domain": false,
+      "labels": false,
+      "aria": false,
+      "maxExtent": 0,
+      "minExtent": 0,
+      "ticks": false,
+      "zindex": 0
+    },
+    {
+      "scale": "y",
+      "orient": "left",
+      "grid": true,
+      "gridScale": "x",
+      "tickCount": {"signal": "ceil(height/40)"},
+      "domain": false,
+      "labels": false,
+      "aria": false,
+      "maxExtent": 0,
+      "minExtent": 0,
+      "ticks": false,
+      "zindex": 0
+    },
+    {
+      "scale": "x",
+      "orient": "bottom",
+      "grid": false,
+      "title": "col (start)",
+      "labelFlush": false,
+      "labels": true,
+      "tickCount": 10,
+      "ticks": true,
+      "labelOverlap": true,
+      "zindex": 0
+    },
+    {
+      "scale": "y",
+      "orient": "left",
+      "grid": false,
+      "title": "Count of Records",
+      "labelFlush": false,
+      "labels": true,
+      "ticks": true,
+      "labelOverlap": true,
+      "tickCount": {"signal": "ceil(height/40)"},
+      "zindex": 0
+    }
+  ]
+}
+""")
+
+
 def test_pre_transform_multi_partition():
     n = 4050
     order_items = pd.DataFrame({
@@ -1646,6 +1796,16 @@ def test_keep_signals():
     assert sig1["value"] == [1.4, 9.2]
 
 
+def test_empty_histogram():
+    # Regression test (#395): the bin transform must not crash on an empty input table
+    inline = {"empty_df": pd.DataFrame({"col": []})}
+    (data_0,), _warnings = vf.runtime.pre_transform_datasets(
+        empty_histogram_spec(), ["data_0"], inline_datasets=inline
+    )
+    assert data_0.empty
+    assert data_0.columns.tolist() == ["__bin_field_name", "__bin_field_name_end", "__count", "__bin_range"]
+
+
 def test_pre_transform_spec_encoded_datasets():
     # Pre-transform with supported aggregate function should result in no warnings
     vega_spec = movies_histogram_spec()
diff --git a/vegafusion-runtime/src/transform/bin.rs b/vegafusion-runtime/src/transform/bin.rs
index c06bc6141..553309216 100644
--- a/vegafusion-runtime/src/transform/bin.rs
+++ b/vegafusion-runtime/src/transform/bin.rs
@@ -157,7 +157,7 @@ pub fn calculate_bin_params(
     let extent_expr = compile(tx.extent.as_ref().unwrap(), config, Some(schema))?;
     let extent_scalar = extent_expr.eval_to_scalar()?;
 
-    let extent = extent_scalar.to_f64x2()?;
+    let extent = extent_scalar.to_f64x2().unwrap_or([0.0, 0.0]);
 
     let [min_, max_] = extent;
     if min_ > max_ {