Skip to content

Commit

Permalink
Support domain extraction for columns with escaped dots in their names (#458)
Browse files Browse the repository at this point in the history

* Allow columns with escaped dots in split_domain_data

* Add test for domain column with dot

* pin duckdb in tests

* Update test
  • Loading branch information
jonmmease authored Feb 14, 2024
1 parent f268f49 commit bfbb9c8
Show file tree
Hide file tree
Showing 6 changed files with 4,815 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ jobs:
ls -la
python -m pip install vegafusion-*.whl
python -m pip install vegafusion_python_embed-*manylinux_2_17_x86_64*.whl
python -m pip install pytest vega-datasets polars duckdb "vl-convert-python>=1.0.1rc1" scikit-image pandas==2.0
python -m pip install pytest vega-datasets polars duckdb==0.9.2 "vl-convert-python>=1.0.1rc1" scikit-image pandas==2.0
- name: Test vegafusion
working-directory: python/vegafusion/
run: pytest
Expand Down Expand Up @@ -350,7 +350,7 @@ jobs:
ls -la
python -m pip install vegafusion-*.whl
python -m pip install vegafusion_python_embed-*macosx_10_*_x86_64.whl
python -m pip install pytest vega-datasets polars duckdb vl-convert-python scikit-image pandas==2.0
python -m pip install pytest vega-datasets polars duckdb==0.9.2 vl-convert-python scikit-image pandas==2.0
python -m pip install pyarrow==10.0 altair==5.1.2
- name: Test vegafusion
working-directory: python/vegafusion/
Expand Down Expand Up @@ -386,7 +386,7 @@ jobs:
python -m pip install $vegafusion
python -m pip install $vegafusion_python_embed
python -m pip install pytest vega-datasets polars duckdb vl-convert-python scikit-image
python -m pip install pytest vega-datasets polars duckdb==0.9.2 vl-convert-python scikit-image
- name: Test vegafusion
working-directory: python/vegafusion/
run: pytest
Expand Down
6 changes: 5 additions & 1 deletion vegafusion-core/src/planning/split_domain_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,18 @@ pub struct SplitScaleDomainVisitor<'a> {
pub task_scope: &'a TaskScope,
pub new_datasets: Vec<(Vec<u32>, DataSpec)>,
pub domain_dataset_fields: HashMap<ScopedVariable, (ScopedVariable, String)>,
pub nested_regex: regex::Regex,
}

impl<'a> SplitScaleDomainVisitor<'a> {
    /// Creates a visitor bound to `task_scope`, starting with no new datasets
    /// and no recorded domain dataset fields.
    ///
    /// The visitor also carries a pre-compiled regex that matches unescaped
    /// dot characters, so the pattern is compiled once per visitor rather
    /// than on every use.
    pub fn new(task_scope: &'a TaskScope) -> Self {
        // Matches a dot that is not immediately preceded by a backslash.
        // The `^` alternative covers a dot in the very first position: it has
        // no preceding character, so `[^\\]\.` on its own would miss it.
        let nested_regex = regex::Regex::new(r#"(?:^|[^\\])\."#)
            .expect("unescaped-dot pattern is a constant, valid regex");
        Self {
            new_datasets: Vec::new(),
            task_scope,
            domain_dataset_fields: Default::default(),
            nested_regex,
        }
    }
}
Expand Down Expand Up @@ -173,7 +177,7 @@ impl<'a> SplitScaleDomainVisitor<'a> {
let field_name = &field_ref.field;

// Validate whether we can do anything
if field_name.contains('.') {
if self.nested_regex.is_match(field_name) {
// Nested fields not supported
return Ok(());
}
Expand Down
25 changes: 25 additions & 0 deletions vegafusion-runtime/tests/specs/custom/gh_455.comm_plan.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"server_to_client": [
{
"name": "data_0",
"namespace": "data",
"scope": []
},
{
"name": "data_0_layer_0_layer_0_color_domain_sym\\.bol",
"namespace": "data",
"scope": []
},
{
"name": "data_0_x_domain_sym\\.bol",
"namespace": "data",
"scope": []
},
{
"name": "data_0_y_domain_sum_price",
"namespace": "data",
"scope": []
}
],
"client_to_server": []
}
Loading

0 comments on commit bfbb9c8

Please sign in to comment.