Skip to content

Commit

Permalink
Add fuse_datasets planner configuration for combining datasets during…
Browse files Browse the repository at this point in the history
… planning

This is used to push more data transforms into the root datasets
  • Loading branch information
jonmmease committed Oct 23, 2023
1 parent 13b1e50 commit 6fb9b10
Show file tree
Hide file tree
Showing 9 changed files with 563 additions and 18 deletions.
46 changes: 46 additions & 0 deletions python/vegafusion/vegafusion/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,52 @@ def _import_or_register_inline_datasets(self, inline_datasets=None):

return imported_inline_datasets

def build_pre_transform_spec_plan(
    self,
    spec,
    preserve_interactivity=True,
    keep_signals=None,
    keep_datasets=None,
):
    """
    Diagnostic function that returns the plan used by the pre_transform_spec method

    :param spec: A Vega specification dict or JSON string
    :param preserve_interactivity: If True (default) then the interactive behavior of
        the chart will be preserved. This requires that all the data that participates
        in interactions be included in the resulting spec rather than being
        pre-transformed. If False, then all possible data transformations are applied
        even if they break the original interactive behavior of the chart.
    :param keep_signals: Signals from the input spec that must be included in the
        pre-transformed spec. A list with elements that are either:
          - The name of a top-level signal as a string
          - A two-element tuple where the first element is the name of a signal as a
            string and the second element is the nested scope of the signal as a list
            of integers
    :param keep_datasets: Datasets from the input spec that must be included in the
        pre-transformed spec. A list with elements that are either:
          - The name of a top-level dataset as a string
          - A two-element tuple where the first element is the name of a dataset as a
            string and the second element is the nested scope of the dataset as a list
            of integers
    :return:
        dict with keys:
          - "client_spec": Planned client spec
          - "server_spec": Planned server spec
          - "comm_plan": Communication plan
          - "warnings": List of planner warnings
    """
    # Building the plan is not available when a gRPC channel is in use
    if self._grpc_channel:
        raise ValueError("build_pre_transform_spec_plan not yet supported over gRPC")

    # Parse the keep lists first, then delegate to the embedded runtime
    parsed_signals = parse_variables(keep_signals)
    parsed_datasets = parse_variables(keep_datasets)
    return self.embedded_runtime.build_pre_transform_spec_plan(
        spec,
        preserve_interactivity=preserve_interactivity,
        keep_signals=parsed_signals,
        keep_datasets=parsed_datasets,
    )

def pre_transform_spec(
self,
spec,
Expand Down
22 changes: 14 additions & 8 deletions vegafusion-core/src/planning/dependency_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ use petgraph::prelude::{DiGraph, EdgeRef, NodeIndex};
use petgraph::Incoming;
use std::collections::{HashMap, HashSet};

/// Sort the nodes of a dataset dependency graph topologically.
///
/// # Errors
///
/// Returns an internal [`VegaFusionError`] whose message embeds the debug representation
/// of the error reported by `toposort` when the graph cannot be sorted.
pub fn toposort_dependency_graph(
    data_graph: &DiGraph<(ScopedVariable, DependencyNodeSupported), ()>,
) -> Result<Vec<NodeIndex>> {
    // `data_graph` is already a reference, so it is handed to `toposort` as-is; the
    // failure case is converted in place rather than via a match with an early return.
    toposort(data_graph, None).map_err(|err| {
        VegaFusionError::internal(format!("Failed to sort datasets topologically: {err:?}"))
    })
}

/// get HashSet of all data variables with fully supported parents that are themselves fully or
/// partially supported
pub fn get_supported_data_variables(
Expand All @@ -24,14 +37,7 @@ pub fn get_supported_data_variables(
) -> Result<HashMap<ScopedVariable, DependencyNodeSupported>> {
let data_graph = build_dependency_graph(chart_spec, config)?;
// Sort dataset nodes topologically
let nodes: Vec<NodeIndex> = match toposort(&data_graph, None) {
Ok(v) => v,
Err(err) => {
return Err(VegaFusionError::internal(format!(
"Failed to sort datasets topologically: {err:?}"
)))
}
};
let nodes: Vec<NodeIndex> = toposort_dependency_graph(&data_graph)?;

// Traverse nodes and save those to supported_vars that are supported with all supported
// parents
Expand Down
Loading

0 comments on commit 6fb9b10

Please sign in to comment.