diff --git a/.gitignore b/.gitignore index 6338524d..9d1792b8 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ Cargo.lock # Ignore the huge test files tests/apps/*.wat output/ + +# Ignore mac files +.DS_Store diff --git a/Cargo.toml b/Cargo.toml index 3b050371..6b534a2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,11 @@ log = "0.4.20" pest = "2.7.7" pest_derive = "2.7.7" +# Visualization +graphviz-rust = "0.9.0" +project-root = "0.2.2" +opener = { version = "0.7.0", default-features = false } + [dependencies.clap] optional = true version = "3.2.23" diff --git a/README.md b/README.md index c3b1e790..a73ea724 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ `whamm!` is a tool for "Wasm Application Monitoring and Manipulation"[^1], a DSL inspired by the D language. -[^1]: The 'h' is silent. +[^1] The 'h' is silent. ## Tutorials ## @@ -27,10 +27,36 @@ cargo test -- --nocapture # With stdout tracing To run project (there are example Whammys in `tests/whammys` folder): ```shell -cargo run -- --app --whammy +cargo run -- instr --app --whammy ``` To specify log level: ```shell RUST_LOG={ error | warn | info | debug | trace | off } cargo run -- --app --whammy ``` + +To visually debug the decision tree used during Wasm bytecode emission: +```shell +cargo run -- vis-tree --whammy +``` + +## Available Packages ## + +Currently available: +- `wasm:bytecode` + +To be added: +- `thread` operation events +- `gc` operation events +- `function` enter/exit/unwind events +- `memory` access (read/write) events +- `table` access (read/write) events +- `component` operation events +- `BEGIN`/`END` events +- `traps` +- `exception` throw/rethrow/catch events + +Example: +`wasi:http:send_req:alt` +`wasm:bytecode:call:alt` +`wasm:fn:enter:before` \ No newline at end of file diff --git a/src/behavior.rs b/src/behavior.rs new file mode 100644 index 00000000..6a56d08f --- /dev/null +++ b/src/behavior.rs @@ -0,0 +1,6 @@ +pub mod tree; +pub mod builder_visitor; +pub mod 
visualize; + +#[cfg(test)] +pub mod tests; \ No newline at end of file diff --git a/src/behavior/builder_visitor.rs b/src/behavior/builder_visitor.rs new file mode 100644 index 00000000..8451002a --- /dev/null +++ b/src/behavior/builder_visitor.rs @@ -0,0 +1,452 @@ +use crate::behavior::tree::{ActionWithChildType, BehaviorTree, DecoratorType}; + +use std::collections::HashMap; +use crate::parser::types as parser_types; +use parser_types::{DataType, Whammy, Whamm, WhammVisitor, Expr, Fn, Event, Package, Op, Probe, Provider, Statement, Value}; + +use log::{debug, error, trace}; +use regex::Regex; +use crate::behavior::tree::ParamActionType; +use crate::behavior::tree::DecoratorType::{HasAltCall, HasParams, PredIs}; +use crate::parser::types::Global; +use crate::verifier::types::ScopeType; + +pub type SimpleAST = HashMap>>>>; + +pub fn build_behavior_tree(ast: &Whamm) -> (BehaviorTree, SimpleAST) { + let mut visitor = BehaviorTreeBuilder::new(); + visitor.visit_whamm(ast); + + debug!("{:#?}", visitor.ast); + (visitor.tree, visitor.ast) +} + +pub struct BehaviorTreeBuilder { + pub tree: BehaviorTree, + pub ast: SimpleAST, + pub context_name: String, + curr_provider_name: String, + curr_package_name: String, + curr_event_name: String +} +impl BehaviorTreeBuilder { + pub fn new() -> Self { + Self { + tree: BehaviorTree::new(), + ast: HashMap::new(), + context_name: "".to_string(), + curr_provider_name: "".to_string(), + curr_package_name: "".to_string(), + curr_event_name: "".to_string() + } + } +} +impl BehaviorTreeBuilder { + // ======= + // = AST = + // ======= + + fn add_provider_to_ast(&mut self, provider_name: String) { + if !self.ast.contains_key(&provider_name) { + self.ast.insert(provider_name.clone(), HashMap::new()); + } + self.curr_provider_name = provider_name; + } + + fn add_package_to_ast(&mut self, package_name: String) { + if let Some(provider) = self.ast.get_mut(&self.curr_provider_name) { + if !provider.contains_key(&package_name) { + 
provider.insert(package_name.clone(), HashMap::new()); + } + } else { + unreachable!() + } + self.curr_package_name = package_name; + } + + fn add_event_to_ast(&mut self, event_name: String) { + if let Some(provider) = self.ast.get_mut(&self.curr_provider_name) { + if let Some(package) = provider.get_mut(&self.curr_package_name) { + if !package.contains_key(&event_name) { + package.insert(event_name.clone(), HashMap::new()); + } + } + } else { + unreachable!() + } + self.curr_event_name = event_name; + } + + fn add_probe_to_ast(&mut self, probe: &Probe) { + if let Some(provider) = self.ast.get_mut(&self.curr_provider_name) { + if let Some(package) = provider.get_mut(&self.curr_package_name) { + if let Some(event) = package.get_mut(&self.curr_event_name) { + if let Some(probes) = event.get_mut(&probe.name) { + probes.push((*probe).clone()); + } else { + event.insert(probe.name.clone(), vec![(*probe).clone()]); + } + } + + + } + } else { + unreachable!() + } + } + + // ================ + // = BehaviorTree = + // ================ + + fn visit_globals(&mut self, globals: &HashMap) { + if globals.len() > 0 { + self.tree.sequence(); + + // visit globals + for (_name, global) in globals.iter() { + if global.is_comp_provided { + if let Expr::VarId { name } = &global.var_name { + self.tree.define(self.context_name.clone(), + name.clone()); + } + } + } + self.tree.exit_sequence(); + } + } + + fn is_in_context(&self, pattern: &str) -> bool { + let regex = Regex::new(pattern).unwrap(); + if let Some(_caps) = regex.captures(self.context_name.as_str()) { + true + } else { + false + } + } + + fn visit_bytecode_package(&mut self, package: &Package) { + if package.events.len() > 0 { + self.tree.action_with_child(ActionWithChildType::EnterPackage { + package_name: package.name.clone() + }) + .decorator(DecoratorType::IsInstr { + instr_names: package.events.keys().cloned().collect(), + }); + for (_name, event) in package.events.iter() { + // just grab the first one and emit behavior 
(the decorator above is what + // makes this apply to all events) + self.visit_event(event); + break; + } + self.tree.exit_decorator(); + self.tree.exit_action_with_child(); + } + } + + fn visit_bytecode_event(&mut self, event: &Event) { + self.tree.sequence() + .enter_scope_of(self.context_name.clone(), ScopeType::Event); + + // Define globals + self.visit_globals(&event.globals); + + self.visit_probe_ty(event, "before"); + self.visit_probe_ty(event, "alt"); + self.visit_probe_ty(event, "after"); + + self.tree.exit_scope(); + self.tree.exit_sequence(); + } + + fn visit_probe_ty(&mut self, event: &Event, ty: &str) { + if let Some(probes) = event.probe_map.get(ty) { + if let Some(probe) = probes.get(0) { + // just grab the first one and emit behavior (the behavior includes a loop + // over all probes of this type) + self.visit_probe(probe); + } + } + } + + fn visit_bytecode_probe(&mut self, probe: &Probe) { + self.tree.fold_pred() + .fallback() + .decorator(PredIs { + val: false + }) + .force_success() + .exit_decorator() + .sequence() + .fallback() + .decorator(HasParams) + .save_params() + .exit_decorator() + .force_success() + .exit_fallback() + .fallback() + .decorator(PredIs { + val: true + }) + .sequence() + .fallback() + .decorator(DecoratorType::IsProbeType { + probe_type: "alt".to_string() + }) + .remove_orig() + .exit_decorator() + .force_success() + .exit_fallback() + .emit_body() + .emit_params_subtree() + .fallback() + .decorator(HasAltCall) + .emit_alt_call() + .exit_decorator() + .force_success() + .exit_fallback() + .exit_sequence() + .exit_decorator() + .fallback() + // before behavior + .decorator(DecoratorType::IsProbeType { + probe_type: "before".to_string() + }); + + self.emit_bytecode_probe_before_body(probe); + self.tree.exit_decorator() + // alt behavior + .decorator(DecoratorType::IsProbeType { + probe_type: "alt".to_string() + }); + self.emit_bytecode_probe_alt_body(probe); + self.tree.exit_decorator() + // after behavior + 
.decorator(DecoratorType::IsProbeType { + probe_type: "after".to_string() + }); + self.emit_bytecode_probe_after_body(probe); + self.tree.exit_decorator() + // exit + .exit_fallback() + .exit_fallback() + .exit_sequence() + .exit_fallback(); + } + + fn emit_bytecode_probe_before_body(&mut self, _probe: &Probe) { + self.tree.parameterized_action(ParamActionType::EmitIf { + cond: 0, + conseq: 1 + }) + .emit_pred() + .emit_body() + .exit_parameterized_action(); + } + + fn emit_bytecode_probe_alt_body(&mut self, _probe: &Probe) { + self.tree.sequence() + .remove_orig() + .parameterized_action(ParamActionType::EmitIfElse { + cond: 0, + conseq: 1, + alt: 2 + }) + .emit_pred() + .sequence() + .emit_body() + .fallback() + .decorator(HasAltCall) + .sequence() // TODO -- remove need for this (just have normal lib::() call syntax) + .emit_params_subtree() + .emit_alt_call() + .exit_sequence() + .exit_decorator() + .force_success() + .exit_fallback() + .exit_sequence() + .sequence() + .emit_params_subtree() + .emit_orig() + .exit_sequence() + .exit_parameterized_action() + .exit_sequence(); + } + + fn emit_bytecode_probe_after_body(&mut self, _probe: &Probe) { + self.tree.parameterized_action(ParamActionType::EmitIf { + cond: 0, + conseq: 1 + }) + .emit_pred() + .emit_body() + .exit_parameterized_action(); + } +} +impl WhammVisitor<()> for BehaviorTreeBuilder { + fn visit_whamm(&mut self, whamm: &Whamm) -> () { + trace!("Entering: BehaviorTreeBuilder::visit_whamm"); + self.context_name = "whamm".to_string(); + + self.tree.sequence(); + // .enter_scope(self.context_name.clone()); + + // visit globals + self.visit_globals(&whamm.globals); + + // visit whammys + whamm.whammys.iter().for_each(| whammy | self.visit_whammy(whammy)); + + // self.tree.exit_scope(); + + trace!("Exiting: BehaviorTreeBuilder::visit_whamm"); + self.tree.exit_sequence(); + // Remove from `context_name` + self.context_name = "".to_string(); + } + + fn visit_whammy(&mut self, whammy: &Whammy) -> () { + 
trace!("Entering: BehaviorTreeBuilder::visit_whammy"); + self.context_name += &format!(":{}", whammy.name.clone()); + + self.tree.enter_scope(self.context_name.clone(), whammy.name.clone()); + + // visit globals + self.visit_globals(&whammy.globals); + + whammy.providers.iter().for_each(| (_name, provider) | { + self.visit_provider(provider) + }); + + self.tree.exit_scope(); + + trace!("Exiting: BehaviorTreeBuilder::visit_whammy"); + // Remove from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + } + + fn visit_provider(&mut self, provider: &Provider) -> () { + trace!("Entering: BehaviorTreeBuilder::visit_provider"); + self.context_name += &format!(":{}", provider.name.clone()); + self.add_provider_to_ast(provider.name.clone()); + + self.tree.enter_scope(self.context_name.clone(), provider.name.clone()); + + // visit globals + self.visit_globals(&provider.globals); + + provider.packages.iter().for_each(| (_name, package) | { + self.visit_package(package) + }); + + self.tree.exit_scope(); + + trace!("Exiting: BehaviorTreeBuilder::visit_provider"); + // Remove this package from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + } + + fn visit_package(&mut self, package: &Package) -> () { + trace!("Entering: BehaviorTreeBuilder::visit_package"); + self.context_name += &format!(":{}", package.name.clone()); + self.add_package_to_ast(package.name.clone()); + + self.tree.enter_scope(self.context_name.clone(), package.name.clone()); + + if self.is_in_context(r"whamm:whammy([0-9]+):wasm:bytecode") { + self.visit_bytecode_package(package); + } else { + error!("Unsupported package: {}", package.name); + }; + + self.tree.exit_scope(); + + trace!("Exiting: BehaviorTreeBuilder::visit_package"); + // Remove this package from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + } + + fn visit_event(&mut 
self, event: &Event) -> () { + trace!("Entering: BehaviorTreeBuilder::visit_event"); + self.context_name += &format!(":{}", event.name.clone()); + self.add_event_to_ast(event.name.clone()); + + if self.is_in_context(r"whamm:whammy([0-9]+):wasm:bytecode:(.*)") { + self.visit_bytecode_event(event); + } else { + error!("Unsupported event: {}", event.name); + }; + + trace!("Exiting: BehaviorTreeBuilder::visit_event"); + // Remove this event from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + } + + fn visit_probe(&mut self, probe: &Probe) -> () { + trace!("Entering: BehaviorTreeBuilder::visit_probe"); + self.context_name += &format!(":{}", probe.name.clone()); + self.add_probe_to_ast(probe); + + if probe.name == "alt" { + self.tree.decorator(DecoratorType::ForFirstProbe { + target: probe.name.clone() + }); + } else { + self.tree.decorator(DecoratorType::ForEachProbe { + target: probe.name.clone() + }); + } + self.tree.sequence() + .enter_scope(self.context_name.clone(), probe.name.clone()); + + // visit globals + self.visit_globals(&probe.globals); + + if self.is_in_context(r"whamm:whammy([0-9]+):wasm:bytecode:(.*)") { + self.visit_bytecode_probe(probe); + } else { + error!("Unsupported probe: {}", self.context_name); + }; + + self.tree.exit_scope(); + + trace!("Exiting: BehaviorTreeBuilder::visit_probe"); + self.tree.exit_sequence() + .exit_decorator(); + // Remove this probe from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + } + + fn visit_fn(&mut self, _f: &Fn) -> () { + unreachable!() + } + + fn visit_formal_param(&mut self, _param: &(Expr, DataType)) -> () { + unreachable!() + } + + fn visit_stmt(&mut self, _assign: &Statement) -> () { + // Not visiting event/probe bodies + unreachable!() + } + + fn visit_expr(&mut self, _call: &Expr) -> () { + // Not visiting predicates/statements + unreachable!() + } + + fn visit_op(&mut self, _op: &Op) 
-> () { + // Not visiting predicates/statements + unreachable!() + } + + fn visit_datatype(&mut self, _datatype: &DataType) -> () { + // Not visiting predicates/statements + unreachable!() + } + + fn visit_value(&mut self, _val: &Value) -> () { + // Not visiting predicates/statements + unreachable!() + } +} \ No newline at end of file diff --git a/src/behavior/tests.rs b/src/behavior/tests.rs new file mode 100644 index 00000000..0ffdd02f --- /dev/null +++ b/src/behavior/tests.rs @@ -0,0 +1 @@ +// TODO \ No newline at end of file diff --git a/src/behavior/tree.rs b/src/behavior/tree.rs new file mode 100644 index 00000000..51376aca --- /dev/null +++ b/src/behavior/tree.rs @@ -0,0 +1,638 @@ +use log::error; +use crate::verifier::types::ScopeType; + +#[derive(Debug)] +pub struct BehaviorTree { + pub nodes: Vec, + pub curr: usize, // indexes into this::nodes +} +impl BehaviorTree { + pub fn new() -> Self { + Self { + nodes: vec![ Node::Root { + id: 0, + child: 0 + }], + curr: 0 + } + } + + pub fn reset(&mut self) { + self.curr = 0; + } + + pub fn get_node(&self, idx: usize) -> Option<&Node> { + self.nodes.get(idx) + } + + pub fn get_node_mut(&mut self, idx: usize) -> Option<&mut Node> { + self.nodes.get_mut(idx) + } + + pub fn get_root(&self) -> Option<&Node>{ + self.get_node(0) + } + + pub fn get_curr(&self) -> Option<&Node> { + self.get_node(self.curr) + } + + pub fn get_curr_mut(&mut self) -> Option<&mut Node> { + self.get_node_mut(self.curr) + } + + // ================== + // ==== Control ===== + // ================== + + pub fn sequence(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child_and_enter(Node::Sequence { + id, + parent: self.curr, + children: vec![], + }); + self + } + + pub fn exit_sequence(&mut self) -> &mut Self { + match self.get_curr_mut() { + Some(Node::Sequence {parent, ..}) => { + self.curr = parent.clone() + }, + other => { + error!("Something went wrong, expected Sequence, but was: {:?}", other) + } + }; + self + } + + pub fn 
fallback(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child_and_enter(Node::Fallback { + id, + parent: self.curr, + children: vec![], + }); + self + } + + pub fn exit_fallback(&mut self) -> &mut Self { + match self.get_curr_mut() { + Some(Node::Fallback {parent, ..}) => { + self.curr = parent.clone() + }, + other => { + error!("Something went wrong, expected Fallback, but was: {:?}", other) + } + }; + self + } + + pub fn decorator(&mut self, ty: DecoratorType) -> &mut Self { + let id = self.nodes.len(); + self.put_child_and_enter(Node::Decorator { + id, + ty, + parent: self.curr, + child: 0, + }); + self + } + + pub fn exit_decorator(&mut self) -> &mut Self { + match self.get_curr_mut() { + Some(Node::Decorator {parent, ..}) => { + self.curr = parent.clone() + }, + other => { + error!("Something went wrong, expected Decorator, but was: {:?}", other) + } + }; + self + } + + pub fn action_with_child(&mut self, ty: ActionWithChildType) -> &mut Self { + let id = self.nodes.len(); + self.put_child_and_enter(Node::ActionWithChild { + id, + parent: self.curr, + ty, + child: 0, + }); + self + } + + pub fn exit_action_with_child(&mut self) -> &mut Self { + match self.get_curr_mut() { + Some(Node::ActionWithChild {parent, ..}) => { + self.curr = parent.clone() + }, + other => { + error!("Something went wrong, expected ActionWithChild, but was: {:?}", other) + } + }; + self + } + + pub fn parameterized_action(&mut self, ty: ParamActionType) -> &mut Self { + let id = self.nodes.len(); + self.put_child_and_enter(Node::ParameterizedAction { + id, + parent: self.curr, + ty, + children: vec![], + }); + self + } + + pub fn exit_parameterized_action(&mut self) -> &mut Self { + match self.get_curr_mut() { + Some(Node::ParameterizedAction {parent, ..}) => { + self.curr = parent.clone() + }, + other => { + error!("Something went wrong, expected ParameterizedAction, but was: {:?}", other) + } + }; + self + } + + // ================== + // ==== Actions ===== + // 
================== + + fn add_action_as_param(&mut self, idx: usize, id: usize) { + match self.get_curr_mut() { + Some(Node::ParameterizedAction {ty, ..}) => { + match ty { + ParamActionType::EmitIf { cond, conseq } => { + if idx == 0 { + *cond = id; + } else if idx == 1 { + *conseq = id; + } else { + error!("Unexpected index for parameterized action (EmitIf): {}", idx); + } + }, + ParamActionType::EmitIfElse { cond, conseq, alt } => { + if idx == 0 { + *cond = id; + } else if idx == 1 { + *conseq = id; + }else if idx == 2 { + *alt = id; + } else { + error!("Unexpected index for parameterized action (EmitIfElse): {}", idx); + } + } + } + }, + _ => {} + }; + } + + pub fn define(&mut self, context: String, var_name: String) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::Define { + context, + var_name + } + }); + self + } + + pub fn emit_body(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EmitBody + }); + self + } + + pub fn emit_alt_call(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EmitAltCall + }); + self + } + + pub fn emit_params(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EmitParams + }); + self + } + + pub fn emit_params_subtree(&mut self) -> &mut Self { + self.fallback() + .decorator(DecoratorType::HasParams) + .emit_params() + .exit_decorator() + .force_success() + .exit_fallback() + } + + pub fn remove_orig(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::RemoveOrig + }); + self + } + + pub fn emit_orig(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EmitOrig + }); + 
self + } + + pub fn emit_pred(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EmitPred + }); + self + } + + pub fn enter_scope_of(&mut self, context_name: String, scope_ty: ScopeType) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EnterScopeOf { + context: context_name, + scope_ty + } + }); + self + } + + pub fn enter_scope(&mut self, context_name: String, scope_name: String) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::EnterScope { + context: context_name, + scope_name + } + }); + self + } + + pub fn exit_scope(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::ExitScope + }); + self + } + + pub fn fold_pred(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::FoldPred + }); + self + } + + pub fn force_success(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::ForceSuccess + }); + self + } + + pub fn save_params(&mut self) -> &mut Self { + let id = self.nodes.len(); + self.put_child(Node::Action { + id, + parent: self.curr, + ty: ActionType::SaveParams + }); + self + } + + // ================== + // ==== Base Fns ==== + // ================== + + pub fn put_child(&mut self, node: Node) -> Option { + let mut assigned_id = None; + let new_id = self.nodes.len(); + + if let Some(curr) = self.get_curr_mut() { + match curr { + Node::Root { child, .. } => { + *child = new_id; + assigned_id = Some(new_id); + } + Node::Sequence { children, .. } => { + children.push(new_id); + assigned_id = Some(new_id); + } + Node::Decorator { child, .. 
} => { + *child = new_id; + assigned_id = Some(new_id); + } + Node::Fallback { children, .. } => { + children.push(new_id); + assigned_id = Some(new_id); + } + Node::ActionWithChild { child, .. } => { + *child = new_id; + assigned_id = Some(new_id); + } + Node::ParameterizedAction { children, .. } => { + let idx = children.len(); + children.push(new_id); + + self.add_action_as_param(idx, new_id); + assigned_id = Some(new_id); + } + _ => { + error!("Cannot add child to this Tree node type"); + } + } + } + if assigned_id.is_some() { + self.nodes.push(node); + } + assigned_id + } + + pub fn put_child_and_enter(&mut self, node: Node) -> bool { + if let Some(id) = self.put_child(node) { + self.curr = id; + } + false + } + + // For use as param passing (consider IfElse action) + pub fn put_floating_child(&mut self, node: Node) -> usize { + let new_id = self.nodes.len(); + self.nodes.push(node); + new_id + } + + pub fn exit_child(&mut self) { + match self.get_curr_mut() { + Some(Node::Sequence {parent, ..}) | + Some(Node::Fallback {parent, ..}) => { + self.curr = parent.clone() + }, + Some(Node::Decorator {parent, ..}) => { + self.curr = parent.clone() + } + _ => { + error!("Attempted to exit current scope, but there was no parent to exit into.") + } + } + } +} + +#[derive(Debug)] +pub enum Node { + Root { + id: usize, + child: usize + }, + Sequence { + id: usize, + parent: usize, + children: Vec + }, + Decorator { + id: usize, + ty: DecoratorType, + parent: usize, + child: usize + }, + Fallback { + id: usize, + parent: usize, + children: Vec + }, + ActionWithChild { + id: usize, + ty: ActionWithChildType, + parent: usize, + child: usize + }, + ParameterizedAction { + id: usize, + parent: usize, + ty: ParamActionType, + children: Vec + }, + Action { + id: usize, + parent: usize, + ty: ActionType + } +} + +#[derive(Debug)] +pub enum DecoratorType { + IsInstr { + instr_names: Vec + }, + IsProbeType { + probe_type: String + }, + HasParams, + HasAltCall, + PredIs { + val: 
/// Kinds of parameterized action; each field names the tree node that
/// supplies one of the action's parameters.
///
/// The field values are node ids (indexes into `BehaviorTree::nodes`). The
/// values given at construction time are placeholders: as children are added
/// to the `ParameterizedAction` node, the tree overwrites slot 0 (`cond`),
/// slot 1 (`conseq`) and slot 2 (`alt`) with the id of the corresponding
/// child.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParamActionType {
    /// Emit `conseq` guarded by `cond`.
    EmitIf {
        /// Node id of the condition subtree.
        cond: usize,
        /// Node id of the subtree emitted when the condition holds.
        conseq: usize
    },
    /// Emit `conseq` when `cond` holds, `alt` otherwise.
    EmitIfElse {
        /// Node id of the condition subtree.
        cond: usize,
        /// Node id of the subtree emitted when the condition holds.
        conseq: usize,
        /// Node id of the subtree emitted when the condition does not hold.
        alt: usize
    }
}
} => self.visit_action(node), + } + } + fn visit_root(&mut self, node: &Node) -> T; + + // Control nodes + fn visit_sequence(&mut self, node: &Node) -> T; + fn visit_decorator(&mut self, node: &Node) -> T { + if let Node::Decorator { ty, ..} = node { + match ty { + DecoratorType::IsInstr {..} => self.visit_is_instr(node), + DecoratorType::IsProbeType {..} => self.visit_is_probe_type(node), + DecoratorType::HasAltCall {..} => self.visit_has_alt_call(node), + DecoratorType::HasParams {..} => self.visit_has_params(node), + DecoratorType::PredIs {..} => self.visit_pred_is(node), + DecoratorType::ForEachProbe {..} => self.visit_for_each_probe(node), + DecoratorType::ForFirstProbe {..} => self.visit_for_first_probe(node), + } + } else { + unreachable!() + } + } + fn visit_fallback(&mut self, node: &Node) -> T; + fn visit_action_with_child(&mut self, node: &Node) -> T { + if let Node::ActionWithChild { ty, ..} = node { + match ty { + ActionWithChildType::EnterPackage {..} => self.visit_enter_package(node), + } + } else { + unreachable!() + } + } + fn visit_parameterized_action(&mut self, node: &Node) -> T { + if let Node::ParameterizedAction { ty, ..} = node { + match ty { + ParamActionType::EmitIfElse {..} => self.visit_emit_if_else(node), + ParamActionType::EmitIf {..} => self.visit_emit_if(node) + } + } else { + unreachable!() + } + } + + // Decorator nodes + fn visit_is_instr(&mut self, node: &Node) -> T; + fn visit_is_probe_type(&mut self, node: &Node) -> T; + fn visit_has_alt_call(&mut self, node: &Node) -> T; + fn visit_has_params(&mut self, node: &Node) -> T; + fn visit_pred_is(&mut self, node: &Node) -> T; + fn visit_for_each_probe(&mut self, node: &Node) -> T; + fn visit_for_first_probe(&mut self, node: &Node) -> T; + + // Action with child nodes + fn visit_enter_package(&mut self, node: &Node) -> T; + + // Parameterized action nodes + fn visit_emit_if_else(&mut self, node: &Node) -> T; + fn visit_emit_if(&mut self, node: &Node) -> T; + + // Action nodes + fn 
visit_action(&mut self, node: &Node) -> T { + if let Node::Action { ty, ..} = node { + match ty { + ActionType::EnterScope {..} => self.visit_enter_scope(node), + ActionType::EnterScopeOf {..} => self.visit_enter_scope_of(node), + ActionType::ExitScope {..} => self.visit_exit_scope(node), + ActionType::Define {..} => self.visit_define(node), + ActionType::EmitPred {..} => self.visit_emit_pred(node), + ActionType::FoldPred {..} => self.visit_fold_pred(node), + ActionType::Reset {..} => self.visit_reset(node), + ActionType::SaveParams {..} => self.visit_save_params(node), + ActionType::EmitParams {..} => self.visit_emit_params(node), + ActionType::EmitBody {..} => self.visit_emit_body(node), + ActionType::EmitAltCall {..} => self.visit_emit_alt_call(node), + ActionType::RemoveOrig {..} => self.visit_remove_orig(node), + ActionType::EmitOrig {..} => self.visit_emit_orig(node), + ActionType::ForceSuccess {..} => self.visit_force_success(node), + } + } else { + unreachable!() + } + } + fn visit_enter_scope(&mut self, node: &Node) -> T; + fn visit_enter_scope_of(&mut self, node: &Node) -> T; + fn visit_exit_scope(&mut self, node: &Node) -> T; + fn visit_define(&mut self, node: &Node) -> T; + fn visit_emit_pred(&mut self, node: &Node) -> T; + fn visit_fold_pred(&mut self, node: &Node) -> T; + fn visit_reset(&mut self, node: &Node) -> T; + fn visit_save_params(&mut self, node: &Node) -> T; + fn visit_emit_params(&mut self, node: &Node) -> T; + fn visit_emit_body(&mut self, node: &Node) -> T; + fn visit_emit_alt_call(&mut self, node: &Node) -> T; + fn visit_remove_orig(&mut self, node: &Node) -> T; + fn visit_emit_orig(&mut self, node: &Node) -> T; + fn visit_force_success(&mut self, node: &Node) -> T; +} diff --git a/src/behavior/visualize.rs b/src/behavior/visualize.rs new file mode 100644 index 00000000..1dc478e0 --- /dev/null +++ b/src/behavior/visualize.rs @@ -0,0 +1,529 @@ +use std::io::Result; +use std::path::PathBuf; +use graphviz_rust::cmd::{CommandArg, Format}; 
+use graphviz_rust::exec; +use graphviz_rust::dot_structures::{Attribute, Edge, EdgeTy, Graph, Id, Node, NodeId, Stmt, Vertex}; +use graphviz_rust::dot_generator::{attr, edge, graph, id, node, node_id, stmt}; +use graphviz_rust::printer::PrinterContext; +use crate::behavior::tree::{ActionType, ActionWithChildType, BehaviorTree, BehaviorVisitor, DecoratorType, Node as TreeNode, ParamActionType}; + +pub fn visualization_to_file(tree: &BehaviorTree, path: PathBuf) -> Result> { + let graph = visualize(tree); + let p = path.to_str().unwrap(); + + let res = exec( + graph, + &mut PrinterContext::default(), + vec![Format::Svg.into(), CommandArg::Output(p.to_string())] + ); + match &res { + Err(e) => { + println!("{}", e.to_string()); + } + _ => {} + } + res +} + +fn visualize(tree: &BehaviorTree) -> Graph { + let mut visualizer = Visualizer { + tree, + graph: graph!(strict di id!("")), + is_param_action: false, + param_label: None + }; + if let Some(root) = tree.get_root() { + visualizer.visit_root(root); + } + + visualizer.graph +} + +const CONTROL_NODE_COLOR: &str = "dimgray"; +const DECORATOR_NODE_COLOR: &str = "darkseagreen"; +const ACTION_NODE_COLOR: &str = "indianred"; +const SPECIAL_ACTION_NODE_COLOR: &str = "maroon"; + +struct Visualizer<'a> { + tree: &'a BehaviorTree, + graph: Graph, + is_param_action: bool, + param_label: Option +} + +impl Visualizer<'_> { + + // =============== + // ==== NODES ==== + // =============== + + fn emit_node(&mut self, id: &usize, label: &str, color: &str) { + self.graph.add_stmt(stmt!( + node!(id; + attr!("label", label), + attr!("style", "filled"), + attr!("color", color), + attr!("fontcolor", "white") + ) + )); + } + fn emit_control_node(&mut self, id: &usize, label: &str) { + self.emit_node(id, label, CONTROL_NODE_COLOR); + } + fn emit_decorator_node(&mut self, id: &usize, label: &str) { + self.emit_node(id, label, DECORATOR_NODE_COLOR); + } + fn emit_action_node(&mut self, id: &usize, label: &str) { + self.emit_node(id, label, 
ACTION_NODE_COLOR); + } + fn emit_special_action_node(&mut self, id: &usize, label: &str) { + self.emit_node(id, label, SPECIAL_ACTION_NODE_COLOR); + } + + // =============== + // ==== EDGES ==== + // =============== + + fn emit_labeled_edge(&mut self, from: &usize, to: &usize) { + if let Some(label) = &self.param_label { + self.graph.add_stmt(stmt!( + edge!(node_id!(from) => node_id!(to); + attr!("label", label) + ) + )); + } + } + + fn emit_edge(&mut self, from: &usize, to: &usize) { + if self.is_param_action { + self.emit_labeled_edge(from, to); + + // reset + self.is_param_action = false; + self.param_label = None; + } else { + self.graph.add_stmt(stmt!( + edge!(node_id!(from) => node_id!(to)) + )); + } + } +} +impl BehaviorVisitor<()> for Visualizer<'_> { + fn visit_root(&mut self, node: &TreeNode) -> () { + if let TreeNode::Root { id, child } = node { + self.emit_control_node(id, "root"); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } + + fn visit_sequence(&mut self, node: &TreeNode) -> () { + if let TreeNode::Sequence { id, parent, children } = node { + self.emit_control_node(id, "sequence"); + self.emit_edge(parent, id); + + for child in children { + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } + } else { + unreachable!() + } + } + + fn visit_fallback(&mut self, node: &TreeNode) -> () { + if let TreeNode::Fallback { id, parent, children } = node { + self.emit_control_node(id, "fallback"); + self.emit_edge(parent, id); + + for child in children { + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } + } else { + unreachable!() + } + } + + fn visit_is_instr(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::IsInstr {instr_names} = ty { + let mut names = "".to_string(); + for name in instr_names { + if names.is_empty() { + 
names.push_str(name); + } else { + names.push_str(&format!("OR{name}")); + } + } + self.emit_decorator_node(id, &format!("IsInstr_{}", names)); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_is_probe_type(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::IsProbeType {probe_type} = ty { + self.emit_decorator_node(id, &format!("IsProbeType_{}", probe_type.replace(":", "_"))); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_has_alt_call(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::HasAltCall = ty { + self.emit_decorator_node(id, "HasAltCall"); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_has_params(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::HasParams = ty { + self.emit_decorator_node(id, "HasParams"); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_pred_is(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::PredIs{ val } = ty { + self.emit_decorator_node(id, &format!("PredIs_{}", val)); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + 
} + + fn visit_for_each_probe(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::ForEachProbe { target } = ty { + self.emit_decorator_node(id, &format!("ForEachProbe_{}", target.replace(":", "_"))); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_for_first_probe(&mut self, node: &TreeNode) -> () { + if let TreeNode::Decorator { id, ty, parent, child } = node { + if let DecoratorType::ForFirstProbe { target } = ty { + self.emit_decorator_node(id, &format!("ForFirstProbe_{}", target.replace(":", "_"))); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_enter_package(&mut self, node: &TreeNode) -> () { + if let TreeNode::ActionWithChild { id, ty, parent, child } = node { + let ActionWithChildType::EnterPackage { package_name } = ty; + self.emit_special_action_node(id, &format!("EnterPackage_{}", package_name.replace(":", "_"))); + self.emit_edge(parent, id); + + if let Some(node) = self.tree.get_node(child.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } + + fn visit_emit_if_else(&mut self, node: &TreeNode) -> () { + if let TreeNode::ParameterizedAction { id, parent, ty, .. 
} = node { + if let ParamActionType::EmitIfElse { cond, conseq, alt } = ty { + self.emit_special_action_node(id, "EmitIfElse"); + self.emit_edge(parent, id); + + self.is_param_action = true; + self.param_label = Some("cond".to_string()); + if let Some(node) = self.tree.get_node(cond.clone()) { + self.visit_node(node); + } + self.is_param_action = true; + self.param_label = Some("conseq".to_string()); + if let Some(node) = self.tree.get_node(conseq.clone()) { + self.visit_node(node); + } + self.is_param_action = true; + self.param_label = Some("alt".to_string()); + if let Some(node) = self.tree.get_node(alt.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_if(&mut self, node: &TreeNode) -> () { + if let TreeNode::ParameterizedAction { id, parent, ty, .. } = node { + if let ParamActionType::EmitIf { cond, conseq } = ty { + self.emit_special_action_node(id, "EmitIf"); + self.emit_edge(parent, id); + + self.is_param_action = true; + self.param_label = Some("cond".to_string()); + if let Some(node) = self.tree.get_node(cond.clone()) { + self.visit_node(node); + } + self.is_param_action = true; + self.param_label = Some("conseq".to_string()); + if let Some(node) = self.tree.get_node(conseq.clone()) { + self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_enter_scope(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EnterScope{ scope_name, .. } = ty { + self.emit_action_node(id, &format!("EnterScope_{}", scope_name.replace(":", "_"))); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_enter_scope_of(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EnterScopeOf{ scope_ty, .. 
} = ty { + self.emit_action_node(id, &format!("EnterScopeOf_{}", scope_ty.to_string())); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_exit_scope(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::ExitScope = ty { + self.emit_action_node(id, "ExitScope"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_define(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::Define {var_name, ..} = ty { + self.emit_action_node(id, &format!("Define_{}", var_name.replace(":", "_"))); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_pred(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EmitPred = ty { + self.emit_action_node(id, "EmitPred"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_fold_pred(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::FoldPred = ty { + self.emit_action_node(id, "FoldPred"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_reset(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::Reset = ty { + self.emit_action_node(id, "Reset"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_save_params(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::SaveParams = ty { + self.emit_action_node(id, "SaveParams"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_params(&mut self, 
node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EmitParams = ty { + self.emit_action_node(id, "EmitParams"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_body(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EmitBody = ty { + self.emit_action_node(id, "EmitBody"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_alt_call(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EmitAltCall = ty { + self.emit_action_node(id, "EmitAltCall"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_remove_orig(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::RemoveOrig = ty { + self.emit_action_node(id, "RemoveOrig"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_orig(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::EmitOrig = ty { + self.emit_action_node(id, "EmitOrig"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_force_success(&mut self, node: &TreeNode) -> () { + if let TreeNode::Action { id, ty, parent} = node { + if let ActionType::ForceSuccess = ty { + self.emit_action_node(id, "ForceSuccess"); + self.emit_edge(parent, id); + } else { + unreachable!() + } + } else { + unreachable!() + } + } +} diff --git a/src/generator.rs b/src/generator.rs index 63721503..f2976df8 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -1,4 +1,5 @@ -pub mod code_generator; +pub mod init_generator; +pub mod instr_generator; pub mod types; pub mod emitters; diff --git 
a/src/generator/code_generator.rs b/src/generator/code_generator.rs deleted file mode 100644 index a9cac431..00000000 --- a/src/generator/code_generator.rs +++ /dev/null @@ -1,238 +0,0 @@ -// ======================= -// ==== CodeGenerator ==== -// ======================= - -use std::collections::HashMap; -use log::trace; -use crate::generator::emitters::Emitter; -use crate::parser::types::{DataType, Whammy, Whamm, WhammVisitorMut, Expr, Function, Module, Op, Probe, Provider, Statement, Value}; - -/// The code generator traverses the AST and calls the passed emitter to -/// emit some instruction/code/function/etc. -/// This process should ideally be generic, made to perform a specific -/// instrumentation technique by the Emitter field. -pub struct CodeGenerator { - pub emitter: Box, - pub context_name: String -} -impl CodeGenerator { - pub fn new(emitter: Box) -> Self { - Self { - emitter, - context_name: "".to_string() - } - } - pub fn generate(&mut self, whamm: &mut Whamm) -> bool { - self.visit_whamm(whamm) - } - pub fn dump_to_file(&mut self, output_wasm_path: String) -> bool { - self.emitter.dump_to_file(output_wasm_path) - } - - // Private helper functions - fn visit_globals(&mut self, globals: &HashMap)>) -> bool { - let mut is_success = true; - for (name, (ty, _expr, val)) in globals.iter() { - is_success &= self.emitter.emit_global(name.clone(), ty.clone(), val); - } - - is_success - } -} -impl WhammVisitorMut for CodeGenerator { - fn visit_whamm(&mut self, whamm: &mut Whamm) -> bool { - trace!("Entering: CodeGenerator::visit_whamm"); - self.context_name = "whamm".to_string(); - let mut is_success = self.emitter.emit_whamm(whamm); - - // visit fns - whamm.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // DO NOT inject globals (used by compiler) - // inject whammys - whamm.whammys.iter_mut().for_each(|whammy| { - is_success &= self.visit_whammy(whammy); - }); - - trace!("Exiting: CodeGenerator::visit_whamm"); - // Remove from 
`context_name` - self.context_name = "".to_string(); - is_success - } - - fn visit_whammy(&mut self, whammy: &mut Whammy) -> bool { - trace!("Entering: CodeGenerator::visit_whammy"); - self.emitter.enter_scope(); - self.context_name += &format!(":{}", whammy.name.clone()); - let mut is_success = self.emitter.emit_whammy(whammy); - - // visit fns - whammy.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // inject globals - is_success &= self.visit_globals(&whammy.globals); - // inject providers - whammy.providers.iter_mut().for_each(|(_name, provider)| { - is_success &= self.visit_provider(provider); - }); - - trace!("Exiting: CodeGenerator::visit_whammy"); - self.emitter.exit_scope(); - // Remove from `context_name` - self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); - is_success - } - - fn visit_provider(&mut self, provider: &mut Provider) -> bool { - trace!("Entering: CodeGenerator::visit_provider"); - self.emitter.enter_scope(); - self.context_name += &format!(":{}", provider.name.clone()); - let mut is_success = true; - - // visit fns - provider.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // DO NOT inject globals (used by compiler) - // inject module fns/globals - provider.modules.iter_mut().for_each(|(_name, module)| { - is_success &= self.visit_module(module); - }); - - // At this point we've traversed the entire tree to generate necessary - // globals and fns! - // Now, we emit_provider which will do the actual instrumentation step! 
- // TODO -- this isn't flexible at all...need to visit with the generator to help generalize - // the visiting logic - self.emitter.reset_children(); - is_success &= self.emitter.emit_provider(&self.context_name, provider); - - trace!("Exiting: CodeGenerator::visit_provider"); - self.emitter.exit_scope(); - // Remove this module from `context_name` - self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); - is_success - } - - fn visit_module(&mut self, module: &mut Module) -> bool { - trace!("Entering: CodeGenerator::visit_module"); - self.emitter.enter_scope(); - let mut is_success = true; - self.context_name += &format!(":{}", module.name.clone()); - - // visit fns - module.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // DO NOT inject globals (used by compiler) - // inject function fns/globals - module.functions.iter_mut().for_each(|(_name, function)| { - is_success &= self.visit_function(function); - }); - - trace!("Exiting: CodeGenerator::visit_module"); - self.emitter.exit_scope(); - // Remove this module from `context_name` - self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); - is_success - } - - fn visit_function(&mut self, function: &mut Function) -> bool { - trace!("Entering: CodeGenerator::visit_function"); - self.emitter.enter_scope(); - // let mut is_success = self.emitter.emit_function(function); - self.context_name += &format!(":{}", function.name.clone()); - let mut is_success = true; - - // visit fns - function.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // DO NOT inject globals (used by compiler) - // inject probe fns/globals - function.probe_map.iter_mut().for_each(|(_name, probes)| { - probes.iter_mut().for_each(|probe| { - is_success &= self.visit_probe(probe); - }); - }); - - trace!("Exiting: CodeGenerator::visit_function"); - self.emitter.exit_scope(); - // Remove this function from `context_name` - 
self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); - is_success - } - - fn visit_probe(&mut self, probe: &mut Probe) -> bool { - trace!("Entering: CodeGenerator::visit_probe"); - self.emitter.enter_scope(); - // let mut is_success = self.emitter.emit_probe(probe); - self.context_name += &format!(":{}", probe.name.clone()); - let mut is_success = true; - - // visit fns - probe.fns.iter_mut().for_each(| f | { - is_success &= self.visit_fn(f); - }); - // DO NOT inject globals (used by compiler) - - trace!("Exiting: CodeGenerator::visit_probe"); - self.emitter.exit_scope(); - // Remove this probe from `context_name` - self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); - is_success - } - - fn visit_fn(&mut self, f: &mut crate::parser::types::Fn) -> bool { - trace!("Entering: CodeGenerator::visit_fn"); - self.emitter.enter_scope(); - let is_success = self.emitter.emit_fn(&self.context_name, f); - trace!("Exiting: CodeGenerator::visit_fn"); - self.emitter.exit_scope(); - is_success - } - - fn visit_formal_param(&mut self, param: &mut (Expr, DataType)) -> bool { - trace!("Entering: CodeGenerator::visit_formal_param"); - let is_success = self.emitter.emit_formal_param(param); - trace!("Exiting: CodeGenerator::visit_formal_param"); - is_success - } - - fn visit_stmt(&mut self, stmt: &mut Statement) -> bool { - trace!("Entering: CodeGenerator::visit_stmt"); - let is_success = self.emitter.emit_stmt(stmt); - trace!("Exiting: CodeGenerator::visit_stmt"); - is_success - } - - fn visit_expr(&mut self, expr: &mut Expr) -> bool { - trace!("Entering: CodeGenerator::visit_expr"); - let is_success = self.emitter.emit_expr(expr); - trace!("Exiting: CodeGenerator::visit_expr"); - is_success - } - - fn visit_op(&mut self, op: &mut Op) -> bool { - trace!("Entering: CodeGenerator::visit_op"); - let is_success = self.emitter.emit_op(op); - trace!("Exiting: CodeGenerator::visit_op"); - is_success - } - - fn 
visit_datatype(&mut self, datatype: &mut DataType) -> bool { - trace!("Entering: CodeGenerator::visit_datatype"); - let is_success = self.emitter.emit_datatype(datatype); - trace!("Exiting: CodeGenerator::visit_datatype"); - is_success - } - - fn visit_value(&mut self, val: &mut Value) -> bool { - trace!("Entering: CodeGenerator::visit_value"); - let is_success = self.emitter.emit_value(val); - trace!("Exiting: CodeGenerator::visit_value"); - is_success - } -} \ No newline at end of file diff --git a/src/generator/emitters.rs b/src/generator/emitters.rs index 251eaab0..33d0482e 100644 --- a/src/generator/emitters.rs +++ b/src/generator/emitters.rs @@ -1,11 +1,11 @@ -use std::collections::HashMap; use std::process::exit; -use log::{error, info, warn}; +use log::{debug, error, info}; use regex::Regex; -use walrus::{ActiveData, ActiveDataLocation, DataKind, FunctionBuilder, FunctionId, FunctionKind, ImportedFunction, InstrLocId, InstrSeqBuilder, LocalFunction, MemoryId, ModuleData, ValType}; +use walrus::{ActiveData, ActiveDataLocation, DataKind, FunctionBuilder, FunctionId, FunctionKind, + ImportedFunction, InstrSeqBuilder, LocalFunction, MemoryId, ModuleData, ValType}; use walrus::ir::{BinaryOp, ExtendedLoad, Instr, InstrSeqId, LoadKind, MemArg}; use crate::generator::types::ExprFolder; -use crate::parser::types::{DataType, Whammy, Whamm, Expr, Fn, Function, Module, Op, Probe, Provider, Statement, Value}; +use crate::parser::types::{DataType, Expr, Fn, Op, Statement, Value}; use crate::verifier::types::{Record, SymbolTable, VarAddr}; // ================================================= @@ -14,24 +14,41 @@ use crate::verifier::types::{Record, SymbolTable, VarAddr}; pub trait Emitter { fn enter_scope(&mut self); + fn enter_named_scope(&mut self, scope_name: &String) -> bool; fn exit_scope(&mut self); fn reset_children(&mut self); - fn emit_whamm(&mut self, whamm: &Whamm) -> bool; - fn emit_whammy(&mut self, whammy: &Whammy) -> bool; - fn emit_provider(&mut self, 
context: &str, provider: &mut Provider) -> bool; - - // TODO -- should emit module/function/probe be private? - fn emit_module(&mut self, context: &str, module: &mut Module) -> bool; + fn has_next_instr(&self) -> bool; + fn next_instr(&mut self) -> bool; + fn curr_instr_is_of_type(&mut self, instr_names: &Vec) -> bool; + fn curr_instr_type(&mut self) -> String; + fn has_params(&mut self) -> bool; + fn save_params(&mut self) -> bool; + fn emit_params(&mut self) -> bool; + fn define_compiler_var(&mut self, context: &String, var_name: &String) -> bool; + // fn emit_event(&mut self, context: &str, event: &mut Event) -> bool; + fn fold_expr(&mut self, expr: &mut Expr) -> bool; + fn emit_expr(&mut self, expr: &mut Expr) -> bool; fn emit_fn(&mut self, context_name: &str, f: &Fn) -> bool; fn emit_formal_param(&mut self, param: &(Expr, DataType)) -> bool; fn emit_global(&mut self, name: String, ty: DataType, val: &Option) -> bool; - fn emit_stmt(&mut self, stmt: &Statement) -> bool; - fn emit_expr(&mut self, expr: &Expr) -> bool; - fn emit_op(&mut self, op: &Op) -> bool; - fn emit_datatype(&mut self, datatype: &DataType) -> bool; - fn emit_value(&mut self, val: &Value) -> bool; + fn remove_orig(&mut self) -> bool; + fn emit_orig(&mut self) -> bool; + fn emit_if(&mut self) -> bool; + fn emit_if_else(&mut self) -> bool; + /// Will configure the emitter to emit subsequent expression as the condition of an if or if/else stmt + fn emit_condition(&mut self) -> bool; + /// Will configure the emitter to emit subsequent statements into the consequent body of an if or if/else stmt + fn emit_consequent(&mut self) -> bool; + /// Will configure the emitter to emit subsequent statements into the alternate body of an if/else stmt + fn emit_alternate(&mut self) -> bool; + /// Will configure the emitter to emit subsequent statements in the outer block of some branching logic + fn finish_branch(&mut self) -> bool; + fn emit_body(&mut self, body: &mut Vec) -> bool; + fn has_alt_call(&mut 
self) -> bool; // TODO -- remove need for this + fn emit_alt_call(&mut self) -> bool; // TODO -- remove need for this + fn emit_stmt(&mut self, stmt: &mut Statement) -> bool; fn dump_to_file(&mut self, output_wasm_path: String) -> bool; } @@ -46,17 +63,8 @@ pub trait Emitter { // ================================================================================= // ================================================================================= -fn emit_body(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &MemoryId, curr_mem_offset: &mut u32, - body: &mut Vec, instr_builder: &mut InstrSeqBuilder, index: &mut usize) -> bool { - let mut is_success = true; - body.iter_mut().for_each(|stmt| { - is_success &= emit_stmt(table, module_data, mem_id, curr_mem_offset, stmt, instr_builder, index) - }); - is_success -} - -fn emit_stmt(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &MemoryId, curr_mem_offset: &mut u32, - stmt: &mut Statement, instr_builder: &mut InstrSeqBuilder, index: &mut usize) -> bool { +fn emit_stmt(table: &mut SymbolTable, module_data: &mut ModuleData, stmt: &mut Statement, + instr_builder: &mut InstrSeqBuilder, metadata: &mut InsertionMetadata, index: &mut usize) -> bool { let mut is_success = true; match stmt { Statement::Assign { var_id, expr } => { @@ -90,7 +98,7 @@ fn emit_stmt(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &Mem false } } else { - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, expr, instr_builder, index); + is_success &= emit_expr(table, module_data, expr, instr_builder, metadata, index); return if let Expr::VarId { name } = var_id { let var_rec_id = match table.lookup(name) { @@ -145,18 +153,19 @@ fn emit_stmt(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &Mem } } Statement::Expr { expr } => { - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, expr, instr_builder, index); + is_success &= emit_expr(table, module_data, expr, 
instr_builder, metadata, index); } } is_success } -fn emit_expr(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &MemoryId, curr_mem_offset: &mut u32, expr: &mut Expr, instr_builder: &mut InstrSeqBuilder, index: &mut usize) -> bool { +fn emit_expr(table: &mut SymbolTable, module_data: &mut ModuleData, expr: &mut Expr, instr_builder: &mut InstrSeqBuilder, + metadata: &mut InsertionMetadata, index: &mut usize) -> bool { let mut is_success = true; match expr { Expr::BinOp {lhs, op, rhs} => { - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, lhs, instr_builder, index); - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, rhs, instr_builder, index); + is_success &= emit_expr(table, module_data, lhs, instr_builder, metadata, index); + is_success &= emit_expr(table, module_data, rhs, instr_builder, metadata, index); is_success &= emit_op(op, instr_builder, index); } Expr::Call { fn_target, args } => { @@ -169,7 +178,7 @@ fn emit_expr(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &Mem if let Some(args) = args { args.iter_mut().for_each(|boxed_arg| { let arg = &mut **boxed_arg; // unbox - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, arg, instr_builder, index); + is_success &= emit_expr(table, module_data, arg, instr_builder, metadata, index); }) } @@ -252,7 +261,7 @@ fn emit_expr(table: &mut SymbolTable, module_data: &mut ModuleData, mem_id: &Mem } } Expr::Primitive { val } => { - is_success &= emit_value(table, mem_id, module_data, curr_mem_offset, val, instr_builder, index); + is_success &= emit_value(table, module_data, val, instr_builder, metadata, index); } } is_success @@ -380,12 +389,8 @@ fn emit_op(op: &Op, instr_builder: &mut InstrSeqBuilder, index: &mut usize) -> b } } -fn _emit_datatype(_datatype: &DataType, _instr_builder: &InstrSeqBuilder, _index: &mut usize) -> bool { - // don't think i actually need this - false -} - -fn emit_value(table: &mut SymbolTable, mem_id: 
&MemoryId, module_data: &mut ModuleData, curr_mem_offset: &mut u32, val: &mut Value, instr_builder: &mut InstrSeqBuilder, index: &mut usize) -> bool { +fn emit_value(table: &mut SymbolTable, module_data: &mut ModuleData, val: &mut Value, instr_builder: &mut InstrSeqBuilder, + metadata: &mut InsertionMetadata, index: &mut usize) -> bool { let mut is_success = true; match val { Value::Integer { val, .. } => { @@ -398,20 +403,20 @@ fn emit_value(table: &mut SymbolTable, mem_id: &MemoryId, module_data: &mut Modu } Value::Str { val, addr, ty: _ty } => { let data_id = module_data.add(DataKind::Active(ActiveData { - memory: *mem_id, - location: ActiveDataLocation::Absolute(curr_mem_offset.clone()) + memory: metadata.mem_id, + location: ActiveDataLocation::Absolute(metadata.curr_mem_offset.clone()) }), Vec::from(val.as_bytes())); // save the memory addresses/lens so they can be used as appropriate *addr = Some(( data_id, - curr_mem_offset.clone(), + metadata.curr_mem_offset.clone(), val.len() )); // emit Wasm instructions for the memory address and string length instr_builder.instr_at( *index,walrus::ir::Const { - value: walrus::ir::Value::I32(curr_mem_offset.clone() as i32) + value: walrus::ir::Value::I32(metadata.curr_mem_offset.clone() as i32) }); // update index to point to what follows our insertions *index += 1; @@ -422,12 +427,12 @@ fn emit_value(table: &mut SymbolTable, mem_id: &MemoryId, module_data: &mut Modu *index += 1; // update curr_mem_offset to account for new data - *curr_mem_offset += val.len() as u32; + metadata.curr_mem_offset += val.len() as u32; is_success &= true; } Value::Tuple { vals, .. } => { vals.iter_mut().for_each(|val| { - is_success &= emit_expr(table, module_data, mem_id, curr_mem_offset, val, instr_builder, index); + is_success &= emit_expr(table, module_data, val, instr_builder, metadata, index); }); } Value::Boolean { val, .. 
} => { @@ -452,129 +457,227 @@ fn emit_value(table: &mut SymbolTable, mem_id: &MemoryId, module_data: &mut Modu is_success } -// ============================== -// ==== WasmRewritingEmitter ==== -// ============================== -// Struct to store info on insertion locations for an instruction sequence. -// Note that blocks can be indefinitely nested. -#[derive(Debug)] -struct ProbeLoc { - // (instr position, no. of paths, nested ProbeInsertLocs) - positions: Vec<(Option, FunctionId, InstrSeqId, usize, Instr)>, -} -fn get_probe_insert_locations(probe_locs: &mut HashMap, module: &mut Module, func_id: FunctionId, func_name: Option, func: &LocalFunction, instr_seq_id: InstrSeqId) { - func.block(instr_seq_id) - .iter() - .enumerate() - .for_each(|(index, (instr, _))| { - let instr_as_str = &format!("{:?}", instr); - let instr_name = instr_as_str.split("(").next().unwrap().to_lowercase(); - - if let Some(_function) = module.functions.get_mut(&instr_name) { - // This instruction might need to be probed! 
- // get current probe locations for this instr type - let probe_loc = match probe_locs.get_mut(&instr_name) { - Some(probe_loc) => { - probe_loc - }, - None => { - // add new ProbeLoc instance for this instr - let probe_loc = ProbeLoc { - positions: vec![] - }; - probe_locs.insert(instr_name.clone(), probe_loc); - probe_locs.get_mut(&instr_name).unwrap() - } - }; - // add current instr - probe_loc.positions.push((func_name.clone(), func_id.clone(), instr_seq_id, index, instr.clone())); +fn get_func_info(app_wasm: &walrus::Module, func: &walrus::Function) -> FuncInfo { + match &func.kind { + FunctionKind::Import(ImportedFunction { ty: ty_id, import: import_id }) => { + let import = app_wasm.imports.get(*import_id); + let ty = app_wasm.types.get(*ty_id); + + FuncInfo { + func_kind: "import".to_string(), + module: import.module.clone(), + name: import.name.clone(), + params: Vec::from(ty.params()) } - // visit nested blocks - match instr { - Instr::Block(block) => { - get_probe_insert_locations(probe_locs, module, func_id, func_name.clone(), func, block.seq); - } - Instr::Loop(_loop) => { - get_probe_insert_locations(probe_locs, module, func_id, func_name.clone(), func, _loop.seq); - } - Instr::IfElse(if_else, ..) 
=> { - println!("IfElse: {:#?}", if_else); - get_probe_insert_locations(probe_locs, module, func_id, func_name.clone(), func, if_else.consequent); - get_probe_insert_locations(probe_locs, module, func_id, func_name.clone(), func, if_else.alternative); - } - _ => { - // do nothing extra - } + }, + FunctionKind::Local(LocalFunction{ args, ..}) => { + let mut params = vec![]; + args.iter().for_each(|arg_id| { + let arg = app_wasm.locals.get(*arg_id); + params.push(arg.ty()); + }); + + FuncInfo { + func_kind: "local".to_string(), + module: "".to_string(), + name: "".to_string(), + params: Vec::from(params) } - }); + }, + FunctionKind::Uninitialized(ty_id) => { + let ty = app_wasm.types.get(*ty_id); + + FuncInfo { + func_kind: "uninitialized".to_string(), + module: "".to_string(), + name: "".to_string(), + params: Vec::from(ty.params()) + } + } + } } +// ============================== +// ==== WasmRewritingEmitter ==== +// ============================== -pub struct WasmRewritingEmitter { - // pub app_wasm_path: String, - pub app_wasm: walrus::Module, - pub table: SymbolTable, - - fn_providing_contexts: Vec +struct InsertionMetadata { + // curr_event: String, + mem_id: MemoryId, + curr_mem_offset: u32, } -impl WasmRewritingEmitter { - pub fn new(app_wasm: walrus::Module, table: SymbolTable) -> Self { - Self { - app_wasm, - table, - fn_providing_contexts: vec![ "whamm".to_string() ] - } - } - fn emit_wasm_bytecode_module(&mut self, module: &mut Module) -> bool { - // Initialize this to 4 MB - let mem_id = self.app_wasm.memories.iter().next() - .expect("only single memory is supported") - .id(); - let mut curr_mem_offset: u32 = 1_052_576; // Set default memory base address to DEFAULT + 4KB = 1048576 bytes + 4000 bytes = 1052576 bytes - let mut is_success = true; +#[derive(Debug)] +struct InstrIter { + instr_locs: Vec, + curr_loc: usize +} +impl InstrIter { + /// Build out a list of all local functions and their blocks/instruction indexes + /// to visit while doing 
instrumentation. + fn new(app_wasm: &walrus::Module) -> Self { // Figure out which functions to visit - let mut probe_locs: HashMap = HashMap::new(); - - for func in self.app_wasm.funcs.iter() { - let id = func.id(); + let mut instr_locs = vec![]; + for func in app_wasm.funcs.iter() { + let func_id = func.id(); if let Some(name) = func.name.as_ref() { // TODO -- get rid of this necessity (probably by removing the need to have // functions already present in the app code) if name.starts_with("instr_") { continue; } - - if name.contains("CallFuture$LT") { - println!("reached it!"); - } } if let FunctionKind::Local(local_func) = &func.kind { // TODO -- make sure that the id is not any of the injected function IDs (strcmp) - get_probe_insert_locations(&mut probe_locs, module, id, func.name.clone(), local_func, local_func.entry_block()); + Self::init_instr_locs(&mut instr_locs, local_func, &func_id, func.name.clone(), + local_func.entry_block()); } } + debug!("Finished creating list of instructions to visit"); + Self { + instr_locs, + curr_loc: 0 + } + } + fn init_instr_locs(locs: &mut Vec, func: &LocalFunction, func_id: &FunctionId, + func_name: Option, instr_seq_id: InstrSeqId) { + func.block(instr_seq_id) + .iter() + .enumerate() + .for_each(|(index, (instr, _))| { + let instr_as_str = &format!("{:?}", instr); + let instr_name = instr_as_str.split("(").next().unwrap().to_lowercase(); - for (function_name, ProbeLoc {positions}) in probe_locs.iter() { - for (_func_name, func_id, instr_seq_id, index, instr) in positions.iter() { - // if let Some(name) = func_name.as_ref() { - // if name.contains("CallFuture$LT") { - // println!("Possibly injecting probes for {name}"); - // } - // } - self.table.enter_named_scope(function_name); - let function = module.functions.get_mut(function_name).unwrap(); - let params = self.preprocess_instr(instr, function); + // add current instr + locs.push( ProbeLoc { + // wasm_func_name: func_name.clone(), + wasm_func_id: func_id.clone(), + 
instr_seq_id, + index, + instr_name: instr_name.clone(), + instr: instr.clone(), + instr_params: None, + instr_created_args: vec![], + instr_alt_call: None, + // instr_symbols: HashMap::new() + }); - // passing a clone of index so it can be mutated as instructions are injected - is_success &= self.emit_function(function, &mem_id, &mut curr_mem_offset, *func_id, instr_seq_id, ¶ms, &mut index.clone()); - self.table.exit_scope(); - } + // visit nested blocks + match instr { + Instr::Block(block) => { + Self::init_instr_locs(locs, func, func_id, func_name.clone(), block.seq); + } + Instr::Loop(_loop) => { + Self::init_instr_locs(locs, func, func_id, func_name.clone(), _loop.seq); + } + Instr::IfElse(if_else, ..) => { + println!("IfElse: {:#?}", if_else); + Self::init_instr_locs(locs, func, func_id, func_name.clone(), if_else.consequent); + Self::init_instr_locs(locs, func, func_id, func_name.clone(), if_else.alternative); + } + _ => { + // do nothing extra for other instructions + } + } + }); + } + fn has_next(&self) -> bool { + self.curr_loc + 1 < self.instr_locs.len() + } + fn next(&mut self) -> Option<&ProbeLoc> { + self.curr_loc += 1; + self.curr() + } + fn curr(&self) -> Option<&ProbeLoc> { + self.instr_locs.get(self.curr_loc) + } + fn curr_mut(&mut self) -> Option<&mut ProbeLoc> { + self.instr_locs.get_mut(self.curr_loc) + } +} + +// Struct to store info on insertion locations for an instruction sequence. +// Note that blocks can be indefinitely nested. 
+#[derive(Debug)] +struct ProbeLoc { + // wasm_func_name: Option, + wasm_func_id: FunctionId, + instr_seq_id: InstrSeqId, + index: usize, + + instr_name: String, + instr: Instr, + instr_params: Option>, + instr_created_args: Vec<(String, usize)>, + + // Save off the compiler-defined constants for this instruction + // instr_symbols: HashMap, + instr_alt_call: Option +} +struct FuncInfo { + func_kind: String, + module: String, + name: String, + params: Vec +} +struct EmittingInstrTracker { + curr_seq_id: InstrSeqId, + curr_idx: usize, + + /// The sequence ID of the main block (containing the instruction of-interest) + main_seq_id: InstrSeqId, + /// The current index into the main block (containing the instruction of-interest) + main_idx: usize, + + /// The sequence ID of the outer block of an injected conditional + outer_seq_id: Option, + /// The current index into the outer block of an injected conditional + outer_idx: Option, + + /// The sequence ID of the consequent block of an injected conditional + then_seq_id: Option, + /// The current index into the consequent block of an injected conditional + then_idx: Option, + + /// The sequence ID of the alternate block of an injected conditional + else_seq_id: Option, + /// The current index into the alternate block of an injected conditional + else_idx: Option +} + +pub struct WasmRewritingEmitter { + pub app_wasm: walrus::Module, + pub table: SymbolTable, + + // whamm! 
AST traversal bookkeeping + metadata: InsertionMetadata, + instr_iter: InstrIter, + emitting_instr: Option, + + fn_providing_contexts: Vec +} +impl WasmRewritingEmitter { + pub fn new(app_wasm: walrus::Module, table: SymbolTable) -> Self { + let mem_id = app_wasm.memories.iter().next() + .expect("only single memory is supported") + .id(); + let instr_iter = InstrIter::new(&app_wasm); + + Self { + app_wasm, + table, + metadata: InsertionMetadata { + // curr_event: "".to_string(), + mem_id, + curr_mem_offset: 1_052_576, // Set default memory base address to DEFAULT + 4KB = 1048576 bytes + 4000 bytes = 1052576 bytes + }, + instr_iter, + emitting_instr: None, + fn_providing_contexts: vec![ "whamm".to_string() ] } - is_success } fn override_var_val(&mut self, rec_id: &usize, val: Option) { @@ -587,468 +690,91 @@ impl WasmRewritingEmitter { } } - fn preprocess_instr(&mut self, instr: &Instr, function: &mut Function) -> Option> { - if function.name.to_lowercase() == "call" { - if let Instr::Call(func) = &instr { - let func = self.app_wasm.funcs.get(func.func); - // if func.name.as_ref().unwrap().contains("ZN87") { - // println!("{}", func.name.as_ref().unwrap()); - // } - let (func_kind, module, name, params) = match &func.kind { - FunctionKind::Import(ImportedFunction { ty: ty_id, import: import_id }) => { - let func_kind = "import"; - let import = self.app_wasm.imports.get(*import_id); - let ty = self.app_wasm.types.get(*ty_id); - - (func_kind, import.module.clone(), import.name.clone(), Vec::from(ty.params())) - }, - FunctionKind::Local(LocalFunction{ args, ..}) => { - let func_kind = "local"; - let mut params = vec![]; - args.iter().for_each(|arg_id| { - let arg = self.app_wasm.locals.get(*arg_id); - params.push(arg.ty()); - }); + fn define_new_target_fn_name(&mut self) -> bool { + // TODO -- change this to be an inline call() instead of setting a var + true + } - (func_kind, "".to_string(), "".to_string(), Vec::from(params)) - }, - FunctionKind::Uninitialized(ty_id) 
=> { - let func_kind = "uninitialized"; - let ty = self.app_wasm.types.get(*ty_id); + fn define_target_imp_name(&mut self) -> bool { + let var_name = "target_imp_name".to_string(); - (func_kind, "".to_string(), "".to_string(), Vec::from(ty.params())) - } - }; - // define compiler constants - let rec_id = match self.table.lookup(&"target_fn_type".to_string()) { + if let Some(curr_instr) = self.instr_iter.curr() { + if let Instr::Call(func) = &curr_instr.instr { + let func = self.app_wasm.funcs.get(func.func); + let func_info = get_func_info(&self.app_wasm, func); + if func.name.as_ref().unwrap().contains("call_new") { + // For debugging, set breakpoint here! + println!("{}", func.name.as_ref().unwrap()); + } + + let rec_id = match self.table.lookup(&var_name) { Some(rec_id) => rec_id.clone(), _ => { - error!("target_fn_type symbol does not exist in this scope!"); - return Some(params); + error!("{} symbol does not exist in this scope!", var_name); + return false; } }; self.override_var_val(&rec_id, Some(Value::Str { ty: DataType::Str, - val: func_kind.to_string(), + val: func_info.name.to_string(), addr: None })); + } + } + true + } - let tuple = function.globals.get_mut("target_fn_type").unwrap(); - tuple.2 = Some(Value::Str { - ty: DataType::Str, - val: func_kind.to_string(), - addr: None - }); + fn define_target_fn_type(&mut self) -> bool { + let var_name = "target_fn_type".to_string(); - let rec_id = match self.table.lookup(&"target_imp_module".to_string()) { + if let Some(curr_instr) = self.instr_iter.curr() { + if let Instr::Call(func) = &curr_instr.instr { + let func = self.app_wasm.funcs.get(func.func); + let func_info = get_func_info(&self.app_wasm, func); + // if func.name.as_ref().unwrap().contains("call_perform") { + // println!("{}", func.name.as_ref().unwrap()); + // } + let rec_id = match self.table.lookup(&var_name) { Some(rec_id) => rec_id.clone(), _ => { - error!("target_imp_module symbol does not exist in this scope!"); - return Some(params); + 
error!("{} symbol does not exist in this scope!", var_name); + return false; } }; self.override_var_val(&rec_id, Some(Value::Str { ty: DataType::Str, - val: module.clone(), + val: func_info.func_kind.to_string(), addr: None })); + } + } + true + } - let tuple = function.globals.get_mut("target_imp_module").unwrap(); - tuple.2 = Some(Value::Str { - ty: DataType::Str, - val: module.clone(), - addr: None - }); - - let rec_id = match self.table.lookup(&"target_imp_name".to_string()) { + fn define_target_imp_module(&mut self) -> bool { + let var_name = "target_imp_module".to_string(); + if let Some(curr_instr) = self.instr_iter.curr() { + if let Instr::Call(func) = &curr_instr.instr { + let func = self.app_wasm.funcs.get(func.func); + let func_info = get_func_info(&self.app_wasm, func); + // if func.name.as_ref().unwrap().contains("call_perform") { + // println!("{}", func.name.as_ref().unwrap()); + // } + let rec_id = match self.table.lookup(&var_name) { Some(rec_id) => rec_id.clone(), _ => { - error!("target_imp_name symbol does not exist in this scope!"); - return Some(params); + error!("{} symbol does not exist in this scope!", var_name); + return false; } }; self.override_var_val(&rec_id, Some(Value::Str { ty: DataType::Str, - val: name.clone(), + val: func_info.module.to_string(), addr: None })); - - let tuple = function.globals.get_mut("target_imp_name").unwrap(); - tuple.2 = Some(Value::Str { - ty: DataType::Str, - val: name.clone(), - addr: None - }); - - return Some(params); - } - } - None - } - fn emit_function(&mut self, function: &mut Function, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, func_params: &Option>, index: &mut usize) -> bool { - // inject probes (should be at the correct point in the `walrus::ir::VisitorMut`) - self.emit_probes_for_fn(function, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params, index) - } - fn emit_probes_for_fn(&mut self, function: &mut Function, mem_id: &MemoryId, 
curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, func_params: &Option>, index: &mut usize) -> bool { - let mut is_success = true; - // 1. Inject BEFORE probes - if let Some(res) = self.emit_probes(function, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params, &"before".to_string(), index) { - // Assumption: before probes push/pop from stack so it is equivalent to what it was originally - is_success &= res; - } - // 2a. Inject ALT probes - if let Some(res) = self.emit_probes(function, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params, &"alt".to_string(), index) { - is_success &= res; - } - - // 3. Inject AFTER probes - if let Some(res) = self.emit_probes(function, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params,&"after".to_string(), index) { - // Assumption: before probes push/pop from stack so it is equivalent to what it was originally - is_success &= res; - } - - is_success - } - - fn emit_probes(&mut self, function: &mut Function, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, func_params: &Option>, probe_name: &String, index: &mut usize) -> Option { - let mut is_success = true; - - if let Some(probes) = function.probe_map.get_mut(probe_name) { - // if this is an alt probe, only will emit one! - // The last alt probe in the list will be emitted. 
- if probe_name == "alt" { - if probes.len() > 1 { - warn!("Detected multiple `alt` probes, will only emit the last one and ignore the rest!") - } - if let Some(probe) = probes.last_mut() { - is_success &= self.emit_probe(&function.name, probe, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params, index); - } - } else { - probes.iter_mut().for_each(|probe| { - is_success &= self.emit_probe(&function.name, probe, mem_id, curr_mem_offset, func_id, instr_seq_id, func_params, index); - }); - } - Some(is_success) - } else { - None - } - } - - fn emit_probe(&mut self, function_name: &String, probe: &mut Probe, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, func_params: &Option>, index: &mut usize) -> bool { - let mut is_success = true; - - if probe.body.is_none() { - // No need to emit the probe...there's no body! - return true; - } - // probe has a body, continue to emit logic! - - // enter the scope for this probe - self.table.enter_named_scope(&probe.name); - - // determine if I should inject a predicate. - let pred_to_inject: Option = if probe.predicate.is_some() { - // Fold predicate via constant propagation - let folded_pred = ExprFolder::fold_expr(&probe.predicate.as_ref().unwrap(), &self.table); - - if let Some(pred_as_bool) = ExprFolder::get_single_bool(&folded_pred) { - if !pred_as_bool { - // predicate is FALSE, DON'T INJECT PROBE IN GENERAL, so just return from this fn call! 
- self.table.exit_scope(); - info!("Predicate is false, no need to inject probe."); - return true; - } - // predicate is TRUE, unconditionally inject body stmts - None - } else { - // predicate has not been reduced to a boolean value, will need to inject the folded predicate - // println!("{:#?}", folded_pred); - Some(folded_pred) } - } else { - None - }; - - let emitted_params = if function_name == "call" { - // save the inputs to the current bytecode (do this once) - Some(self.create_arg_vars(func_params, func_id, instr_seq_id, index)) - } else { - None - }; - - if let Some(mut pred) = pred_to_inject { - if probe.name == "alt" { - self.emit_predicate(&mut pred, mem_id, curr_mem_offset, func_id, instr_seq_id, index); - - // an alternate probe will need to emit an if/else - // if pred { ; Optional(;) } else { } - let (if_then_block_id, mut if_then_idx, else_block_id, mut else_idx) = self.emit_alt_body(function_name, probe, &emitted_params, mem_id, curr_mem_offset, func_id, instr_seq_id, index); - - // 2. possibly emit alt call (if configured to do so) - if function_name == "call" { - self.emit_alt_call(&emitted_params, func_id, &if_then_block_id, &mut if_then_idx); - - // This is a call instruction, emit original parameters for the original call in the `else` block - self.emit_params(&emitted_params, func_id, &else_block_id, &mut else_idx); - } - } else { - // other probe types will just need to have an if block conditional on the predicate - // if pred { ; } - self.emit_predicated_body(probe, &mut pred, mem_id, curr_mem_offset, func_id, instr_seq_id, index); - } - } else { - // No predicate, just emit the un-predicated probe body - // ; - is_success &= self.emit_body(mem_id, curr_mem_offset, probe.body.as_mut().unwrap(), func_id, instr_seq_id, index); - - if function_name == "call" && probe.name == "alt" { - self.remove_orig_bytecode(probe, func_id, instr_seq_id, index); - - // 2. 
possibly emit alt call (if configured to do so) - self.emit_alt_call(&emitted_params, func_id, instr_seq_id, index); - } - } - - self.table.exit_scope(); - is_success - } - - fn create_arg_vars(&mut self, func_params: &Option>, func_id: FunctionId, instr_seq_id: &InstrSeqId, index: &mut usize) -> Vec<(String, usize)> { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. - let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - // No bytecodes should have been emitted in the module yet! - // So, we can just save off the first * items in the stack as the args - // to the call. - let mut arg_recs = vec![]; // vec to retain order! - if let Some(params) = func_params { - params.iter().enumerate().for_each(|(num, param_ty)| { - // create local for the param in the module - let arg_local_id = self.app_wasm.locals.add(*param_ty); - - // emit a bytecode in the function to assign the ToS to this new local - instr_builder.instr_at( *index,walrus::ir::LocalSet { - local: arg_local_id.clone() - }); - - // update index to point to what follows our insertions - *index += 1; - - // place in symbol table with var addr for future reference - let arg_name = format!("arg{}", num); - let id = self.table.put(arg_name.clone(), Record::Var { - ty: DataType::Integer, // we only support integers right now. - name: arg_name.clone(), - value: None, - addr: Some(VarAddr::Local { - addr: arg_local_id - }) - }); - arg_recs.push((arg_name, id)); - }); - } - arg_recs - } - - fn emit_predicated_body(&mut self, probe: &mut Probe, predicate: &mut Expr, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, index: &mut usize) { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
- let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - instr_builder.block_at( - *index, - None, - |mut probe_block| { - let probe_block_id = probe_block.id(); - // create new `index` var to store current index into the of the `then` instr sequence - let mut probe_block_idx = 0 as usize; - - // inject predicate - if !emit_expr(&mut self.table, &mut self.app_wasm.data, mem_id, curr_mem_offset, predicate, &mut probe_block, &mut probe_block_idx) { - error!("Failed to inject predicate!"); - exit(1); - } - - // If result of predicate equals 0, break out of the probe block. - // Will continue with the application code. - probe_block - .i32_const(0) - .binop(BinaryOp::I32Eq) - .br_if(probe_block_id); - - probe_block_idx += 3; // account for the 3 instructions above! - - // At this point we know the predicate returned `true`, so we need to fire the probe body - emit_body(&mut self.table, &mut self.app_wasm.data, mem_id, curr_mem_offset, probe.body.as_mut().unwrap(), &mut probe_block, &mut probe_block_idx); - }); - - *index += 1; - } - - fn emit_predicate(&mut self, predicate: &mut Expr, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, instr_seq_id: &InstrSeqId, index: &mut usize) { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
- let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - if !emit_expr(&mut self.table, &mut self.app_wasm.data, mem_id, curr_mem_offset, predicate, &mut instr_builder, index) { - error!("Failed to inject predicate!"); - exit(1); - } - } - - fn emit_body(&mut self, mem_id: &MemoryId, curr_mem_offset: &mut u32, body: &mut Vec, func_id: FunctionId, instr_seq_id: &InstrSeqId, index: &mut usize) -> bool { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. - let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - emit_body(&mut self.table, &mut self.app_wasm.data, mem_id, curr_mem_offset, body, &mut instr_builder, index) - } - - fn remove_orig_bytecode(&mut self, probe: &mut Probe, func_id: FunctionId, - instr_seq_id: &InstrSeqId, index: &mut usize) -> Option<(Instr, InstrLocId)> { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. - let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - // remove the original instruction and store it for later use - let mut orig_instr: Option<(Instr, InstrLocId)> = None; - if probe.name == "alt" { - // remove the original bytecode first - orig_instr = Some(instr_builder.instrs_mut().remove(*index)) } - orig_instr - } - - fn emit_params(&mut self, emitted_params: &Option>, func_id: FunctionId, - instr_seq_id: &InstrSeqId, index: &mut usize) { - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
- let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - if let Some(params) = emitted_params { - for (_param_name, param_rec_id) in params.iter() { - let param_rec = self.table.get_record_mut(¶m_rec_id); - if let Some(Record::Var { addr: Some(VarAddr::Local {addr}), .. }) = param_rec { - instr_builder.instr_at(*index, walrus::ir::LocalGet { - local: addr.clone() - }); - *index += 1; - } else { - error!("Could not inject alternate call to function, something went wrong..."); - exit(1); - } - } - } - } - - /// Returns the InstrSeqId of the `then` block - fn emit_alt_body(&mut self, _function_name: &String, probe: &mut Probe, _emitted_params: &Option>, mem_id: &MemoryId, curr_mem_offset: &mut u32, func_id: FunctionId, - instr_seq_id: &InstrSeqId, index: &mut usize) -> (InstrSeqId, usize, InstrSeqId, usize) { - let mut is_success = true; - - let orig_instr = self.remove_orig_bytecode(probe, func_id, instr_seq_id, index); - - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. - let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_builder = func_builder.instr_seq(*instr_seq_id); - - // We've injected a predicate prior to this point, need to create if/else - // block to conditionally execute the body. - let mut then_seq_id = None; - let mut then_idx = None; - let mut else_seq_id = None; - let mut else_idx = None; - instr_builder.if_else_at( - *index, - None, - | then | { - then_seq_id = Some(then.id()); - // create new `index` var to store current index into the of the `then` instr sequence - let mut idx = 0 as usize; - // 1. 
emit alt body - is_success &= emit_body(&mut self.table, &mut self.app_wasm.data, mem_id, curr_mem_offset, - probe.body.as_mut().unwrap(), then, &mut idx); - then_idx = Some(idx); - // Will not emit the original instruction since this is an alternate probe - }, - |else_| { - else_seq_id = Some(else_.id()); - else_idx = Some(0 as usize); // leave at 0 to allow injecting parameters before the original bytecode - if let Some((instr, _instr_loc_id)) = orig_instr { - else_.instr(instr.clone()); - } - }, - ); - - (then_seq_id.unwrap(), then_idx.unwrap(), else_seq_id.unwrap(), else_idx.unwrap()) - } - - fn emit_alt_call(&mut self, emitted_params: &Option>, func_id: FunctionId, - instr_seq_id: &InstrSeqId, index: &mut usize) -> bool { - let mut is_success = true; - // check if we should inject an alternate call! - // At this point the body has been visited, so "new_target_fn_name" would be defined - let rec_id = match self.table.lookup(&"new_target_fn_name".to_string()) { - Some(rec_id) => Some(rec_id.clone()), - None => None - }; - - if rec_id.is_none() { - info!("`new_target_fn_name` not configured for this probe module."); - } else { - let (name, func_call_id) = match rec_id { - Some(r_id) => { - let rec = self.table.get_record_mut(&r_id); - if let Some(Record::Var { value: Some(Value::Str {val, ..}), .. }) = rec { - (val.clone(), self.app_wasm.funcs.by_name(val)) - } else { - ("".to_string(), None) - } - } - None => { - ("".to_string(), None) - }, - }; - - if let Some(f_call_id) = func_call_id { - // we need to inject an alternate call to the specified fn name! - // replace the arguments - self.emit_params(&emitted_params, func_id, &instr_seq_id, index); - - // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
- let func = self.app_wasm.funcs.get_mut(func_id).kind.unwrap_local_mut(); - let func_builder = func.builder_mut(); - let mut instr_seq = func_builder.instr_seq(*instr_seq_id); - - // inject call - instr_seq.instr_at(*index, walrus::ir::Call { - func: f_call_id.clone() - }); - *index += 1; - - is_success &= true; - } else if name != "".to_string() { - info!("Could not find function in app Wasm specified by `new_target_fn_name`: {}", name); - exit(1); - } else { - error!("Could not inject alternate call to function, something went wrong..."); - exit(1); - } - } - is_success + true } fn emit_provided_fn(&mut self, context: &str, f: &Fn) -> bool { @@ -1066,13 +792,6 @@ impl WasmRewritingEmitter { let mut strcmp = FunctionBuilder::new(&mut self.app_wasm.types, &strcmp_params, &strcmp_result); - // get memory id - let memory_id = self.app_wasm.memories - .iter() - .next() - .expect("only single memory is supported") - .id(); - // create params let str0_offset = self.app_wasm.locals.add(ValType::I32); let str0_size = self.app_wasm.locals.add(ValType::I32); @@ -1129,7 +848,7 @@ impl WasmRewritingEmitter { .local_get(i) .binop(BinaryOp::I32Add) .load( - memory_id, + self.metadata.mem_id, LoadKind::I32_8 { kind: ExtendedLoad::ZeroExtend, }, @@ -1146,7 +865,7 @@ impl WasmRewritingEmitter { .local_get(i) .binop(BinaryOp::I32Add) .load( - memory_id, + self.metadata.mem_id, LoadKind::I32_8 { kind: ExtendedLoad::ZeroExtend, }, @@ -1206,55 +925,200 @@ impl WasmRewritingEmitter { }; } } -/// Walrus Visitor over `app.wasm` -/// - as we get relevant info, lookup in SymbolTable for binding to globally set that value -/// - for each bytecode, do we have a probe? -/// - fold predicate with known globals. FALSE? Don't inject! NOT FALSE? 
inject (with remaining Expr, not folded parts) -/// - See fold Rust pattern: https://rust-unofficial.github.io/patterns/patterns/creational/fold.html -/// - now we have instrumented `app.wasm` -/// - write to app_instr.wasm + impl Emitter for WasmRewritingEmitter { fn enter_scope(&mut self) { self.table.enter_scope(); } + fn enter_named_scope(&mut self, scope_name: &String) -> bool { + self.table.enter_named_scope(scope_name) + } fn exit_scope(&mut self) { self.table.exit_scope(); } fn reset_children(&mut self) { self.table.reset_children(); } - - fn emit_whamm(&mut self, _whamm: &Whamm) -> bool { - // nothing to do here - true + + /// bool -> whether there is a next instruction to process + fn has_next_instr(&self) -> bool { + self.instr_iter.has_next() } - fn emit_whammy(&mut self, _whammy: &Whammy) -> bool { - // nothing to do here - true + + /// bool -> whether it found a next instruction + fn next_instr(&mut self) -> bool { + if self.instr_iter.has_next() { + if let Some(next) = self.instr_iter.next() { + self.emitting_instr = Some(EmittingInstrTracker { + curr_seq_id: next.instr_seq_id.clone(), + curr_idx: next.index.clone(), + main_seq_id: next.instr_seq_id.clone(), + main_idx: next.index.clone(), + outer_seq_id: None, + outer_idx: None, + then_seq_id: None, + then_idx: None, + else_seq_id: None, + else_idx: None, + }); + return true; + } + } + false } - fn emit_provider(&mut self, context: &str, provider: &mut Provider) -> bool { - let mut is_success = true; - provider.modules.iter_mut().for_each(|(name, module)| { - is_success &= self.emit_module(&format!("{}:{}", context, name), module); - }); - is_success + + /// bool -> whether the current instruction is one of the passed list of types + fn curr_instr_is_of_type(&mut self, instr_names: &Vec) -> bool { + if let Some(instr) = self.instr_iter.curr() { + return instr_names.contains(&instr.instr_name); + } + false } - fn emit_module(&mut self, context: &str, module: &mut Module) -> bool { - 
self.table.enter_scope(); + + /// bool -> whether the current instruction is one of the passed list of types + fn curr_instr_type(&mut self) -> String { + if let Some(instr) = self.instr_iter.curr() { + return instr.instr_name.clone(); + } + unreachable!() + } + + fn has_params(&mut self) -> bool { + if let Some(curr_instr) = self.instr_iter.curr_mut() { + if let Some(params) = &curr_instr.instr_params { + return !params.is_empty(); + } + + // We haven't defined the params for this instr yet, let's do that + if let Instr::Call(func) = &curr_instr.instr { + let func = self.app_wasm.funcs.get(func.func); + let func_info = get_func_info(&self.app_wasm, func); + // if func.name.as_ref().unwrap().contains("call_perform") { + // println!("{}", func.name.as_ref().unwrap()); + // } + + curr_instr.instr_params = Some(func_info.params); + } + return curr_instr.instr_params.as_ref().unwrap().len() > 0; + } + error!("Something went wrong when trying to access the current instruction."); + false + } + + fn save_params(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + // No bytecodes should have been emitted in the module yet! + // So, we can just save off the first * items in the stack as the args + // to the call. + let mut arg_recs = vec![]; // vec to retain order! 
+ if let Some(params) = &curr_loc.instr_params { + params.iter().enumerate().for_each(|(num, param_ty)| { + // create local for the param in the module + let arg_local_id = self.app_wasm.locals.add(*param_ty); + + // emit a bytecode in the event to assign the ToS to this new local + instr_builder.instr_at( tracker.curr_idx,walrus::ir::LocalSet { + local: arg_local_id.clone() + }); + + // update index to point to what follows our insertions + tracker.curr_idx += 1; + + // place in symbol table with var addr for future reference + let arg_name = format!("arg{}", num); + let id = self.table.put(arg_name.clone(), Record::Var { + ty: DataType::Integer, // we only support integers right now. + name: arg_name.clone(), + value: None, + addr: Some(VarAddr::Local { + addr: arg_local_id + }) + }); + arg_recs.push((arg_name, id)); + }); + } + curr_loc.instr_created_args = arg_recs; + } + } + false + } + + fn emit_params(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + for (_param_name, param_rec_id) in curr_loc.instr_created_args.iter() { + let param_rec = self.table.get_record_mut(¶m_rec_id); + if let Some(Record::Var { addr: Some(VarAddr::Local {addr}), .. 
}) = param_rec { + instr_builder.instr_at(tracker.curr_idx, walrus::ir::LocalGet { + local: addr.clone() + }); + tracker.curr_idx += 1; + } else { + error!("Could not emit parameters, something went wrong..."); + exit(1); + } + } + } + } + false + } + + fn define_compiler_var(&mut self, context: &String, var_name: &String) -> bool { let regex = Regex::new(r"whamm:whammy([0-9]+):wasm:bytecode").unwrap(); return if let Some(_caps) = regex.captures(context) { - let res = self.emit_wasm_bytecode_module(module); - self.table.exit_scope(); - res + match var_name.as_str() { + "new_target_fn_name" => { + self.define_new_target_fn_name() + }, + "target_imp_name" => { + self.define_target_imp_name() + }, + "target_fn_type" => { + self.define_target_fn_type() + }, + "target_imp_module" => { + self.define_target_imp_module() + } + _ => { + error!("Current context `{}` does not provide definition for variable `{}`", context, var_name); + false + } + } } else { - self.table.exit_scope(); - error!("Provided module, but could not find a context to provide the definition, context: {}", context); + error!("Could not find a context to provide the definition, context: {}", context); false }; } + + fn fold_expr(&mut self, expr: &mut Expr) -> bool { + *expr = ExprFolder::fold_expr(expr, &self.table); + true + } + fn emit_expr(&mut self, expr: &mut Expr) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + return emit_expr(&mut self.table, &mut self.app_wasm.data, expr, + &mut instr_builder, &mut self.metadata, &mut tracker.curr_idx); + } + } + false + } fn emit_fn(&mut self, context: &str, f: &Fn) -> bool { // figure out if this is a provided fn. 
- if f.is_provided { + if f.is_comp_provided { return if self.fn_providing_contexts.contains(&context.to_string()) { self.emit_provided_fn(context, f) } else { @@ -1267,10 +1131,12 @@ impl Emitter for WasmRewritingEmitter { // only when we're supporting user-defined fns in whammy... unimplemented!(); } + fn emit_formal_param(&mut self, _param: &(Expr, DataType)) -> bool { // only when we're supporting user-defined fns in whammy... unimplemented!(); } + fn emit_global(&mut self, name: String, _ty: DataType, _val: &Option) -> bool { let rec_id = match self.table.lookup(&name) { Some(rec_id) => rec_id.clone(), @@ -1298,24 +1164,267 @@ impl Emitter for WasmRewritingEmitter { } } - fn emit_stmt(&mut self, _stmt: &Statement) -> bool { - unimplemented!() + fn remove_orig(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + instr_builder.instrs_mut().remove(tracker.curr_idx); + return true; + } + } + return false; + } + + fn emit_orig(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + instr_builder.instr_at(tracker.curr_idx, curr_loc.instr.clone()); + return true; + } + } + return false; + } + + fn emit_if(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
+ let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + instr_builder.block_at( + tracker.curr_idx, + None, + |outer_block| { + let outer_block_id = outer_block.id(); + // create new `index` var to store current index into the of the `then` instr sequence + let outer_block_idx = 0 as usize; + + // Add logic that will execute after the injected conditional to + // break out of the if block if it evaluates to true. + // If result of predicate equals 0, break out of the probe block. + // Will continue with the application code. + outer_block + .i32_const(0) + .binop(BinaryOp::I32Eq) + .br_if(outer_block_id); + + // Leave block index at 0 to enable injecting conditional before the + // above instructions. + + // Save the block information for future reference + tracker.outer_seq_id = Some(outer_block_id); + tracker.outer_idx = Some(outer_block_idx); + }); + + tracker.curr_idx += 1; + return true; + } + } + false + } + + fn emit_if_else(&mut self) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + // This MUST be `self.app_wasm` so we're mutating what will be the instrumented application. 
+ let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + let mut outer_seq_id = None; + let mut outer_idx = None; + let mut then_seq_id = None; + let mut then_idx = None; + let mut else_seq_id = None; + let mut else_idx = None; + + instr_builder.block_at( + tracker.curr_idx, + None, + |outer_block| { + outer_seq_id = Some(outer_block.id()); + outer_idx = Some(0 as usize); + outer_block.if_else( + None, + | then | { + then_seq_id = Some(then.id()); + then_idx = Some(0 as usize); + }, + |else_| { + else_seq_id = Some(else_.id()); + else_idx = Some(0 as usize); + }, + ); + }); + // leave outer_block_idx as 0 to enable injection of condition! + + // Save the block information for future reference + tracker.outer_seq_id = outer_seq_id; + tracker.outer_idx = outer_idx; + tracker.then_seq_id = then_seq_id; + tracker.then_idx = then_idx; + tracker.else_seq_id = else_seq_id; + tracker.else_idx = else_idx; + tracker.curr_idx += 1; + return true; + } + } + false + } + + /// Will configure the emitter to emit subsequent expression as the condition of an if or if/else stmt + /// Then emits the passed condition at that location. 
+ fn emit_condition(&mut self) -> bool { + if let Some(tracker) = &mut self.emitting_instr { + if let Some(outer_seq_id) = &tracker.outer_seq_id { + if let Some(outer_idx) = &tracker.outer_idx { + tracker.curr_seq_id = outer_seq_id.clone(); + tracker.curr_idx = outer_idx.clone(); + } + } + } + false + } + + /// Will configure the emitter to emit subsequent statements into the consequent body of an if or if/else stmt + fn emit_consequent(&mut self) -> bool { + if let Some(tracker) = &mut self.emitting_instr { + if let Some(then_seq_id) = &tracker.then_seq_id { + if let Some(then_idx) = &tracker.then_idx { + tracker.curr_seq_id = then_seq_id.clone(); + tracker.curr_idx = then_idx.clone(); + } + } + return true; + } + false + } + + /// Will configure the emitter to emit subsequent statements into the alternate body of an if/else stmt + fn emit_alternate(&mut self) -> bool { + if let Some(tracker) = &mut self.emitting_instr { + if let Some(else_seq_id) = &tracker.else_seq_id { + if let Some(else_idx) = &tracker.else_idx { + tracker.curr_seq_id = else_seq_id.clone(); + tracker.curr_idx = else_idx.clone(); + return true; + } + } + } + false + } + + /// Will configure the emitter to emit subsequent statements in the outer block of some branching logic + fn finish_branch(&mut self) -> bool { + if let Some(tracker) = &mut self.emitting_instr { + tracker.curr_seq_id = tracker.main_seq_id; + tracker.curr_idx = tracker.main_idx; + + tracker.outer_seq_id = None; + tracker.outer_idx = None; + tracker.then_seq_id = None; + tracker.then_idx = None; + tracker.else_seq_id = None; + tracker.else_idx = None; + return true; + } + true } - fn emit_expr(&mut self, _expr: &Expr) -> bool { - unimplemented!() + fn emit_body(&mut self, body: &mut Vec) -> bool { + let mut is_success = true; + body.iter_mut().for_each(|stmt| { + is_success &= self.emit_stmt(stmt); + }); + is_success } - fn emit_op(&mut self, _op: &Op) -> bool { - unimplemented!() + fn has_alt_call(&mut self) -> bool { + // 
check if we should inject an alternate call! + // At this point the body has been visited, so "new_target_fn_name" would be defined + let rec_id = match self.table.lookup(&"new_target_fn_name".to_string()) { + Some(rec_id) => Some(rec_id.clone()), + None => None + }; + + if rec_id.is_none() { + info!("`new_target_fn_name` not configured for this probe."); + return false; + } else { + let (name, func_call_id) = match rec_id { + Some(r_id) => { + let rec = self.table.get_record_mut(&r_id); + if let Some(Record::Var { value: Some(Value::Str { val, .. }), .. }) = rec { + (val.clone(), self.app_wasm.funcs.by_name(val)) + } else { + ("".to_string(), None) + } + } + None => { + ("".to_string(), None) + }, + }; + if func_call_id.is_none() { + info!("Could not find function in app Wasm specified by `new_target_fn_name`: {}", name); + return false; + } + if let Some(curr_loc) = self.instr_iter.curr_mut() { + curr_loc.instr_alt_call = func_call_id; + } else { + info!("The instruction iterator has not been initialized, we've hit a bug!"); + return false; + } + } + true } - fn emit_datatype(&mut self, _datatype: &DataType) -> bool { - unimplemented!() + fn emit_alt_call(&mut self) -> bool { + let mut is_success = true; + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + + if let Some(alt_fn_id) = curr_loc.instr_alt_call { + // we need to inject an alternate call to the specified fn name! 
+ let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + // inject call + instr_builder.instr_at(tracker.curr_idx, walrus::ir::Call { + func: alt_fn_id.clone() + }); + tracker.curr_idx += 1; + + is_success &= true; + } else { + error!("Could not inject alternate call to function, something went wrong..."); + } + } + } + is_success } - fn emit_value(&mut self, _val: &Value) -> bool { - unimplemented!() + fn emit_stmt(&mut self, stmt: &mut Statement) -> bool { + if let Some(curr_loc) = self.instr_iter.curr_mut() { + if let Some(tracker) = &mut self.emitting_instr { + let func = self.app_wasm.funcs.get_mut(curr_loc.wasm_func_id).kind.unwrap_local_mut(); + let func_builder = func.builder_mut(); + let mut instr_builder = func_builder.instr_seq(tracker.curr_seq_id); + + return emit_stmt(&mut self.table, &mut self.app_wasm.data, stmt, + &mut instr_builder, &mut self.metadata, &mut tracker.curr_idx); + } + } + false } fn dump_to_file(&mut self, output_wasm_path: String) -> bool { diff --git a/src/generator/init_generator.rs b/src/generator/init_generator.rs new file mode 100644 index 00000000..f0c727ec --- /dev/null +++ b/src/generator/init_generator.rs @@ -0,0 +1,266 @@ +// ======================= +// ==== CodeGenerator ==== +// ======================= + +use std::collections::HashMap; +use log::{trace, warn}; +use crate::generator::emitters::Emitter; +use crate::parser::types::{DataType, Whammy, Whamm, WhammVisitorMut, Expr, Event, Package, Op, Probe, Provider, Statement, Value, Global}; + +/// Serves as the first phase of instrumenting a module by setting up +/// the groundwork. +/// +/// The code generator traverses the AST and calls the passed emitter to +/// emit some compiler-provided functions and user-defined globals. 
+/// This process should ideally be generic, made to perform a specific +/// instrumentation technique by the Emitter field. +pub struct InitGenerator<'a> { + pub emitter: Box<&'a mut dyn Emitter>, + pub context_name: String +} +impl InitGenerator<'_> { + pub fn run(&mut self, whamm: &mut Whamm) -> bool { + // Generate globals and fns defined by `whamm` (this should modify the app_wasm) + self.visit_whamm(whamm) + } + + // Private helper functions + fn visit_globals(&mut self, globals: &HashMap) -> bool { + let mut is_success = true; + for (name, global) in globals.iter() { + // do not inject globals into Wasm that are used/defined by the compiler + if !global.is_comp_provided { + is_success &= self.emitter.emit_global(name.clone(), global.ty.clone(), &global.value); + } + } + + is_success + } +} +impl WhammVisitorMut for InitGenerator<'_> { + fn visit_whamm(&mut self, whamm: &mut Whamm) -> bool { + trace!("Entering: CodeGenerator::visit_whamm"); + self.context_name = "whamm".to_string(); + let mut is_success = true; + + // visit fns + whamm.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&whamm.globals); + // visit whammys + whamm.whammys.iter_mut().for_each(|whammy| { + is_success &= self.visit_whammy(whammy); + }); + + trace!("Exiting: CodeGenerator::visit_whamm"); + // Remove from `context_name` + self.context_name = "".to_string(); + is_success + } + + fn visit_whammy(&mut self, whammy: &mut Whammy) -> bool { + trace!("Entering: CodeGenerator::visit_whammy"); + self.emitter.enter_scope(); + self.context_name += &format!(":{}", whammy.name.clone()); + let mut is_success = true; + + // visit fns + whammy.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&whammy.globals); + // visit providers + whammy.providers.iter_mut().for_each(|(_name, provider)| { + is_success &= self.visit_provider(provider); + }); + + 
trace!("Exiting: CodeGenerator::visit_whammy"); + self.emitter.exit_scope(); + // Remove from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + is_success + } + + fn visit_provider(&mut self, provider: &mut Provider) -> bool { + trace!("Entering: CodeGenerator::visit_provider"); + self.emitter.enter_scope(); + self.context_name += &format!(":{}", provider.name.clone()); + let mut is_success = true; + + // visit fns + provider.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&provider.globals); + // visit the packages + provider.packages.iter_mut().for_each(|(_name, package)| { + is_success &= self.visit_package(package); + }); + + trace!("Exiting: CodeGenerator::visit_provider"); + self.emitter.exit_scope(); + // Remove this package from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + is_success + } + + fn visit_package(&mut self, package: &mut Package) -> bool { + trace!("Entering: CodeGenerator::visit_package"); + self.emitter.enter_scope(); + let mut is_success = true; + self.context_name += &format!(":{}", package.name.clone()); + + // visit fns + package.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&package.globals); + // visit the events + package.events.iter_mut().for_each(|(_name, event)| { + is_success &= self.visit_event(event); + }); + + trace!("Exiting: CodeGenerator::visit_package"); + self.emitter.exit_scope(); + // Remove this package from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + is_success + } + + fn visit_event(&mut self, event: &mut Event) -> bool { + trace!("Entering: CodeGenerator::visit_event"); + self.emitter.enter_scope(); + // let mut is_success = self.emitter.emit_event(event); + 
self.context_name += &format!(":{}", event.name.clone()); + let mut is_success = true; + + // visit fns + event.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&event.globals); + + // 1. visit the BEFORE probes + if let Some(probes) = event.probe_map.get_mut(&"before".to_string()) { + probes.iter_mut().for_each(|probe| { + is_success &= self.visit_probe(probe); + }); + } + // 2. visit the ALT probes + if let Some(probes) = event.probe_map.get_mut(&"alt".to_string()) { + // only will emit one alt probe! + // The last alt probe in the list will be emitted. + if probes.len() > 1 { + warn!("Detected multiple `alt` probes, will only emit the last one and ignore the rest!") + } + if let Some(probe) = probes.last_mut() { + is_success &= self.visit_probe(probe); + } + } + // 3. visit the AFTER probes + if let Some(probes) = event.probe_map.get_mut(&"after".to_string()) { + probes.iter_mut().for_each(|probe| { + is_success &= self.visit_probe(probe); + }); + } + + trace!("Exiting: CodeGenerator::visit_event"); + self.emitter.exit_scope(); + // Remove this event from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + is_success + } + + fn visit_probe(&mut self, probe: &mut Probe) -> bool { + trace!("Entering: CodeGenerator::visit_probe"); + self.emitter.enter_scope(); + // let mut is_success = self.emitter.emit_probe(probe); + self.context_name += &format!(":{}", probe.name.clone()); + let mut is_success = true; + + // visit fns + probe.fns.iter_mut().for_each(| f | { + is_success &= self.visit_fn(f); + }); + // inject globals + is_success &= self.visit_globals(&probe.globals); + + trace!("Exiting: CodeGenerator::visit_probe"); + self.emitter.exit_scope(); + // Remove this probe from `context_name` + self.context_name = self.context_name[..self.context_name.rfind(":").unwrap()].to_string(); + is_success + } + + fn visit_fn(&mut self, f: 
&mut crate::parser::types::Fn) -> bool { + trace!("Entering: CodeGenerator::visit_fn"); + self.emitter.enter_scope(); + let mut is_success = true; + if f.is_comp_provided { + is_success = self.emitter.emit_fn(&self.context_name, f); + } + trace!("Exiting: CodeGenerator::visit_fn"); + self.emitter.exit_scope(); + is_success + } + + fn visit_formal_param(&mut self, _param: &mut (Expr, DataType)) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_formal_param"); + // let is_success = self.emitter.emit_formal_param(param); + // trace!("Exiting: CodeGenerator::visit_formal_param"); + // is_success + } + + fn visit_stmt(&mut self, _stmt: &mut Statement) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_stmt"); + // let is_success = self.emitter.emit_stmt(stmt); + // trace!("Exiting: CodeGenerator::visit_stmt"); + // is_success + } + + fn visit_expr(&mut self, _expr: &mut Expr) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_expr"); + // let is_success = self.emitter.emit_expr(expr); + // trace!("Exiting: CodeGenerator::visit_expr"); + // is_success + } + + fn visit_op(&mut self, _op: &mut Op) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_op"); + // let is_success = self.emitter.emit_op(op); + // trace!("Exiting: CodeGenerator::visit_op"); + // is_success + } + + fn visit_datatype(&mut self, _datatype: &mut DataType) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_datatype"); + // let is_success = self.emitter.emit_datatype(datatype); + // trace!("Exiting: CodeGenerator::visit_datatype"); + // is_success + } + + fn visit_value(&mut self, _val: &mut Value) -> bool { + // never called + unreachable!(); + // trace!("Entering: CodeGenerator::visit_value"); + // let is_success = self.emitter.emit_value(val); + // trace!("Exiting: CodeGenerator::visit_value"); + // 
is_success + } +} \ No newline at end of file diff --git a/src/generator/instr_generator.rs b/src/generator/instr_generator.rs new file mode 100644 index 00000000..3d1bf3ac --- /dev/null +++ b/src/generator/instr_generator.rs @@ -0,0 +1,639 @@ +use log::{error, warn}; +use crate::behavior::builder_visitor::SimpleAST; +use crate::behavior::tree::{ActionType, ActionWithChildType, BehaviorVisitor, DecoratorType, ParamActionType}; +use crate::behavior::tree::{BehaviorTree, Node}; +use crate::generator::emitters::Emitter; +use crate::generator::types::ExprFolder; +use crate::parser::types::Probe; +use crate::verifier::types::ScopeType; + +/// The second phase of instrumenting a Wasm module by actually emitting the +/// instrumentation code. +/// +/// To do this, the generator traverses the BehaviorTree AST and calls the +/// passed emitter to emit instrumentation code. +/// This process should ideally be generic, made to perform a specific +/// instrumentation technique by the passed Emitter type. +pub struct InstrGenerator<'a, 'b> { + pub tree: &'a BehaviorTree, + pub emitter: Box<&'b mut dyn Emitter>, + pub ast: SimpleAST, + + pub context_name: String, + pub curr_provider_name: String, + pub curr_package_name: String, + pub curr_event_name: String, + pub curr_probe_name: String, + pub curr_probe: Option +} +impl InstrGenerator<'_, '_> { + pub fn run(&mut self, + behavior: &BehaviorTree + ) -> bool { + // Reset the symbol table in the emitter just in case + self.emitter.reset_children(); + if let Some(root) = behavior.get_root() { + // Traverse `behavior` tree and emit the probes held in `ast` + return self.visit_root(root); + } + warn!("The behavior tree was empty! 
Nothing to emit!"); + false + } + + fn emit_cond(&mut self, cond: &usize) -> bool { + let mut is_success = true; + if let Some(node) = self.tree.get_node(cond.clone()) { + // emit the branch conditional + self.emitter.emit_condition(); + is_success &= self.visit_node(node); + } else { + error!("Node to define conditional logic node does not exist!"); + } + is_success + } + + fn emit_conseq(&mut self, conseq: &usize) -> bool { + let mut is_success = true; + if let Some(node) = self.tree.get_node(conseq.clone()) { + // emit the consequent logic + self.emitter.emit_consequent(); + is_success &= self.visit_node(node); + } else { + error!("Node to define consequent logic node does not exist!"); + } + is_success + } + + fn emit_alt(&mut self, alt: &usize) -> bool { + let mut is_success = true; + if let Some(node) = self.tree.get_node(alt.clone()) { + // emit the alternate logic + self.emitter.emit_alternate(); + is_success &= self.visit_node(node); + } else { + error!("Node to define alternate logic node does not exist!"); + } + is_success + } +} +impl BehaviorVisitor for InstrGenerator<'_, '_> { + fn visit_root(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Root { child, .. } = node { + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + unreachable!() + } + is_success + } + + fn visit_sequence(&mut self, node: &Node) -> bool { + if let Node::Sequence { children, .. } = node { + for child in children { + let mut child_is_success = true; + if let Some(node) = self.tree.get_node(child.clone()) { + child_is_success &= self.visit_node(node); + } + if !child_is_success { + // If the child was unsuccessful, don't execute the following children + // and return `false` (failure) + return child_is_success; + } + } + } else { + unreachable!() + } + true + } + + fn visit_fallback(&mut self, node: &Node) -> bool { + if let Node::Fallback { children, .. 
} = node { + for child in children { + let mut child_is_success = true; + if let Some(node) = self.tree.get_node(child.clone()) { + child_is_success &= self.visit_node(node); + } + if child_is_success { + // If that child was successful, don't execute the fallback + // and return `true` (success) + return child_is_success; + } + } + } else { + unreachable!() + } + // Never successfully executed a child + false + } + + fn visit_is_instr(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Decorator {ty, child, ..} = node { + if let DecoratorType::IsInstr {instr_names} = ty { + if self.emitter.curr_instr_is_of_type(instr_names) { + // If the current instruction is of-interest, continue with the behavior tree logic + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + // If the decorator condition is false, return false + return false; + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_is_probe_type(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Decorator { ty, child, .. } = node { + if let DecoratorType::IsProbeType {probe_type} = ty { + if self.curr_probe_name == *probe_type { + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + // If the decorator condition is false, return false + return false; + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_has_alt_call(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Decorator { ty, child, .. 
} = node { + if let DecoratorType::HasAltCall = ty { + if self.emitter.has_alt_call() { + // The current probe has a defined alt call, continue with behavior + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + // If the decorator condition is false, return false + return false; + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_has_params(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Decorator { ty, child, .. } = node { + if let DecoratorType::HasParams = ty { + if self.emitter.has_params() { + // The current instruction has parameters, continue with behavior + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + // If the decorator condition is false, return false + return false; + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_pred_is(&mut self, node: &Node) -> bool { + if let Node::Decorator {ty, child, ..} = node { + if let DecoratorType::PredIs{ val } = ty { + if let Some(probe) = &self.curr_probe { + if let Some(pred) = &probe.predicate { + if let Some(pred_as_bool) = ExprFolder::get_single_bool(&pred) { + // predicate has been reduced to a boolean value + if pred_as_bool == *val { + // predicate is reduced to desired value, execute child node + if let Some(node) = self.tree.get_node(child.clone()) { + return self.visit_node(node); + } + } + } + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + false + } + + fn visit_for_each_probe(&mut self, node: &Node) -> bool { + let mut is_success = true; + // Assumption: before probes push/pop from stack so it is equivalent to what it was originally + // Assumption: after probes push/pop from stack so it is equivalent to what it was originally + + if let Node::Decorator { ty, child, .. 
} = node { + if let DecoratorType::ForEachProbe { target } = ty { + self.curr_probe_name = target.clone(); + + // Must pull the probe by index due to Rust calling constraints... + let probe_list_len = get_probes_from_ast(&self.ast, &self.curr_provider_name, &self.curr_package_name, + &self.curr_event_name, target).len(); + for i in Vec::from_iter(0..probe_list_len).iter() { + + if let Some(probe) = get_probe_at_idx(&self.ast, &self.curr_provider_name, &self.curr_package_name, + &self.curr_event_name, target, i) { + // make a clone of the current probe per instruction traversal + // this will reset the clone pred/body for each instruction! + self.curr_probe = Some(probe.clone()); + } + + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_for_first_probe(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Decorator {ty, child, .. } = node { + if let DecoratorType::ForFirstProbe { target } = ty { + let probe_list = get_probes_from_ast(&self.ast, &self.curr_provider_name, &self.curr_package_name, + &self.curr_event_name, target); + if probe_list.len() > 1 { + warn!("There is more than one probe for probe type '{}'. So only emitting first probe, ignoring rest.", target) + } + self.curr_probe_name = target.clone(); + // make a clone of the first probe per instruction traversal + // this will reset the clone pred/body for each instruction! + if let Some(probe) = probe_list.get(0) { + self.curr_probe = Some(probe.clone()); + } + + // Process the instructions for this single probe! + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_enter_package(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::ActionWithChild { ty, child, .. 
} = node { + let ActionWithChildType::EnterPackage { package_name } = ty; + if package_name == "bytecode" { + // Process first instruction! + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + + // Process the rest of the instructions + while self.emitter.has_next_instr() { + self.emitter.next_instr(); + if let Some(node) = self.tree.get_node(child.clone()) { + is_success &= self.visit_node(node); + } + } + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_if_else(&mut self, node: &Node) -> bool { + if let Node::ParameterizedAction {ty, .. } = node { + if let ParamActionType::EmitIfElse { cond, conseq, alt } = ty { + self.emitter.emit_if_else(); + self.emit_cond(cond); + self.emit_conseq(conseq); + self.emit_alt(alt); + self.emitter.finish_branch(); + return true; + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_emit_if(&mut self, node: &Node) -> bool { + if let Node::ParameterizedAction { ty, .. 
} = node { + if let ParamActionType::EmitIf { cond, conseq } = ty { + self.emitter.emit_if(); + self.emit_cond(cond); + self.emit_conseq(conseq); + self.emitter.finish_branch(); + return true; + } else { + unreachable!() + } + } else { + unreachable!() + } + } + + fn visit_enter_scope(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action { ty, ..} = node { + if let ActionType::EnterScope{ context, scope_name } = ty { + is_success &= self.emitter.enter_named_scope(scope_name); + if is_success { + // Set the current context info for probe lookup + self.context_name = context.clone(); + + let mut spec_split = context.split(":"); + if let Some(_whamm) = spec_split.next() { + if let Some(_whammy) = spec_split.next() { + if let Some(provider) = spec_split.next() { + self.curr_provider_name = provider.to_string(); + if let Some(package) = spec_split.next() { + self.curr_package_name = package.to_string(); + if let Some(event) = spec_split.next() { + self.curr_event_name = event.to_string(); + if let Some(probe) = spec_split.next() { + self.curr_probe_name = probe.to_string() + } + } + } + } + } + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_enter_scope_of(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action { ty, ..} = node { + if let ActionType::EnterScopeOf { context, scope_ty } = ty { + match scope_ty { + ScopeType::Event => { + let instr_name = self.emitter.curr_instr_type(); + is_success &= self.emitter.enter_named_scope(&instr_name); + if is_success { + // Set the current context info for probe lookup + self.context_name = context.clone(); + + let mut spec_split = context.split(":"); + if let Some(_whamm) = spec_split.next() { + if let Some(_whammy) = spec_split.next() { + if let Some(provider) = spec_split.next() { + self.curr_provider_name = provider.to_string(); + if let Some(package) = spec_split.next() { + self.curr_package_name = 
package.to_string(); + self.curr_event_name = instr_name; + } + } + } + } + } + } + _ => unimplemented!() + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_exit_scope(&mut self, node: &Node) -> bool { + let is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::ExitScope = ty { + self.emitter.exit_scope(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_define(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::Define {var_name, ..} = ty { + is_success &= self.emitter.define_compiler_var(&self.context_name, var_name); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_pred(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::EmitPred = ty { + if let Some(probe) = &mut self.curr_probe { + if let Some(pred) = &mut probe.predicate { + is_success &= self.emitter.emit_expr(pred); + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_fold_pred(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action { ty, ..} = node { + if let ActionType::FoldPred = ty { + if let Some(probe) = &mut self.curr_probe { + if let Some(pred) = &mut probe.predicate { + is_success &= self.emitter.fold_expr(pred); + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_reset(&mut self, node: &Node) -> bool { + let is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::Reset = ty { + self.emitter.reset_children(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_save_params(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let 
ActionType::SaveParams = ty { + is_success &= self.emitter.save_params(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_params(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action { ty, ..} = node { + if let ActionType::EmitParams = ty { + is_success &= self.emitter.emit_params(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_body(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::EmitBody = ty { + if let Some(probe) = &mut self.curr_probe { + if let Some(body) = &mut probe.body { + is_success &= self.emitter.emit_body(body); + } + } + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_alt_call(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::EmitAltCall = ty { + is_success &= self.emitter.emit_alt_call(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_remove_orig(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::RemoveOrig = ty { + is_success &= self.emitter.remove_orig(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_emit_orig(&mut self, node: &Node) -> bool { + let mut is_success = true; + if let Node::Action {ty, ..} = node { + if let ActionType::EmitOrig = ty { + is_success &= self.emitter.emit_orig(); + } else { + unreachable!() + } + } else { + unreachable!() + } + is_success + } + + fn visit_force_success(&mut self, node: &Node) -> bool { + if let Node::Action {ty, ..} = node { + if let ActionType::ForceSuccess = ty { + return true; + } else { + unreachable!() + } + } else { + unreachable!() + } + } +} + +// ================== +// = AST OPERATIONS = +// 
================== + +fn get_probes_from_ast<'a>(ast: &'a SimpleAST, + curr_provider_name: &String, curr_package_name: &String, curr_event_name: &String, + name: &String) -> &'a Vec { + if let Some(provider) = ast.get(curr_provider_name) { + if let Some(package) = provider.get(curr_package_name) { + if let Some(event) = package.get(curr_event_name) { + if let Some(probes) = event.get(name) { + return probes; + } + } + } + } + unreachable!() +} + +fn get_probe_at_idx<'a>(ast: &'a SimpleAST, + curr_provider_name: &String, curr_package_name: &String, curr_event_name: &String, + name: &String, idx: &usize) -> Option<&'a Probe> { + get_probes_from_ast(ast, curr_provider_name, curr_package_name, curr_event_name, name) + .get(*idx) +} diff --git a/src/generator/tests.rs b/src/generator/tests.rs index 330ddb07..cc0baa4e 100644 --- a/src/generator/tests.rs +++ b/src/generator/tests.rs @@ -39,8 +39,8 @@ fn get_rec<'a>(table: &'a mut SymbolTable, name: &str) -> Option<&'a mut Record> fn get_pred(whamm: &Whamm) -> &Expr { whamm.whammys.get(0).unwrap() .providers.get("wasm").unwrap() - .modules.get("bytecode").unwrap() - .functions.get("call").unwrap() + .packages.get("bytecode").unwrap() + .events.get("call").unwrap() .probe_map.get("alt").unwrap() .get(0).unwrap().predicate.as_ref().unwrap() } @@ -59,13 +59,13 @@ fn hardcode_compiler_constants(table: &mut SymbolTable) { } println!("Scope name: {}", table.get_curr_scope().unwrap().name); table.enter_scope(); // enter bytecode scope - while table.get_curr_scope().unwrap().ty != ScopeType::Module { + while table.get_curr_scope().unwrap().ty != ScopeType::Package { table.exit_scope(); table.enter_scope() } println!("Scope name: {}", table.get_curr_scope().unwrap().name); table.enter_scope(); // enter call scope - while table.get_curr_scope().unwrap().ty != ScopeType::Function { + while table.get_curr_scope().unwrap().ty != ScopeType::Event { table.exit_scope(); table.enter_scope() } @@ -125,7 +125,7 @@ fn 
assert_simplified_predicate(pred: &Expr) { fn basic_run(script: &str) { match tests::get_ast(script) { Some(whamm) => { - let mut table = verifier::verify(&whamm, false); + let mut table = verifier::build_symbol_table(&whamm); table.reset(); let pred = get_pred(&whamm); @@ -210,7 +210,7 @@ wasm::call:alt / match tests::get_ast(script) { Some(whamm) => { - let mut table = verifier::verify(&whamm, false); + let mut table = verifier::build_symbol_table(&whamm); table.reset(); let pred = get_pred(&whamm); diff --git a/src/lib.rs b/src/lib.rs index daf2b003..fb706825 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod behavior; pub mod generator; pub mod parser; pub mod verifier; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index db04d604..1ef10277 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,26 +1,85 @@ extern crate core; +use std::path::PathBuf; use crate::parser::whamm_parser::*; -use crate::verifier::verifier::*; -use crate::generator::emitters::{WasmRewritingEmitter}; -use crate::generator::code_generator::{CodeGenerator}; +use crate::behavior::builder_visitor::*; +use crate::generator::emitters::{Emitter, WasmRewritingEmitter}; +use crate::generator::init_generator::{InitGenerator}; +use crate::generator::instr_generator::{InstrGenerator}; pub mod parser; +pub mod behavior; pub mod verifier; pub mod generator; -use clap::Parser; +use clap::{Args, Parser, Subcommand}; +use graphviz_rust::exec_dot; use log::{info, error}; use std::process::exit; +use graphviz_rust::cmd::{CommandArg, Format}; +use project_root::get_project_root; +use walrus::Module; + +use crate::behavior::tree::BehaviorTree; +use crate::behavior::visualize::visualization_to_file; +use crate::parser::types::Whamm; +use crate::verifier::types::SymbolTable; +use crate::verifier::verifier::{build_symbol_table, verify}; fn setup_logger() { env_logger::init(); } /// `whamm` instruments a Wasm application with the Probes defined in the specified Whammy. 
-#[derive(Parser, Debug)] +#[derive(Debug, Parser)] #[clap(author, version, about, long_about = None)] -struct Args { +pub struct WhammCli { + // #[clap(flatten)] + // global_opts: GlobalOpts, + + #[clap(subcommand)] + command: Command +} + +#[derive(Debug, Subcommand)] +enum Command { + /// To instrument a Wasm application. + Instr(InstrArgs), + + /// To visualize the relationship between various structures in the module and its instructions + VisWasm { + /// The path to the Wasm module we want to visualize. + #[clap(short, long, value_parser)] + wasm: String, + + /// The path to output the visualization to. + #[clap(short, long, value_parser, default_value = "output/wasm.dot")] + output_path: String, + }, + + /// To visualize the generated behavior tree from the specified `whammy` + VisWhammy { + /// The path to the `whammy` file we want to visualize. + #[clap(short, long, value_parser)] + whammy: String, + + /// Whether to run the verifier on the specified whammy + #[clap(long, short, action, default_value = "false")] // TODO -- change this default value to true when I have this implemented + run_verifier: bool, + + /// The path to output the visualization to. + #[clap(short, long, value_parser, default_value = "output/vis.svg")] + output_path: String, + } +} + +// #[derive(Debug, Args)] +// struct GlobalOpts { +// // (not needed yet) +// } + +#[derive(Debug, Args)] +struct InstrArgs { /// The path to the application's Wasm module we want to instrument. 
#[clap(short, long, value_parser)] app: String, @@ -36,13 +95,11 @@ struct Args { virgil: bool, /// Whether to run the verifier on the specified whammy - #[clap(long, short, action, default_value = "true")] + #[clap(long, short, action, default_value = "false")] // TODO -- change this default value to true when I have this implemented run_verifier: bool } fn main() { - // TODO add subcommands for virgil/wasm with different options per subcommand - // https://github.com/clap-rs/clap/blob/4e07b438584bb8a19e37599d4c5b11797bec5579/examples/git.rs if let Err(e) = try_main() { eprintln!("error: {}", e); for c in e.iter_chain().skip(1) { @@ -57,57 +114,195 @@ fn try_main() -> Result<(), failure::Error> { setup_logger(); // Get information from user command line args - let args = Args::parse(); - let app_wasm_path = args.app; - let whammy_path = args.whammy; - let whammy = std::fs::read_to_string(&whammy_path); - let output_wasm_path = args.output_path; + let cli = WhammCli::parse(); + + match cli.command { + Command::Instr(args) => { + run_instr(args.app, args.whammy, args.output_path, args.virgil, args.run_verifier); + } + Command::VisWasm {wasm, output_path} => { + run_vis_wasm(wasm, output_path); + } + Command::VisWhammy {whammy, run_verifier, output_path} => { + run_vis_whammy(whammy, run_verifier, output_path); + } + } - let emit_virgil = args.virgil; - let run_verifier = args.run_verifier; + Ok(()) +} - match whammy { - Ok(unparsed_str) => { - // Parse the script and build the AST - let mut whamm = match parse_script(unparsed_str) { - Ok(ast) => { - info!("successfully parsed"); - ast +fn run_instr(app_wasm_path: String, whammy_path: String, output_wasm_path: String, emit_virgil: bool, run_verifier: bool) { + let mut whamm = get_whammy_ast(&whammy_path); + let symbol_table = get_symbol_table(&whamm, run_verifier); + let (behavior_tree, simple_ast) = build_behavior(&whamm); + + // Read app Wasm into Walrus module + let _config = walrus::ModuleConfig::new(); + let 
app_wasm = Module::from_file(&app_wasm_path).unwrap(); + + // Configure the emitter based on target instrumentation code format + let mut emitter = if emit_virgil { + unimplemented!(); + } else { + WasmRewritingEmitter::new( + app_wasm, + symbol_table + ) + }; + + // Phase 0 of instrumentation (emit globals and provided fns) + let mut init = InitGenerator { + emitter: Box::new(&mut emitter), + context_name: "".to_string(), + }; + init.run(&mut whamm); + + // Phase 1 of instrumentation (actually emits the instrumentation code) + // This structure is necessary since we need to have the fns/globals injected (a single time) + // and ready to use in every body/predicate. + let mut instr = InstrGenerator { + tree: &behavior_tree, + emitter: Box::new(&mut emitter), + ast: simple_ast, + context_name: "".to_string(), + curr_provider_name: "".to_string(), + curr_package_name: "".to_string(), + curr_event_name: "".to_string(), + curr_probe_name: "".to_string(), + curr_probe: None, + }; + instr.run(&behavior_tree); + + emitter.dump_to_file(output_wasm_path); +} + +fn run_vis_wasm(wasm_path: String, output_path: String) { + // Read app Wasm into Walrus module + let _config = walrus::ModuleConfig::new(); + let app_wasm = Module::from_file(&wasm_path).unwrap(); + + match app_wasm.write_graphviz_dot(output_path.clone()) { + Ok(_) => { + match std::fs::read_to_string(&output_path.clone()) { + Ok(dot_str) => { + let svg_path = format!("{}.svg", output_path.clone()); + + match exec_dot( + dot_str, + vec![Format::Svg.into(), CommandArg::Output(svg_path.clone())] + ) { + Err(e) => { + println!("{}", e.to_string()); + exit(1); + } + _ => {} + } + + match opener::open(svg_path.clone()) { + Err(err) => { + error!("Could not open visualization of wasm at: {}", svg_path); + error!("{:?}", err) + } + _ => {} + } }, Err(error) => { - error!("Parse failed: {}", error); + error!("Cannot read specified file {}: {}", output_path, error); exit(1); } }; + } + Err(_) => {} + } + exit(0); +} - // 
Build the symbol table from the AST - let mut symbol_table = verify(&whamm, run_verifier); - println!("{:#?}", symbol_table); - symbol_table.reset(); +fn run_vis_whammy(whammy_path: String, run_verifier: bool, output_path: String) { + let whamm = get_whammy_ast(&whammy_path); + verify_ast(&whamm, run_verifier); + let (behavior_tree, ..) = build_behavior(&whamm); - // Read app Wasm into Walrus module - let _config = walrus::ModuleConfig::new(); - let app_wasm = walrus::Module::from_file(&app_wasm_path).unwrap(); + let path = match get_pb(&PathBuf::from(output_path.clone())) { + Ok(pb) => { + pb + } + Err(_) => { + exit(1) + } + }; - // Configure the emitter based on target instrumentation code format - let emitter = if emit_virgil { - unimplemented!(); - } else { - WasmRewritingEmitter::new( - app_wasm, - symbol_table - )}; + // visualization_to_file(&behavior_tree, path) + match visualization_to_file(&behavior_tree, path) { + Ok(_) => { + match opener::open(output_path.clone()) { + Err(err) => { + error!("Could not open visualization tree at: {}", output_path); + error!("{:?}", err) + } + _ => {} + } + } + Err(_) => {} + } + exit(0); +} + +fn get_symbol_table(ast: &Whamm, run_verifier: bool) -> SymbolTable { + let st = build_symbol_table(&ast); + verify_ast(ast, run_verifier); + st +} - let mut generator = CodeGenerator::new(Box::new(emitter)); +fn verify_ast(ast: &Whamm, run_verifier: bool) { + if run_verifier { + if !verify(ast) { + error!("AST failed verification!"); + exit(1); + } + } +} - generator.generate(&mut whamm); - generator.dump_to_file(output_wasm_path); +fn get_whammy_ast(whammy_path: &String) -> Whamm { + match std::fs::read_to_string(&whammy_path) { + Ok(unparsed_str) => { + // Parse the script and build the AST + match parse_script(unparsed_str) { + Ok(ast) => { + info!("successfully parsed"); + return ast; + }, + Err(error) => { + error!("Parse failed: {}", error); + exit(1); + } + }; }, Err(error) => { error!("Cannot read specified file {}: {}", 
whammy_path, error); exit(1); } } +} - Ok(()) +fn build_behavior(whamm: &Whamm) -> (BehaviorTree, SimpleAST) { + // Build the behavior tree from the AST + let (mut behavior, simple_ast) = build_behavior_tree(&whamm); + behavior.reset(); + + (behavior, simple_ast) +} + +pub(crate) fn get_pb(file_pb: &PathBuf) -> Result { + if file_pb.is_relative() { + match get_project_root() { + Ok(r) => { + let mut full_path = r.clone(); + full_path.push(file_pb); + Ok(full_path) + } + Err(e) => Err(format!("the root folder does not exist: {:?}", e)), + } + } else { + Ok(file_pb.clone()) + } } \ No newline at end of file diff --git a/src/parser/print_visitor.rs b/src/parser/print_visitor.rs index 7777be5c..0a8e540f 100644 --- a/src/parser/print_visitor.rs +++ b/src/parser/print_visitor.rs @@ -3,7 +3,7 @@ use parser_types::{WhammVisitor}; use std::cmp; use std::collections::HashMap; -use crate::parser::types::{DataType, Whammy, Whamm, Expr, Function, Module, Op, Probe, Provider, Statement, Value}; +use crate::parser::types::{DataType, Whammy, Whamm, Expr, Event, Package, Op, Probe, Provider, Statement, Value, Global}; const NL: &str = "\n"; @@ -23,11 +23,11 @@ impl AsStrVisitor { "--".repeat(cmp::max(0, self.indent as usize)) } - fn visit_globals(&mut self, globals: &HashMap)>) -> String { + fn visit_globals(&mut self, globals: &HashMap) -> String { let mut s = "".to_string(); - for (name, (_ty, _var_id, val)) in globals.iter() { + for (name, global) in globals.iter() { s += &format!("{}{} := ", self.get_indent(), name); - match val { + match &global.value { Some(v) => s += &format!("{}{}", self.visit_value(v), NL), None => s += &format!("None{}", NL) } @@ -42,7 +42,7 @@ impl WhammVisitor for AsStrVisitor { // print fns if whamm.fns.len() > 0 { - s += &format!("Whamm functions:{}", NL); + s += &format!("Whamm events:{}", NL); self.increase_indent(); for f in whamm.fns.iter() { s += &format!("{}{}", self.visit_fn(f), NL); @@ -54,9 +54,9 @@ impl WhammVisitor for AsStrVisitor { if 
whamm.globals.len() > 0 { s += &format!("Whamm globals:{}", NL); self.increase_indent(); - for (name, (_ty, _var_id, val)) in whamm.globals.iter() { + for (name, global) in whamm.globals.iter() { s += &format!("{}{} := ", self.get_indent(), name); - match val { + match &global.value { Some(v) => s += &format!("{}{}", self.visit_value(v), NL), None => s += &format!("None{}", NL) } @@ -82,7 +82,7 @@ impl WhammVisitor for AsStrVisitor { // print fns if whammy.fns.len() > 0 { - s += &format!("{} whammy functions:{}", self.get_indent(), NL); + s += &format!("{} whammy events:{}", self.get_indent(), NL); self.increase_indent(); for f in whammy.fns.iter() { s += &format!("{}{}{}", self.get_indent(), self.visit_fn(f), NL); @@ -120,7 +120,7 @@ impl WhammVisitor for AsStrVisitor { // print fns if provider.fns.len() > 0 { - s += &format!("{} functions:{}", self.get_indent(), NL); + s += &format!("{} events:{}", self.get_indent(), NL); self.increase_indent(); for f in provider.fns.iter() { s += &format!("{}{}{}", self.get_indent(), self.visit_fn(f), NL); @@ -136,15 +136,15 @@ impl WhammVisitor for AsStrVisitor { self.decrease_indent(); } - // print modules - if provider.modules.len() > 0 { - s += &format!("{} modules:{}", self.get_indent(), NL); - for (name, module) in provider.modules.iter() { + // print packages + if provider.packages.len() > 0 { + s += &format!("{} packages:{}", self.get_indent(), NL); + for (name, package) in provider.packages.iter() { self.increase_indent(); s += &format!("{} `{}` {{{}", self.get_indent(), name, NL); self.increase_indent(); - s += &format!("{}", self.visit_module(module)); + s += &format!("{}", self.visit_package(package)); self.decrease_indent(); s += &format!("{} }}{}", self.get_indent(), NL); @@ -155,35 +155,35 @@ impl WhammVisitor for AsStrVisitor { s } - fn visit_module(&mut self, module: &Module) -> String { + fn visit_package(&mut self, package: &Package) -> String { let mut s = "".to_string(); // print fns - if module.fns.len() > 
0 { - s += &format!("{} module fns:{}", self.get_indent(), NL); + if package.fns.len() > 0 { + s += &format!("{} package fns:{}", self.get_indent(), NL); self.increase_indent(); - for f in module.fns.iter() { + for f in package.fns.iter() { s += &format!("{}{}{}", self.get_indent(), self.visit_fn(f), NL); } self.decrease_indent(); } // print globals - if module.globals.len() > 0 { - s += &format!("{} module globals:{}", self.get_indent(), NL); + if package.globals.len() > 0 { + s += &format!("{} package globals:{}", self.get_indent(), NL); self.increase_indent(); - self.visit_globals(&module.globals); + self.visit_globals(&package.globals); self.decrease_indent(); } - // print functions - s += &format!("{} module functions:{}", self.get_indent(), NL); - for (name, function) in module.functions.iter() { + // print events + s += &format!("{} package events:{}", self.get_indent(), NL); + for (name, event) in package.events.iter() { self.increase_indent(); s += &format!("{} `{}` {{{}", self.get_indent(), name, NL); self.increase_indent(); - s += &format!("{}", self.visit_function(function)); + s += &format!("{}", self.visit_event(event)); self.decrease_indent(); s += &format!("{} }}{}", self.get_indent(), NL); @@ -193,31 +193,31 @@ impl WhammVisitor for AsStrVisitor { s } - fn visit_function(&mut self, function: &Function) -> String { + fn visit_event(&mut self, event: &Event) -> String { let mut s = "".to_string(); // print fns - if function.fns.len() > 0 { - s += &format!("{} function fns:{}", self.get_indent(), NL); + if event.fns.len() > 0 { + s += &format!("{} event fns:{}", self.get_indent(), NL); self.increase_indent(); - for f in function.fns.iter() { + for f in event.fns.iter() { s += &format!("{}{}{}", self.get_indent(), self.visit_fn(f), NL); } self.decrease_indent(); } // print globals - if function.globals.len() > 0 { - s += &format!("{} function globals:{}", self.get_indent(), NL); + if event.globals.len() > 0 { + s += &format!("{} event globals:{}", 
self.get_indent(), NL); self.increase_indent(); - self.visit_globals(&function.globals); + self.visit_globals(&event.globals); self.decrease_indent(); } // print probes - if function.probe_map.len() > 0 { - s += &format!("{} function probe_map:{}", self.get_indent(), NL); - for (name, probes) in function.probe_map.iter() { + if event.probe_map.len() > 0 { + s += &format!("{} event probe_map:{}", self.get_indent(), NL); + for (name, probes) in event.probe_map.iter() { self.increase_indent(); s += &format!("{} {}: ", self.get_indent(), name); @@ -285,6 +285,10 @@ impl WhammVisitor for AsStrVisitor { s } + // fn visit_predicate(&mut self, _predicate: &Expr) -> String { + // unimplemented!() + // } + fn visit_fn(&mut self, f: &parser_types::Fn) -> String { let mut s = "".to_string(); diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 200303fd..0b91dd22 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -239,22 +239,22 @@ wasm::call:alt / assert_eq!(0, provider.globals.len()); assert_eq!(0, provider.fns.len()); - assert_eq!(1, provider.modules.len()); - let module = provider.modules.get("bytecode").unwrap(); - assert_eq!("bytecode", module.name); - assert_eq!(0, module.globals.len()); - assert_eq!(0, module.fns.len()); - - assert_eq!(1, module.functions.len()); - let function = module.functions.get("call").unwrap(); - assert_eq!("call", function.name); - assert_eq!(4, function.globals.len()); - assert_eq!(0, function.fns.len()); - - assert_eq!(1, function.probe_map.len()); - assert_eq!(1, function.probe_map.get("alt").unwrap().len()); - - let probe = function.probe_map.get("alt").unwrap().get(0).unwrap(); + assert_eq!(1, provider.packages.len()); + let package = provider.packages.get("bytecode").unwrap(); + assert_eq!("bytecode", package.name); + assert_eq!(0, package.globals.len()); + assert_eq!(0, package.fns.len()); + + assert_eq!(1, package.events.len()); + let event = package.events.get("call").unwrap(); + assert_eq!("call", event.name); + 
assert_eq!(4, event.globals.len()); + assert_eq!(0, event.fns.len()); + + assert_eq!(1, event.probe_map.len()); + assert_eq!(1, event.probe_map.get("alt").unwrap().len()); + + let probe = event.probe_map.get("alt").unwrap().get(0).unwrap(); assert_eq!(0, probe.globals.len()); assert_eq!(0, probe.fns.len()); assert_eq!("alt", probe.name); diff --git a/src/parser/types.rs b/src/parser/types.rs index 8ce05540..eb93c983 100644 --- a/src/parser/types.rs +++ b/src/parser/types.rs @@ -72,7 +72,7 @@ pub enum Value { // Statements -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum Statement { Assign { var_id: Expr, // Should be VarId @@ -101,7 +101,7 @@ pub enum Expr { args: Option>> }, VarId { - // is_provided: bool, // TODO -- do I need this? + // is_comp_provided: bool, // TODO -- do I need this? name: String }, Primitive { // Type is val.ty @@ -130,18 +130,28 @@ pub enum Expr { // } // Functions +#[derive(Clone, Debug)] pub struct Fn { - pub(crate) is_provided: bool, + pub(crate) is_comp_provided: bool, pub(crate) name: String, pub(crate) params: Vec<(Expr, DataType)>, // Expr::VarId -> DataType pub(crate) return_ty: Option, pub(crate) body: Option> } +#[derive(Clone, Debug)] +pub struct Global { + pub is_comp_provided: bool, + + pub ty: DataType, + pub var_name: Expr, // Should be VarId + pub value: Option +} + pub struct Whamm { pub provided_probes: HashMap>>>, - pub(crate) fns: Vec, // Comp-provided - pub globals: HashMap)>, // Comp-provided, should be VarId + pub(crate) fns: Vec, // Comp-provided + pub globals: HashMap, // Comp-provided pub whammys: Vec } @@ -179,7 +189,7 @@ impl Whamm { ) ]; let strcmp_fn = Fn { - is_provided: true, + is_comp_provided: true, name: "strcmp".to_string(), params, return_ty: Some(DataType::Boolean), @@ -188,18 +198,18 @@ impl Whamm { vec![ strcmp_fn ] } - fn get_provided_globals() -> HashMap)> { + fn get_provided_globals() -> HashMap { HashMap::new() } fn init_provided_probes(&mut self) { - // A giant data structure to encode the 
available `providers->modules->functions->probe_types` + // A giant data structure to encode the available `providers->packages->events->probe_types` self.init_core_probes(); self.init_wasm_probes(); } fn init_core_probes(&mut self) { - // Not really any modules or functions for a core probe...just two types! + // Not really any packages or events for a core probe...just two types! self.provided_probes.insert("core".to_string(), HashMap::from([ ("".to_string(), HashMap::from([ ("".to_string(), vec![ @@ -211,9 +221,9 @@ impl Whamm { } fn init_wasm_probes(&mut self) { - // This list of functions matches up with bytecodes supported by Walrus. + // This list of events matches up with bytecodes supported by Walrus. // See: https://docs.rs/walrus/latest/walrus/ir/ - let wasm_bytecode_functions = vec![ + let wasm_bytecode_events = vec![ "Block".to_string(), "Loop".to_string(), "Call".to_string(), @@ -271,8 +281,8 @@ impl Whamm { let mut wasm_bytecode_map = HashMap::new(); // Build out the wasm_bytecode_map - for function in wasm_bytecode_functions { - wasm_bytecode_map.insert(function, wasm_bytecode_probe_types.clone()); + for event in wasm_bytecode_events { + wasm_bytecode_map.insert(event, wasm_bytecode_probe_types.clone()); } self.provided_probes.insert("wasm".to_string(), HashMap::from([ @@ -292,8 +302,8 @@ pub struct Whammy { pub name: String, /// The providers of the probes that have been used in the Whammy. pub providers: HashMap, - pub fns: Vec, // User-provided - pub globals: HashMap)>, // User-provided, should be VarId + pub fns: Vec, // User-provided + pub globals: HashMap, // User-provided, should be VarId } impl Whammy { pub fn new() -> Self { @@ -305,7 +315,7 @@ impl Whammy { } } - /// Iterates over all of the matched providers, modules, functions, and probe names + /// Iterates over all of the matched providers, packages, events, and probe names /// to add a copy of the user-defined Probe for each of them. 
pub fn add_probe(&mut self, provided_probes: &HashMap>>>, prov_patt: &str, mod_patt: &str, func_patt: &str, nm_patt: &str, @@ -321,30 +331,30 @@ impl Whammy { self.providers.get_mut(&provider_str.to_lowercase()).unwrap() } }; - for module_str in Module::get_matches(provided_probes,provider_str, mod_patt).iter() { - // Does module exist yet? - let module = match provider.modules.get_mut(module_str) { + for package_str in Package::get_matches(provided_probes,provider_str, mod_patt).iter() { + // Does package exist yet? + let package = match provider.packages.get_mut(package_str) { Some(m) => m, None => { - // add the module! - let new_mod = Module::new(module_str.to_lowercase().to_string()); - provider.modules.insert(module_str.to_lowercase().to_string(), new_mod); - provider.modules.get_mut(&module_str.to_lowercase()).unwrap() + // add the package! + let new_mod = Package::new(package_str.to_lowercase().to_string()); + provider.packages.insert(package_str.to_lowercase().to_string(), new_mod); + provider.packages.get_mut(&package_str.to_lowercase()).unwrap() } }; - for function_str in Function::get_matches(provided_probes, provider_str, module_str, func_patt).iter() { - // Does function exist yet? - let function = match module.functions.get_mut(function_str) { + for event_str in Event::get_matches(provided_probes, provider_str, package_str, func_patt).iter() { + // Does event exist yet? + let event = match package.events.get_mut(event_str) { Some(f) => f, None => { - // add the module! - let new_fn = Function::new(function_str.to_lowercase().to_string()); - module.functions.insert(function_str.to_lowercase().to_string(), new_fn); - module.functions.get_mut(&function_str.to_lowercase()).unwrap() + // add the package! 
+ let new_fn = Event::new(event_str.to_lowercase().to_string()); + package.events.insert(event_str.to_lowercase().to_string(), new_fn); + package.events.get_mut(&event_str.to_lowercase()).unwrap() } }; - for name_str in Probe::get_matches(provided_probes, provider_str, module_str, function_str, nm_patt).iter() { - function.insert_probe(name_str.to_string(), Probe::new(nm_patt.to_string(), predicate.clone(), body.clone())); + for name_str in Probe::get_matches(provided_probes, provider_str, package_str, event_str, nm_patt).iter() { + event.insert_probe(name_str.to_string(), Probe::new(nm_patt.to_string(), predicate.clone(), body.clone())); } } } @@ -354,12 +364,12 @@ impl Whammy { pub struct Provider { pub name: String, - pub fns: Vec, // Comp-provided - pub globals: HashMap)>, // Comp-provided, should be VarId + pub fns: Vec, // Comp-provided + pub globals: HashMap, // Comp-provided - /// The modules of the probes that have been used in the Whammy. - /// These will be sub-modules of this Provider. - pub modules: HashMap + /// The packages of the probes that have been used in the Whammy. + /// These will be sub-packages of this Provider. + pub packages: HashMap } impl Provider { pub fn new(name: String) -> Self { @@ -369,7 +379,7 @@ impl Provider { name, fns, globals, - modules: HashMap::new() + packages: HashMap::new() } } @@ -377,7 +387,7 @@ impl Provider { vec![] } - fn get_provided_globals(_name: &String) -> HashMap)> { + fn get_provided_globals(_name: &String) -> HashMap { HashMap::new() } @@ -396,24 +406,24 @@ impl Provider { } } -pub struct Module { +pub struct Package { pub name: String, - pub fns: Vec, // Comp-provided - pub globals: HashMap)>, // Comp-provided, should be VarId + pub fns: Vec, // Comp-provided + pub globals: HashMap, // Comp-provided - /// The functions of the probes that have been used in the Whammy. - /// These will be sub-functions of this Module. - pub functions: HashMap + /// The events of the probes that have been used in the Whammy. 
+ /// These will be sub-events of this Package. + pub events: HashMap } -impl Module { +impl Package { pub fn new(name: String) -> Self { - let fns = Module::get_provided_fns(&name); - let globals = Module::get_provided_globals(&name); - Module { + let fns = Package::get_provided_fns(&name); + let globals = Package::get_provided_globals(&name); + Package { name, fns, globals, - functions: HashMap::new() + events: HashMap::new() } } @@ -421,17 +431,17 @@ impl Module { vec![] } - fn get_provided_globals(_name: &String) -> HashMap)> { + fn get_provided_globals(_name: &String) -> HashMap { HashMap::new() } - /// Get the Module names that match the passed glob pattern + /// Get the Package names that match the passed glob pattern pub fn get_matches(provided_probes: &HashMap>>>, provider: &str, mod_patt: &str) -> Vec { let glob = Pattern::new(&mod_patt.to_lowercase()).unwrap(); let mut matches = vec![]; - for (mod_name, _module) in provided_probes.get(provider).unwrap().into_iter() { + for (mod_name, _package) in provided_probes.get(provider).unwrap().into_iter() { if glob.matches(&mod_name.to_lowercase()) { matches.push(mod_name.clone()); } @@ -441,17 +451,17 @@ impl Module { } } -pub struct Function { +pub struct Event { pub name: String, - pub fns: Vec, // Comp-provided - pub globals: HashMap)>, // Comp-provided, should be VarId + pub fns: Vec, // Comp-provided + pub globals: HashMap, // Comp-provided pub probe_map: HashMap> } -impl Function { +impl Event { pub fn new(name: String) -> Self { - let fns = Function::get_provided_fns(&name); - let globals = Function::get_provided_globals(&name); - Function { + let fns = Event::get_provided_fns(&name); + let globals = Event::get_provided_globals(&name); + Event { name, fns, globals, @@ -463,34 +473,54 @@ impl Function { vec![] } - fn get_provided_globals(name: &String) -> HashMap)> { + fn get_provided_globals(name: &String) -> HashMap { let mut globals = HashMap::new(); if name.to_lowercase() == "call" { - // Add in 
provided globals for the "call" function - globals.insert("target_fn_type".to_string(),(DataType::Str, Expr::VarId { - name: "target_fn_type".to_string(), - }, None)); - globals.insert("target_imp_module".to_string(), (DataType::Str, Expr::VarId { - name: "target_imp_module".to_string(), - }, None)); - globals.insert("target_imp_name".to_string(), (DataType::Str, Expr::VarId { - name: "target_imp_name".to_string(), - }, None)); - globals.insert("new_target_fn_name".to_string(), (DataType::Str, Expr::VarId { - name: "new_target_fn_name".to_string(), - }, None)); + // Add in provided globals for the "call" event + globals.insert("target_fn_type".to_string(),Global { + is_comp_provided: true, + ty: DataType::Str, + var_name: Expr::VarId { + name: "target_fn_type".to_string(), + }, + value: None + }); + globals.insert("target_imp_module".to_string(),Global { + is_comp_provided: true, + ty: DataType::Str, + var_name: Expr::VarId { + name: "target_imp_module".to_string(), + }, + value: None + }); + globals.insert("target_imp_name".to_string(),Global { + is_comp_provided: true, + ty: DataType::Str, + var_name: Expr::VarId { + name: "target_imp_name".to_string(), + }, + value: None + }); + globals.insert("new_target_fn_name".to_string(),Global { + is_comp_provided: true, + ty: DataType::Str, + var_name: Expr::VarId { + name: "new_target_fn_name".to_string(), + }, + value: None + }); } globals } - /// Get the Function names that match the passed glob pattern - pub fn get_matches(provided_probes: &HashMap>>>, provider: &str, module: &str, func_patt: &str) -> Vec { + /// Get the Event names that match the passed glob pattern + pub fn get_matches(provided_probes: &HashMap>>>, provider: &str, package: &str, func_patt: &str) -> Vec { let glob = Pattern::new(&func_patt.to_lowercase()).unwrap(); let mut matches = vec![]; - for (fn_name, _module) in provided_probes.get(provider).unwrap().get(module).unwrap().into_iter() { + for (fn_name, _package) in 
provided_probes.get(provider).unwrap().get(package).unwrap().into_iter() { if glob.matches(&fn_name.to_lowercase()) { matches.push(fn_name.clone()); } @@ -513,10 +543,11 @@ impl Function { } } +#[derive(Clone, Debug)] pub struct Probe { pub name: String, - pub fns: Vec, // Comp-provided - pub globals: HashMap)>, // Comp-provided, should be VarId + pub fns: Vec, // Comp-provided + pub globals: HashMap, // Comp-provided pub predicate: Option, pub body: Option> @@ -539,17 +570,17 @@ impl Probe { vec![] } - fn get_provided_globals(_name: &String) -> HashMap)> { + fn get_provided_globals(_name: &String) -> HashMap { HashMap::new() } /// Get the Probe names that match the passed glob pattern - pub fn get_matches(provided_probes: &HashMap>>>, provider: &str, module: &str, function: &str, probe_patt: &str) -> Vec { + pub fn get_matches(provided_probes: &HashMap>>>, provider: &str, package: &str, event: &str, probe_patt: &str) -> Vec { let glob = Pattern::new(&probe_patt.to_lowercase()).unwrap(); let mut matches = vec![]; - for p_name in provided_probes.get(provider).unwrap().get(module).unwrap().get(function).unwrap().iter() { + for p_name in provided_probes.get(provider).unwrap().get(package).unwrap().get(event).unwrap().iter() { if glob.matches(&p_name.to_lowercase()) { matches.push(p_name.clone()); } @@ -595,9 +626,10 @@ pub trait WhammVisitor { fn visit_whamm(&mut self, whamm: &Whamm) -> T; fn visit_whammy(&mut self, whammy: &Whammy) -> T; fn visit_provider(&mut self, provider: &Provider) -> T; - fn visit_module(&mut self, module: &Module) -> T; - fn visit_function(&mut self, function: &Function) -> T; + fn visit_package(&mut self, package: &Package) -> T; + fn visit_event(&mut self, event: &Event) -> T; fn visit_probe(&mut self, probe: &Probe) -> T; + // fn visit_predicate(&mut self, predicate: &Expr) -> T; fn visit_fn(&mut self, f: &Fn) -> T; fn visit_formal_param(&mut self, param: &(Expr, DataType)) -> T; fn visit_stmt(&mut self, stmt: &Statement) -> T; @@ -612,9 
+644,10 @@ pub trait WhammVisitorMut { fn visit_whamm(&mut self, whamm: &mut Whamm) -> T; fn visit_whammy(&mut self, whammy: &mut Whammy) -> T; fn visit_provider(&mut self, provider: &mut Provider) -> T; - fn visit_module(&mut self, module: &mut Module) -> T; - fn visit_function(&mut self, function: &mut Function) -> T; + fn visit_package(&mut self, package: &mut Package) -> T; + fn visit_event(&mut self, event: &mut Event) -> T; fn visit_probe(&mut self, probe: &mut Probe) -> T; + // fn visit_predicate(&mut self, predicate: &mut Expr) -> T; fn visit_fn(&mut self, f: &mut Fn) -> T; fn visit_formal_param(&mut self, param: &mut (Expr, DataType)) -> T; fn visit_stmt(&mut self, stmt: &mut Statement) -> T; diff --git a/src/parser/whamm_parser.rs b/src/parser/whamm_parser.rs index 22ef8dd1..30914444 100644 --- a/src/parser/whamm_parser.rs +++ b/src/parser/whamm_parser.rs @@ -50,8 +50,8 @@ fn process_pair(whamm: &mut Whamm, whammy_count: usize, pair: Pair) { // Get out the spec info let provider = spec_split.next().unwrap(); - let module = spec_split.next().unwrap(); - let function = spec_split.next().unwrap(); + let package = spec_split.next().unwrap(); + let event = spec_split.next().unwrap(); let name = spec_split.next().unwrap(); // Get out the probe predicate/body contents @@ -91,7 +91,7 @@ fn process_pair(whamm: &mut Whamm, whammy_count: usize, pair: Pair) { // Add probe definition to the whammy let whammy: &mut Whammy = whamm.whammys.get_mut(whammy_count).unwrap(); - whammy.add_probe(&whamm.provided_probes, provider, module, function, name, this_predicate, this_body); + whammy.add_probe(&whamm.provided_probes, provider, package, event, name, this_predicate, this_body); trace!("Exiting probe_def"); }, @@ -356,7 +356,9 @@ pub fn parse_script(script: String) -> Result { // debug!("Parsed: {:#?}", res); match res { - Ok(ast) => Ok(ast), + Ok(ast) => { + Ok(ast) + }, Err(e) => Err(e.to_string()), } }, diff --git a/src/verifier/builder_visitor.rs 
b/src/verifier/builder_visitor.rs index 5a4ef84e..13d29b55 100644 --- a/src/verifier/builder_visitor.rs +++ b/src/verifier/builder_visitor.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; use crate::parser::types as parser_types; -use parser_types::{DataType, Whammy, Whamm, WhammVisitor, Expr, Fn, Function, Module, Op, Probe, Provider, Statement, Value}; +use parser_types::{DataType, Whammy, Whamm, WhammVisitor, Expr, Fn, Event, Package, Op, Probe, Provider, Statement, Value}; use crate::verifier::types::{Record, ScopeType, SymbolTable}; use log::{error, trace}; +use crate::parser::types::Global; pub struct SymbolTableBuilder { pub table: SymbolTable, @@ -12,8 +13,8 @@ pub struct SymbolTableBuilder { curr_whamm: Option, // indexes into this::table::records curr_whammy: Option, // indexes into this::table::records curr_provider: Option, // indexes into this::table::records - curr_module: Option, // indexes into this::table::records - curr_function: Option, // indexes into this::table::records + curr_package: Option, // indexes into this::table::records + curr_event: Option, // indexes into this::table::records curr_probe: Option, // indexes into this::table::records curr_fn: Option, // indexes into this::table::records @@ -25,8 +26,8 @@ impl SymbolTableBuilder { curr_whamm: None, curr_whammy: None, curr_provider: None, - curr_module: None, - curr_function: None, + curr_package: None, + curr_event: None, curr_probe: None, curr_fn: None, } @@ -77,7 +78,7 @@ impl SymbolTableBuilder { name: provider.name.clone(), fns: vec![], globals: vec![], - modules: vec![], + packages: vec![], }; // Add provider to scope @@ -101,72 +102,72 @@ impl SymbolTableBuilder { self.table.set_curr_scope_info(provider.name.clone(), ScopeType::Provider); } - fn add_module(&mut self, module: &Module) { - if self.table.lookup(&module.name).is_some() { - error!("duplicated module [ {} ]", &module.name); + fn add_package(&mut self, package: &Package) { + if 
self.table.lookup(&package.name).is_some() { + error!("duplicated package [ {} ]", &package.name); } // create record - let module_rec = Record::Module { - name: module.name.clone(), + let package_rec = Record::Package { + name: package.name.clone(), fns: vec![], globals: vec![], - functions: vec![], + events: vec![], }; - // Add module to scope - let id = self.table.put(module.name.clone(), module_rec); + // Add package to scope + let id = self.table.put(package.name.clone(), package_rec); - // Add module to current provider record + // Add package to current provider record match self.table.get_record_mut(&self.curr_provider.unwrap()).unwrap() { - Record::Provider { modules, .. } => { - modules.push(id.clone()); + Record::Provider { packages, .. } => { + packages.push(id.clone()); } _ => { unreachable!() } } - // enter module scope + // enter package scope self.table.enter_scope(); - self.curr_module = Some(id.clone()); + self.curr_package = Some(id.clone()); // set scope name and type - self.table.set_curr_scope_info(module.name.clone(), ScopeType::Module); + self.table.set_curr_scope_info(package.name.clone(), ScopeType::Package); } - fn add_function(&mut self, function: &Function) { - if self.table.lookup(&function.name).is_some() { - error!("duplicated function [ {} ]", &function.name); + fn add_event(&mut self, event: &Event) { + if self.table.lookup(&event.name).is_some() { + error!("duplicated event [ {} ]", &event.name); } // create record - let function_rec = Record::Function { - name: function.name.clone(), + let event_rec = Record::Event { + name: event.name.clone(), fns: vec![], globals: vec![], probes: vec![], }; - // Add function to scope - let id = self.table.put(function.name.clone(), function_rec); + // Add event to scope + let id = self.table.put(event.name.clone(), event_rec); - // Add function to current module record - match self.table.get_record_mut(&self.curr_module.unwrap()).unwrap() { - Record::Module { functions, .. 
} => { - functions.push(id.clone()); + // Add event to current package record + match self.table.get_record_mut(&self.curr_package.unwrap()).unwrap() { + Record::Package { events, .. } => { + events.push(id.clone()); } _ => { unreachable!() } } - // enter function scope + // enter event scope self.table.enter_scope(); - self.curr_function = Some(id.clone()); + self.curr_event = Some(id.clone()); // set scope name and type - self.table.set_curr_scope_info(function.name.clone(), ScopeType::Function); + self.table.set_curr_scope_info(event.name.clone(), ScopeType::Event); } fn add_probe(&mut self, probe: &Probe) { @@ -184,9 +185,9 @@ impl SymbolTableBuilder { // Add probe to scope let id = self.table.put(probe.name.clone(), probe_rec); - // Add probe to current function record - match self.table.get_record_mut(&self.curr_function.unwrap()) { - Some(Record::Function { probes, .. }) => { + // Add probe to current event record + match self.table.get_record_mut(&self.curr_event.unwrap()) { + Some(Record::Event { probes, .. }) => { probes.push(id.clone()); } _ => { @@ -236,8 +237,8 @@ impl SymbolTableBuilder { Some(Record::Whamm { fns, .. }) | Some(Record::Whammy { fns, .. }) | Some(Record::Provider { fns, .. }) | - Some(Record::Module { fns, .. }) | - Some(Record::Function { fns, .. }) | + Some(Record::Package { fns, .. }) | + Some(Record::Event { fns, .. }) | Some(Record::Probe { fns, .. 
}) => { fns.push(id.clone()); } @@ -295,9 +296,9 @@ impl SymbolTableBuilder { self.add_fn_id_to_curr_rec(id); } - fn visit_globals(&mut self, globals: &HashMap)>) { - for (name, (ty, _expr, _val)) in globals.iter() { - self.add_global(ty.clone(), name.clone()); + fn visit_globals(&mut self, globals: &HashMap) { + for (name, global) in globals.iter() { + self.add_global(global.ty.clone(), name.clone()); } } } @@ -355,8 +356,8 @@ impl WhammVisitor<()> for SymbolTableBuilder { self.add_provider(provider); provider.fns.iter().for_each(| f | self.visit_fn(f) ); self.visit_globals(&provider.globals); - provider.modules.iter().for_each(| (_name, module) | { - self.visit_module(module) + provider.packages.iter().for_each(| (_name, package) | { + self.visit_package(package) }); trace!("Exiting: visit_provider"); @@ -364,38 +365,38 @@ impl WhammVisitor<()> for SymbolTableBuilder { self.curr_provider = None; } - fn visit_module(&mut self, module: &Module) -> () { - trace!("Entering: visit_module"); + fn visit_package(&mut self, package: &Package) -> () { + trace!("Entering: visit_package"); - self.add_module(module); - module.fns.iter().for_each(| f | self.visit_fn(f) ); - self.visit_globals(&module.globals); - module.functions.iter().for_each(| (_name, function) | { - self.visit_function(function) + self.add_package(package); + package.fns.iter().for_each(| f | self.visit_fn(f) ); + self.visit_globals(&package.globals); + package.events.iter().for_each(| (_name, event) | { + self.visit_event(event) }); - trace!("Exiting: visit_module"); + trace!("Exiting: visit_package"); self.table.exit_scope(); - self.curr_module = None; + self.curr_package = None; } - fn visit_function(&mut self, function: &Function) -> () { - trace!("Entering: visit_function"); + fn visit_event(&mut self, event: &Event) -> () { + trace!("Entering: visit_event"); - self.add_function(function); - function.fns.iter().for_each(| f | self.visit_fn(f) ); - self.visit_globals(&function.globals); + 
self.add_event(event); + event.fns.iter().for_each(| f | self.visit_fn(f) ); + self.visit_globals(&event.globals); // visit probe_map - function.probe_map.iter().for_each(| probes | { + event.probe_map.iter().for_each(| probes | { probes.1.iter().for_each(| probe | { self.visit_probe(probe); }); }); - trace!("Exiting: visit_function"); + trace!("Exiting: visit_event"); self.table.exit_scope(); - self.curr_function = None; + self.curr_event = None; } fn visit_probe(&mut self, probe: &Probe) -> () { @@ -412,6 +413,10 @@ impl WhammVisitor<()> for SymbolTableBuilder { self.curr_probe = None; } + // fn visit_predicate(&mut self, _predicate: &Expr) -> () { + // unimplemented!() + // } + fn visit_fn(&mut self, f: &Fn) -> () { trace!("Entering: visit_fn"); @@ -435,7 +440,7 @@ impl WhammVisitor<()> for SymbolTableBuilder { } fn visit_stmt(&mut self, _assign: &Statement) -> () { - // Not visiting function/probe bodies + // Not visiting event/probe bodies unreachable!() } diff --git a/src/verifier/tests.rs b/src/verifier/tests.rs index 708ee0e6..08ab60a4 100644 --- a/src/verifier/tests.rs +++ b/src/verifier/tests.rs @@ -30,7 +30,7 @@ pub fn test_build_table() { for script in VALID_SCRIPTS { match tests::get_ast(script) { Some(ast) => { - let table = verifier::verify(&ast, false); + let table = verifier::build_symbol_table(&ast); println!("{:#?}", table); }, None => { @@ -57,7 +57,7 @@ wasm::call:alt / match tests::get_ast(script) { Some(ast) => { - let table = verifier::verify(&ast, false); + let table = verifier::build_symbol_table(&ast); println!("{:#?}", table); // 7 scopes: whamm, strcmp, whammy, wasm, bytecode, call, alt diff --git a/src/verifier/types.rs b/src/verifier/types.rs index 31889183..7dd8dbcf 100644 --- a/src/verifier/types.rs +++ b/src/verifier/types.rs @@ -57,7 +57,7 @@ impl SymbolTable { }); } - pub fn enter_named_scope(&mut self, scope_name: &String) { + pub fn enter_named_scope(&mut self, scope_name: &String) -> bool { let curr = 
self.get_curr_scope_mut().unwrap(); let children = curr.children.clone(); @@ -78,9 +78,10 @@ impl SymbolTable { if let (Some(new_curr), Some(new_next)) = (new_curr_scope, new_next) { curr.next = new_next; self.curr_scope = new_curr; - } else { - error!("Could not find the specified scope by name: `{}`", scope_name); + return true; } + error!("Could not find the specified scope by name: `{}`", scope_name); + return false; } pub fn enter_scope(&mut self) { @@ -148,8 +149,8 @@ impl SymbolTable { Record::Whamm { .. } | Record::Whammy { .. } | Record::Provider { .. } | - Record::Module { .. } | - Record::Function { .. } | + Record::Package { .. } | + Record::Event { .. } | Record::Probe { .. } => { self.curr_rec = new_rec_id.clone(); } @@ -279,15 +280,45 @@ pub enum ScopeType { Whamm, Whammy, Provider, - Module, - Function, + Package, + Event, Probe, Fn, Null } +impl ScopeType { + pub fn to_string(&self) -> String { + return match self { + ScopeType::Whamm {..} => { + "Whamm".to_string() + }, + ScopeType::Whammy {..} => { + "Whammy".to_string() + }, + ScopeType::Provider {..} => { + "Provider".to_string() + }, + ScopeType::Package {..} => { + "Package".to_string() + }, + ScopeType::Event {..} => { + "Event".to_string() + }, + ScopeType::Probe {..} => { + "Probe".to_string() + }, + ScopeType::Fn {..} => { + "Fn".to_string() + }, + ScopeType::Null {..} => { + "Null".to_string() + } + } + } +} /// The usize values in the record fields index into the SymbolTable::records Vec. 
-#[derive(Debug)] +#[derive(Clone, Debug)] pub enum Record { Whamm { name: String, @@ -305,15 +336,15 @@ pub enum Record { name: String, fns: Vec, globals: Vec, - modules: Vec + packages: Vec }, - Module { + Package { name: String, fns: Vec, globals: Vec, - functions: Vec + events: Vec }, - Function { + Event { name: String, fns: Vec, globals: Vec, diff --git a/src/verifier/verifier.rs b/src/verifier/verifier.rs index 75ed04a8..099ffc00 100644 --- a/src/verifier/verifier.rs +++ b/src/verifier/verifier.rs @@ -2,20 +2,13 @@ use crate::parser::types::{Whamm, WhammVisitor}; use crate::verifier::builder_visitor::SymbolTableBuilder; use crate::verifier::types::SymbolTable; -pub fn verify(ast: &Whamm, verify: bool) -> SymbolTable { - if verify { - // TODO do typechecking! - unimplemented!() - } - build_symbol_table(&ast) -} - -// ================ -// = SYMBOL TABLE = -// ================ - -fn build_symbol_table(ast: &Whamm) -> SymbolTable { +pub fn build_symbol_table(ast: &Whamm) -> SymbolTable { let mut visitor = SymbolTableBuilder::new(); visitor.visit_whamm(ast); visitor.table } + +pub fn verify(_ast: &Whamm) -> bool { + // TODO do typechecking! 
+ unimplemented!() +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 56fd0a7e..97fefc5c 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,9 +1,11 @@ use whamm::parser::whamm_parser::*; use whamm::parser::types::Whamm; -use whamm::verifier::verifier::verify; +use whamm::verifier::verifier::build_symbol_table; use glob::{glob, glob_with}; use log::{info, error, warn}; +use whamm::behavior::builder_visitor::{build_behavior_tree, SimpleAST}; +use whamm::behavior::tree::BehaviorTree; use whamm::verifier::types::SymbolTable; // ================= @@ -71,21 +73,26 @@ fn parse_all_scripts(scripts: Vec) -> Vec { whammys } -fn process_scripts(scripts: Vec) -> Vec<(Whamm, SymbolTable)> { +fn process_scripts(scripts: Vec) -> Vec<(Whamm, SymbolTable, BehaviorTree, SimpleAST)> { let asts = parse_all_scripts(scripts); // Build the symbol table from the AST let mut result = vec![]; for ast in asts { - let mut symbol_table = verify(&ast, false); + let mut symbol_table = build_symbol_table(&ast); symbol_table.reset(); - result.push((ast, symbol_table)); + + // Build the behavior tree from the AST + let (mut behavior, simple_ast) = build_behavior_tree(&ast); + behavior.reset(); + + result.push((ast, symbol_table, behavior, simple_ast)); } result } -pub fn setup_fault_injection() -> Vec<(Whamm, SymbolTable)> { +pub fn setup_fault_injection() -> Vec<(Whamm, SymbolTable, BehaviorTree, SimpleAST)> { setup_logger(); let scripts = get_test_scripts("fault_injection"); if scripts.len() == 0 { @@ -95,7 +102,7 @@ pub fn setup_fault_injection() -> Vec<(Whamm, SymbolTable)> { process_scripts(scripts) } -pub fn setup_wizard_monitors() -> Vec<(Whamm, SymbolTable)> { +pub fn setup_wizard_monitors() -> Vec<(Whamm, SymbolTable, BehaviorTree, SimpleAST)> { setup_logger(); let scripts = get_test_scripts("wizard_monitors"); if scripts.len() == 0 { @@ -105,7 +112,7 @@ pub fn setup_wizard_monitors() -> Vec<(Whamm, SymbolTable)> { process_scripts(scripts) } -pub fn 
setup_replay() -> Vec<(Whamm, SymbolTable)> { +pub fn setup_replay() -> Vec<(Whamm, SymbolTable, BehaviorTree, SimpleAST)> { setup_logger(); let scripts = get_test_scripts("replay"); if scripts.len() == 0 { diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 2e22fa0a..b51662ad 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,13 +1,14 @@ mod common; -use whamm::generator::emitters::{WasmRewritingEmitter}; -use whamm::generator::code_generator::{CodeGenerator}; use log::error; use std::fs; use std::process::{Command, Stdio}; use std::path::Path; use walrus::Module; +use whamm::generator::init_generator::InitGenerator; +use whamm::generator::emitters::{Emitter, WasmRewritingEmitter}; +use whamm::generator::instr_generator::InstrGenerator; const APP_WASM_PATH: &str = "tests/apps/users.wasm"; @@ -27,16 +28,35 @@ fn instrument_with_fault_injection() { let processed_scripts = common::setup_fault_injection(); assert!(processed_scripts.len() > 0); - for (mut whamm, symbol_table) in processed_scripts { + for (mut whamm, symbol_table, behavior, simple_ast) in processed_scripts { let app_wasm = get_wasm_module(); - let emitter = WasmRewritingEmitter::new( + let mut emitter = WasmRewritingEmitter::new( app_wasm, symbol_table ); + // Phase 0 of instrumentation (emit globals and provided fns) + let mut init = InitGenerator { + emitter: Box::new(&mut emitter), + context_name: "".to_string(), + }; + assert!(init.run(&mut whamm)); - let mut generator = CodeGenerator::new(Box::new(emitter)); - - assert!(generator.generate(&mut whamm)); + // Phase 1 of instrumentation (actually emits the instrumentation code) + // This structure is necessary since we need to have the fns/globals injected (a single time) + // and ready to use in every body/predicate. 
+ let mut instr = InstrGenerator { + tree: &behavior, + emitter: Box::new(&mut emitter), + ast: simple_ast, + context_name: "".to_string(), + curr_provider_name: "".to_string(), + curr_package_name: "".to_string(), + curr_event_name: "".to_string(), + curr_probe_name: "".to_string(), + curr_probe: None, + }; + // TODO add assertions here once I have error logic in place to check that it worked! + instr.run(&behavior); if !Path::new(OUT_BASE_DIR).exists() { match fs::create_dir(OUT_BASE_DIR) { @@ -49,7 +69,7 @@ fn instrument_with_fault_injection() { } let out_wasm_path = format!("{OUT_BASE_DIR}/{OUT_WASM_NAME}"); - generator.dump_to_file(out_wasm_path.to_string()); + emitter.dump_to_file(out_wasm_path.to_string()); let mut wasm2wat = Command::new("wasm2wat"); wasm2wat.stdout(Stdio::null()) diff --git a/tests/whammys/fault_injection/dfinity_sync.mm b/tests/whammys/fault_injection/dfinity_sync.mm index 0e482907..66c1ccdb 100644 --- a/tests/whammys/fault_injection/dfinity_sync.mm +++ b/tests/whammys/fault_injection/dfinity_sync.mm @@ -5,7 +5,7 @@ * to be added to collect call site/target canister info. * See file: `dfinity_sync-with-pred.d` */ -wasm::call:alt / +wasm:bytecode:call:alt / target_fn_type == "import" && target_imp_module == "ic0" && target_imp_name == "call_perform"