From b4d35f2e4da8c4ccd7abce5167a8a69d51182984 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 11:41:04 +0200 Subject: [PATCH 01/29] Add syntax for optional nodes (marked with ? postfix) and improve error messages for non-bound variables --- graphannis/src/annis/db/aql/ast.rs | 2 ++ graphannis/src/annis/db/aql/conjunction.rs | 20 ++++++++++++++------ graphannis/src/annis/db/aql/mod.rs | 19 ++++++++++++------- graphannis/src/annis/db/aql/parser.lalrpop | 10 ++++++---- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/graphannis/src/annis/db/aql/ast.rs b/graphannis/src/annis/db/aql/ast.rs index 01d163d02..e9ea1ff7e 100644 --- a/graphannis/src/annis/db/aql/ast.rs +++ b/graphannis/src/annis/db/aql/ast.rs @@ -37,6 +37,7 @@ pub enum Literal { spec: NodeSearchSpec, pos: Option, variable: Option, + optional: bool, }, BinaryOp { lhs: Operand, @@ -63,6 +64,7 @@ pub enum Operand { spec: Rc, pos: Pos, variable: Option, + optional: bool, }, } diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index 00fbe71a1..064b700c5 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -64,6 +64,7 @@ pub struct UnaryOperatorEntry { #[derive(Debug)] pub struct Conjunction { nodes: Vec<(String, NodeSearchSpec)>, + optional_nodes: Vec<(String, NodeSearchSpec)>, binary_operators: Vec, unary_operators: Vec, variables: HashMap, @@ -210,6 +211,7 @@ impl Conjunction { pub fn new() -> Conjunction { Conjunction { nodes: vec![], + optional_nodes: vec![], binary_operators: vec![], unary_operators: vec![], variables: HashMap::default(), @@ -222,6 +224,7 @@ impl Conjunction { pub fn with_offset(var_idx_offset: usize) -> Conjunction { Conjunction { nodes: vec![], + optional_nodes: vec![], binary_operators: vec![], unary_operators: vec![], variables: HashMap::default(), @@ -255,7 +258,7 @@ impl Conjunction { } pub fn add_node(&mut self, node: NodeSearchSpec, variable: Option<&str>) -> String { - self.add_node_from_query(node, variable, None, true) + self.add_node_from_query(node, variable, None, true, false) } pub fn add_node_from_query( @@ -264,6 +267,7 @@ impl Conjunction { variable: Option<&str>, location: Option, included_in_output: bool, + optional: bool, ) -> String { let idx = self.var_idx_offset + self.nodes.len(); let variable = if let Some(variable) = variable { @@ -271,7 +275,11 @@ impl Conjunction { } else { (idx + 1).to_string() }; - self.nodes.push((variable.clone(), node)); + if optional { + self.optional_nodes.push((variable.clone(), node)); + } else { + self.nodes.push((variable.clone(), node)); + } self.variables.insert(variable.clone(), idx); if included_in_output { self.include_in_output.insert(variable.clone()); @@ -294,7 +302,7 @@ impl Conjunction { Ok(()) } else { Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("Operand '#{}' not found", var), + desc: format!("Operand \"#{}\" not found", var), location, })) } @@ -346,7 +354,7 @@ impl Conjunction { return Ok(*pos); } Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("Operand '#{}' not found", variable), + desc: format!("Operand \"#{}\" not found", variable), location, })) } @@ -375,7 +383,7 @@ impl Conjunction { } Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("Operand '#{}' not found", variable), + desc: format!("Operand \"#{}\" not found", variable), location, })) } @@ -812,7 +820,7 @@ impl Conjunction { return Err(GraphAnnisError::AQLSemanticError(AQLError { desc: format!( - "Variable \"{}\" not bound (use linguistic operators)", + "Variable \"#{}\" not bound (use linguistic operators)", n_var ), location: location.cloned(), diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 776f3edf1..333c2cffe 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -173,7 +173,7 @@ fn map_conjunction( Ok(q) } -type PosToNodeMap = BTreeMap)>; +type PosToNodeMap = BTreeMap, bool)>; type PosToEndPosMap = BTreeMap; fn calculate_node_positions( @@ -190,9 +190,10 @@ fn calculate_node_positions( spec, pos, variable, + optional, } => { if let Some(pos) = pos { - pos_to_node.insert(pos.start, (spec.clone(), variable.clone())); + pos_to_node.insert(pos.start, (spec.clone(), variable.clone(), *optional)); pos_to_endpos.insert(pos.start, pos.end); } } @@ -201,22 +202,24 @@ fn calculate_node_positions( spec, pos, variable, + optional, } = lhs { pos_to_node .entry(pos.start) - .or_insert_with(|| (spec.as_ref().clone(), variable.clone())); + .or_insert_with(|| (spec.as_ref().clone(), variable.clone(), *optional)); pos_to_endpos.entry(pos.start).or_insert_with(|| pos.end); } if let ast::Operand::Literal { spec, pos, variable, + optional, } = rhs { pos_to_node .entry(pos.start) - .or_insert_with(|| (spec.as_ref().clone(), variable.clone())); + .or_insert_with(|| (spec.as_ref().clone(), variable.clone(), *optional)); pos_to_endpos.entry(pos.start).or_insert_with(|| pos.end); } } @@ -244,12 +247,12 @@ fn calculate_node_positions( fn add_node_specs_by_start( q: &mut Conjunction, - pos_to_node: BTreeMap)>, + pos_to_node: BTreeMap, bool)>, pos_to_endpos: BTreeMap, offsets: &BTreeMap, ) -> Result> { let mut pos_to_node_id: BTreeMap = BTreeMap::default(); - for (start_pos, (node_spec, variable)) in pos_to_node { + for (start_pos, (node_spec, variable, optional)) in pos_to_node { let start = get_line_and_column_for_pos(start_pos, offsets); let end = pos_to_endpos .get(&start_pos) @@ -260,6 +263,7 @@ fn add_node_specs_by_start( variable.as_deref(), Some(LineColumnRange { start, end }), true, + optional, ); pos_to_node_id.insert(start_pos, idx.clone()); } @@ -277,7 +281,7 @@ fn add_legacy_metadata_constraints( // TODO: add warning to the user not to use this construct anymore for (spec, _pos) in legacy_meta_search { // add an artificial node that describes the document/corpus node - let meta_node_idx = q.add_node_from_query(spec, None, None, false); + let meta_node_idx = q.add_node_from_query(spec, None, None, false, false); if let Some(first_meta_idx) = first_meta_idx.clone() { // avoid nested loops by joining additional meta nodes with a "identical node" q.add_operator( @@ -309,6 +313,7 @@ fn add_legacy_metadata_constraints( None, None, false, + false, ); q.add_operator( Box::new(IdenticalNodeSpec {}), diff --git a/graphannis/src/annis/db/aql/parser.lalrpop b/graphannis/src/annis/db/aql/parser.lalrpop index 12099973e..5ffdddcd2 100644 --- a/graphannis/src/annis/db/aql/parser.lalrpop +++ b/graphannis/src/annis/db/aql/parser.lalrpop @@ -66,9 +66,10 @@ Factor : ast::Expr = { Literal : ast::Expr = { // any node annotation search - => { + => { let pos = Some(ast::Pos{start, end}); - Expr::Terminal(ast::Literal::NodeSearch{pos, spec, variable: var.and_then(|s| Some(s[0..s.len()-1].to_string()))}) + let optional = optional.is_some(); + Expr::Terminal(ast::Literal::NodeSearch{pos, spec, optional, variable: var.and_then(|s| Some(s[0..s.len()-1].to_string()))}) }, // binary operator => { @@ -181,11 +182,12 @@ Literal : ast::Expr = { Operand : ast::Operand = { NodeRef => ast::Operand::NodeRef(<>), - => { + => { let pos = ast::Pos {start, end}; let spec = Rc::from(spec); let variable = var.and_then(|s| Some(s[0..s.len()-1].to_string())); - ast::Operand::Literal{spec, pos, variable, } + let optional = optional.is_some(); + ast::Operand::Literal{spec, pos, variable, optional} }, } From 2a33af394a59e44f3f01bcb25433723b94cd0cc0 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 12:01:03 +0200 Subject: [PATCH 02/29] Use a struct to describe the vector of nodes --- graphannis/src/annis/db/aql/conjunction.rs | 43 ++++++++++++---------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index 064b700c5..bda5ab1bf 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -61,10 +61,16 @@ pub struct UnaryOperatorEntry { pub node_nr: usize, } +#[derive(Debug)] +struct Node { + var: String, + spec: NodeSearchSpec, + optional: bool, +} + #[derive(Debug)] pub struct Conjunction { - nodes: Vec<(String, NodeSearchSpec)>, - optional_nodes: Vec<(String, NodeSearchSpec)>, + nodes: Vec, binary_operators: Vec, unary_operators: Vec, variables: HashMap, @@ -211,7 +217,6 @@ impl Conjunction { pub fn new() -> Conjunction { Conjunction { nodes: vec![], - optional_nodes: vec![], binary_operators: vec![], unary_operators: vec![], variables: HashMap::default(), @@ -224,7 +229,6 @@ impl Conjunction { pub fn with_offset(var_idx_offset: usize) -> Conjunction { Conjunction { nodes: vec![], - optional_nodes: vec![], binary_operators: vec![], unary_operators: vec![], variables: HashMap::default(), @@ -240,16 +244,16 @@ impl Conjunction { pub fn get_node_descriptions(&self) -> Vec { let mut result = Vec::default(); - for (var, spec) in &self.nodes { - let anno_name = match spec { + for n in &self.nodes { + let anno_name = match &n.spec { NodeSearchSpec::ExactValue { name, .. } => Some(name.clone()), NodeSearchSpec::RegexValue { name, .. } => Some(name.clone()), _ => None, }; let desc = QueryAttributeDescription { alternative: 0, - query_fragment: format!("{}", spec), - variable: var.clone(), + query_fragment: format!("{}", n.spec), + variable: n.var.clone(), anno_name, }; result.push(desc); @@ -275,11 +279,12 @@ impl Conjunction { } else { (idx + 1).to_string() }; - if optional { - self.optional_nodes.push((variable.clone(), node)); - } else { - self.nodes.push((variable.clone(), node)); - } + self.nodes.push(Node { + var: variable.clone(), + spec: node, + optional, + }); + self.variables.insert(variable.clone(), idx); if included_in_output { self.include_in_output.insert(variable.clone()); @@ -365,7 +370,7 @@ impl Conjunction { pub fn get_variable_by_pos(&self, pos: usize) -> Option { if pos < self.nodes.len() { - return Some(self.nodes[pos].0.clone()); + return Some(self.nodes[pos].var.clone()); } None } @@ -378,7 +383,7 @@ impl Conjunction { let idx = self.resolve_variable_pos(variable, location.clone())?; if let Some(pos) = idx.checked_sub(self.var_idx_offset) { if pos < self.nodes.len() { - return Ok(self.nodes[pos].1.clone()); + return Ok(self.nodes[pos].spec.clone()); } } @@ -404,7 +409,7 @@ impl Conjunction { result.extend(c); } for n in &self.nodes { - result.extend(n.1.necessary_components(db)); + result.extend(n.spec.necessary_components(db)); } result @@ -585,8 +590,8 @@ impl Conjunction { let mut node2cost: BTreeMap = BTreeMap::new(); for node_nr in 0..self.nodes.len() { - let n_spec = &self.nodes[node_nr].1; - let n_var = &self.nodes[node_nr].0; + let n_spec = &self.nodes[node_nr].spec; + let n_var = &self.nodes[node_nr].var; let node_search = NodeSearch::from_spec( n_spec.clone(), @@ -815,7 +820,7 @@ impl Conjunction { } else if let Some(first) = first_component_id { if first != *cid { // add location and description which node is not connected - let n_var = &self.nodes[*node_nr].0; + let n_var = &self.nodes[*node_nr].var; let location = self.location_in_query.get(n_var); return Err(GraphAnnisError::AQLSemanticError(AQLError { From 43a53626874396bed02688dd06cab27e246d1a63 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 12:46:25 +0200 Subject: [PATCH 03/29] Re-organize the functions to create the execution plan to prepare adding non-existing nodes --- graphannis/src/annis/db/aql/conjunction.rs | 351 ++++++++++++--------- graphannis/src/annis/db/aql/mod.rs | 8 +- 2 files changed, 199 insertions(+), 160 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index bda5ab1bf..17f554ef1 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -61,11 +61,11 @@ pub struct UnaryOperatorEntry { pub node_nr: usize, } -#[derive(Debug)] -struct Node { - var: String, - spec: NodeSearchSpec, - optional: bool, +#[derive(Debug, Clone)] +pub struct Node { + pub var: String, + pub spec: NodeSearchSpec, + pub optional: bool, } #[derive(Debug)] @@ -379,11 +379,11 @@ impl Conjunction { &self, variable: &str, location: Option, - ) -> Result { + ) -> Result { let idx = self.resolve_variable_pos(variable, location.clone())?; if let Some(pos) = idx.checked_sub(self.var_idx_offset) { if pos < self.nodes.len() { - return Ok(self.nodes[pos].spec.clone()); + return Ok(self.nodes[pos].clone()); } } @@ -569,6 +569,175 @@ impl Conjunction { None } + fn add_node_to_exec_plan<'a>( + &'a self, + node_nr: usize, + g: &'a AnnotationGraph, + component2exec: &mut BTreeMap + 'a>>, + node2component: &mut BTreeMap, + node2cost: &mut BTreeMap, + node_search_errors: &mut Vec, + ) { + let n_spec = &self.nodes[node_nr].spec; + let n_var = &self.nodes[node_nr].var; + + let node_search = NodeSearch::from_spec( + n_spec.clone(), + node_nr, + g, + self.location_in_query.get(n_var).cloned(), + ); + match node_search { + Ok(mut node_search) => { + node2component.insert(node_nr, node_nr); + + let (orig_query_frag, orig_impl_desc, cost) = + if let Some(d) = node_search.get_desc() { + if let Some(ref c) = d.cost { + node2cost.insert(node_nr, c.clone()); + } + + ( + d.query_fragment.clone(), + d.impl_description.clone(), + d.cost.clone(), + ) + } else { + (String::from(""), String::from(""), None) + }; + // make sure the description is correct + let mut node_pos = BTreeMap::new(); + node_pos.insert(node_nr, 0); + let new_desc = Desc { + component_nr: node_nr, + lhs: None, + rhs: None, + node_pos, + impl_description: orig_impl_desc, + query_fragment: orig_query_frag, + cost, + }; + node_search.set_desc(Some(new_desc)); + + let node_by_component_search = self.optimize_node_search_by_operator( + node_search.get_node_search_desc(), + node_search.get_desc(), + Box::new(self.binary_operators.iter()), + g, + ); + + // move to map + if let Some(node_by_component_search) = node_by_component_search { + component2exec.insert(node_nr, node_by_component_search); + } else { + component2exec.insert(node_nr, Box::new(node_search)); + } + } + Err(e) => node_search_errors.push(e), + }; + } + + fn add_binary_operator_to_exec_plan<'a>( + &'a self, + op_spec_entry: &BinaryOperatorSpecEntry, + g: &'a AnnotationGraph, + config: &Config, + component2exec: &mut BTreeMap + 'a>>, + node2component: &mut BTreeMap, + node2cost: &BTreeMap, + ) -> Result<()> { + let mut op: BinaryOperator<'a> = op_spec_entry.op.create_operator(g).ok_or_else(|| { + GraphAnnisError::ImpossibleSearch(format!( + "could not create operator {:?}", + op_spec_entry + )) + })?; + + let mut spec_idx_left = op_spec_entry.args.left; + let mut spec_idx_right = op_spec_entry.args.right; + + let inverse_op = op.get_inverse_operator(g); + if let Some(inverse_op) = inverse_op { + if should_switch_operand_order(op_spec_entry, &node2cost) { + spec_idx_left = op_spec_entry.args.right; + spec_idx_right = op_spec_entry.args.left; + + op = inverse_op; + } + } + + // substract the offset from the specificated numbers to get the internal node number for this conjunction + spec_idx_left -= self.var_idx_offset; + spec_idx_right -= self.var_idx_offset; + + let op_entry = BinaryOperatorEntry { + op, + args: BinaryOperatorArguments { + left: spec_idx_left + 1, + right: spec_idx_right + 1, + global_reflexivity: op_spec_entry.args.global_reflexivity, + }, + }; + + let component_left: usize = *(node2component + .get(&spec_idx_left) + .ok_or_else(|| GraphAnnisError::NoComponentForNode(spec_idx_left + 1))?); + let component_right: usize = *(node2component + .get(&spec_idx_right) + .ok_or_else(|| GraphAnnisError::NoComponentForNode(spec_idx_right + 1))?); + + // get the original execution node + let exec_left: Box + 'a> = component2exec + .remove(&component_left) + .ok_or(GraphAnnisError::NoExecutionNode(component_left))?; + + let idx_left: usize = *(exec_left + .get_desc() + .ok_or(GraphAnnisError::PlanDescriptionMissing)? + .node_pos + .get(&spec_idx_left) + .ok_or(GraphAnnisError::LHSOperandNotFound)?); + + let new_exec: Box> = + if component_left == component_right { + // don't create new tuples, only filter the existing ones + // TODO: check if LHS or RHS is better suited as filter input iterator + let idx_right: usize = *(exec_left + .get_desc() + .ok_or(GraphAnnisError::PlanDescriptionMissing)? + .node_pos + .get(&spec_idx_right) + .ok_or(GraphAnnisError::RHSOperandNotFound)?); + + let filter = Filter::new_binary(exec_left, idx_left, idx_right, op_entry); + Box::new(filter) + } else { + let exec_right = component2exec + .remove(&component_right) + .ok_or(GraphAnnisError::NoExecutionNode(component_right))?; + let idx_right: usize = *(exec_right + .get_desc() + .ok_or(GraphAnnisError::PlanDescriptionMissing)? + .node_pos + .get(&spec_idx_right) + .ok_or(GraphAnnisError::RHSOperandNotFound)?); + + create_join( + g, config, op_entry, exec_left, exec_right, idx_left, idx_right, + ) + }; + + let new_component_nr = new_exec + .get_desc() + .ok_or(GraphAnnisError::PlanDescriptionMissing)? + .component_nr; + update_components_for_nodes(node2component, component_left, new_component_nr); + update_components_for_nodes(node2component, component_right, new_component_nr); + component2exec.insert(new_component_nr, new_exec); + + Ok(()) + } + fn make_exec_plan_with_order<'a>( &'a self, db: &'a AnnotationGraph, @@ -581,72 +750,24 @@ impl Conjunction { // semantics check has been performed. let mut node_search_errors: Vec = Vec::default(); - // 1. add all nodes - // Create a map where the key is the component number // and move all nodes with their index as component number. let mut component2exec: BTreeMap + 'a>> = BTreeMap::new(); let mut node2cost: BTreeMap = BTreeMap::new(); + // 1. add all non-optional nodes for node_nr in 0..self.nodes.len() { - let n_spec = &self.nodes[node_nr].spec; - let n_var = &self.nodes[node_nr].var; - - let node_search = NodeSearch::from_spec( - n_spec.clone(), - node_nr, - db, - self.location_in_query.get(n_var).cloned(), - ); - match node_search { - Ok(mut node_search) => { - node2component.insert(node_nr, node_nr); - - let (orig_query_frag, orig_impl_desc, cost) = - if let Some(d) = node_search.get_desc() { - if let Some(ref c) = d.cost { - node2cost.insert(node_nr, c.clone()); - } - - ( - d.query_fragment.clone(), - d.impl_description.clone(), - d.cost.clone(), - ) - } else { - (String::from(""), String::from(""), None) - }; - // make sure the description is correct - let mut node_pos = BTreeMap::new(); - node_pos.insert(node_nr, 0); - let new_desc = Desc { - component_nr: node_nr, - lhs: None, - rhs: None, - node_pos, - impl_description: orig_impl_desc, - query_fragment: orig_query_frag, - cost, - }; - node_search.set_desc(Some(new_desc)); - - let node_by_component_search = self.optimize_node_search_by_operator( - node_search.get_node_search_desc(), - node_search.get_desc(), - Box::new(self.binary_operators.iter()), - db, - ); - - // move to map - if let Some(node_by_component_search) = node_by_component_search { - component2exec.insert(node_nr, node_by_component_search); - } else { - component2exec.insert(node_nr, Box::new(node_search)); - } - } - Err(e) => node_search_errors.push(e), - }; + if !self.nodes[node_nr].optional { + self.add_node_to_exec_plan( + node_nr, + db, + &mut component2exec, + &mut node2component, + &mut node2cost, + &mut node_search_errors, + ); + } } // 2. add unary operators as filter to the existing node search @@ -674,96 +795,14 @@ impl Conjunction { // 3. add the joins which produce the results in operand order for i in operator_order { let op_spec_entry: &BinaryOperatorSpecEntry = &self.binary_operators[i]; - - let mut op: BinaryOperator<'a> = - op_spec_entry.op.create_operator(db).ok_or_else(|| { - GraphAnnisError::ImpossibleSearch(format!( - "could not create operator {:?}", - op_spec_entry - )) - })?; - - let mut spec_idx_left = op_spec_entry.args.left; - let mut spec_idx_right = op_spec_entry.args.right; - - let inverse_op = op.get_inverse_operator(db); - if let Some(inverse_op) = inverse_op { - if should_switch_operand_order(op_spec_entry, &node2cost) { - spec_idx_left = op_spec_entry.args.right; - spec_idx_right = op_spec_entry.args.left; - - op = inverse_op; - } - } - - // substract the offset from the specificated numbers to get the internal node number for this conjunction - spec_idx_left -= self.var_idx_offset; - spec_idx_right -= self.var_idx_offset; - - let op_entry = BinaryOperatorEntry { - op, - args: BinaryOperatorArguments { - left: spec_idx_left + 1, - right: spec_idx_right + 1, - global_reflexivity: op_spec_entry.args.global_reflexivity, - }, - }; - - let component_left: usize = *(node2component - .get(&spec_idx_left) - .ok_or_else(|| GraphAnnisError::NoComponentForNode(spec_idx_left + 1))?); - let component_right: usize = *(node2component - .get(&spec_idx_right) - .ok_or_else(|| GraphAnnisError::NoComponentForNode(spec_idx_right + 1))?); - - // get the original execution node - let exec_left: Box + 'a> = component2exec - .remove(&component_left) - .ok_or(GraphAnnisError::NoExecutionNode(component_left))?; - - let idx_left: usize = *(exec_left - .get_desc() - .ok_or(GraphAnnisError::PlanDescriptionMissing)? - .node_pos - .get(&spec_idx_left) - .ok_or(GraphAnnisError::LHSOperandNotFound)?); - - let new_exec: Box> = - if component_left == component_right { - // don't create new tuples, only filter the existing ones - // TODO: check if LHS or RHS is better suited as filter input iterator - let idx_right: usize = *(exec_left - .get_desc() - .ok_or(GraphAnnisError::PlanDescriptionMissing)? - .node_pos - .get(&spec_idx_right) - .ok_or(GraphAnnisError::RHSOperandNotFound)?); - - let filter = Filter::new_binary(exec_left, idx_left, idx_right, op_entry); - Box::new(filter) - } else { - let exec_right = component2exec - .remove(&component_right) - .ok_or(GraphAnnisError::NoExecutionNode(component_right))?; - let idx_right: usize = *(exec_right - .get_desc() - .ok_or(GraphAnnisError::PlanDescriptionMissing)? - .node_pos - .get(&spec_idx_right) - .ok_or(GraphAnnisError::RHSOperandNotFound)?); - - create_join( - db, config, op_entry, exec_left, exec_right, idx_left, idx_right, - ) - }; - - let new_component_nr = new_exec - .get_desc() - .ok_or(GraphAnnisError::PlanDescriptionMissing)? - .component_nr; - update_components_for_nodes(&mut node2component, component_left, new_component_nr); - update_components_for_nodes(&mut node2component, component_right, new_component_nr); - component2exec.insert(new_component_nr, new_exec); + self.add_binary_operator_to_exec_plan( + op_spec_entry, + db, + config, + &mut component2exec, + &mut node2component, + &node2cost, + )?; } // apply the the node error check diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 333c2cffe..784c26a83 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -102,8 +102,8 @@ fn map_conjunction( end: Some(get_line_and_column_for_pos(pos.end, offsets)), }); - let spec_left = q.resolve_variable(&var_left, op_pos.clone())?; - let spec_right = q.resolve_variable(&var_right, op_pos.clone())?; + let spec_left = q.resolve_variable(&var_left, op_pos.clone())?.spec; + let spec_right = q.resolve_variable(&var_right, op_pos.clone())?.spec; if quirks_mode { match op { @@ -162,8 +162,8 @@ fn map_conjunction( let num_joins = num_pointing_or_dominance_joins.get(orig_var).unwrap_or(&0); // add an additional node for each extra join and join this artificial node with identity relation for _ in 1..*num_joins { - if let Ok(node_spec) = q.resolve_variable(orig_var, None) { - let new_var = q.add_node(node_spec, None); + if let Ok(node) = q.resolve_variable(orig_var, None) { + let new_var = q.add_node(node.spec, None); q.add_operator(Box::new(IdenticalNodeSpec {}), orig_var, &new_var, false)?; } } From 07925765833571f02282197fdee56a700bd25f5a Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 13:40:32 +0200 Subject: [PATCH 04/29] Add empty implementation of the non existing unary operator. Externally, negated operators between optional (possible non-existing) nodes are binary operators. But internally, these will be implemented as unary operator on the one node that is not optional. --- graphannis/src/annis/db/aql/conjunction.rs | 4 +- graphannis/src/annis/db/aql/mod.rs | 105 ++++++++++-------- graphannis/src/annis/db/aql/operators/mod.rs | 2 + .../annis/db/aql/operators/non_existing.rs | 55 +++++++++ graphannis/src/annis/db/exec/filter.rs | 2 +- graphannis/src/annis/operator.rs | 5 +- 6 files changed, 123 insertions(+), 50 deletions(-) create mode 100644 graphannis/src/annis/db/aql/operators/non_existing.rs diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index 17f554ef1..3b93ed6be 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -56,8 +56,8 @@ pub struct BinaryOperatorEntry<'a> { pub args: BinaryOperatorArguments, } -pub struct UnaryOperatorEntry { - pub op: Box, +pub struct UnaryOperatorEntry<'a> { + pub op: Box, pub node_nr: usize, } diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 784c26a83..37d5e7df4 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -102,55 +102,68 @@ fn map_conjunction( end: Some(get_line_and_column_for_pos(pos.end, offsets)), }); - let spec_left = q.resolve_variable(&var_left, op_pos.clone())?.spec; - let spec_right = q.resolve_variable(&var_right, op_pos.clone())?.spec; - - if quirks_mode { - match op { - ast::BinaryOpSpec::Dominance(_) | ast::BinaryOpSpec::Pointing(_) => { - let entry_lhs = num_pointing_or_dominance_joins - .entry(var_left.clone()) - .or_insert(0); - *entry_lhs += 1; - let entry_rhs = num_pointing_or_dominance_joins - .entry(var_right.clone()) - .or_insert(0); - *entry_rhs += 1; - } - ast::BinaryOpSpec::Precedence(ref mut spec) => { - // limit unspecified .* precedence to 50 - spec.dist = if let RangeSpec::Unbound = spec.dist { - RangeSpec::Bound { - min_dist: 1, - max_dist: 50, - } - } else { - spec.dist.clone() - }; - } - ast::BinaryOpSpec::Near(ref mut spec) => { - // limit unspecified ^* near-by operator to 50 - spec.dist = if let RangeSpec::Unbound = spec.dist { - RangeSpec::Bound { - min_dist: 1, - max_dist: 50, - } - } else { - spec.dist.clone() - }; + let node_left = q.resolve_variable(&var_left, op_pos.clone())?; + let node_right = q.resolve_variable(&var_right, op_pos.clone())?; + + if node_left.optional && node_right.optional { + // Not supported yet + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!( + "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", + var_left, var_right), + location: op_pos, + })); + } else if node_left.optional { + } else if node_right.optional { + } else { + if quirks_mode { + match op { + ast::BinaryOpSpec::Dominance(_) | ast::BinaryOpSpec::Pointing(_) => { + let entry_lhs = num_pointing_or_dominance_joins + .entry(var_left.clone()) + .or_insert(0); + *entry_lhs += 1; + let entry_rhs = num_pointing_or_dominance_joins + .entry(var_right.clone()) + .or_insert(0); + *entry_rhs += 1; + } + ast::BinaryOpSpec::Precedence(ref mut spec) => { + // limit unspecified .* precedence to 50 + spec.dist = if let RangeSpec::Unbound = spec.dist { + RangeSpec::Bound { + min_dist: 1, + max_dist: 50, + } + } else { + spec.dist.clone() + }; + } + ast::BinaryOpSpec::Near(ref mut spec) => { + // limit unspecified ^* near-by operator to 50 + spec.dist = if let RangeSpec::Unbound = spec.dist { + RangeSpec::Bound { + min_dist: 1, + max_dist: 50, + } + } else { + spec.dist.clone() + }; + } + _ => {} } - _ => {} } + let mut op_spec = + make_binary_operator_spec(op, node_left.spec.clone(), node_right.spec.clone())?; + if negated { + op_spec = Box::new(NegatedOpSpec { + spec_left: node_left.spec, + spec_right: node_right.spec, + negated_op: op_spec, + }); + } + q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)?; } - let mut op_spec = make_binary_operator_spec(op, spec_left.clone(), spec_right.clone())?; - if negated { - op_spec = Box::new(NegatedOpSpec { - spec_left, - spec_right, - negated_op: op_spec, - }); - } - q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)?; } } diff --git a/graphannis/src/annis/db/aql/operators/mod.rs b/graphannis/src/annis/db/aql/operators/mod.rs index 89cd67fd7..1027561f1 100644 --- a/graphannis/src/annis/db/aql/operators/mod.rs +++ b/graphannis/src/annis/db/aql/operators/mod.rs @@ -46,6 +46,7 @@ mod inclusion; mod leftalignment; mod near; mod negated_op; +mod non_existing; mod overlap; mod precedence; mod rightalignment; @@ -59,6 +60,7 @@ pub use self::inclusion::InclusionSpec; pub use self::leftalignment::LeftAlignmentSpec; pub use self::near::NearSpec; pub use self::negated_op::NegatedOpSpec; +pub use self::non_existing::NonExistingUnaryOperatorSpec; pub use self::overlap::OverlapSpec; pub use self::precedence::PrecedenceSpec; pub use self::rightalignment::RightAlignmentSpec; diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs new file mode 100644 index 000000000..b2977ade8 --- /dev/null +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -0,0 +1,55 @@ +use std::fmt::Display; + +use crate::{ + annis::{ + db::exec::nodesearch::NodeSearchSpec, + operator::{BinaryOperator, BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec}, + }, + AnnotationGraph, +}; + +#[derive(Debug)] +pub struct NonExistingUnaryOperatorSpec { + pub spec_left: NodeSearchSpec, + pub spec_right: NodeSearchSpec, + pub negated_op: Box, +} + +impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { + fn necessary_components( + &self, + g: &crate::AnnotationGraph, + ) -> std::collections::HashSet< + graphannis_core::types::Component, + > { + self.negated_op.necessary_components(g) + } + + fn create_operator<'a>( + &'a self, + g: &'a AnnotationGraph, + ) -> Option> { + let op = NonExistingUnaryOperator { + negated_op: self.negated_op.create_operator(g)?, + }; + Some(Box::new(op)) + } +} + +struct NonExistingUnaryOperator<'a> { + negated_op: BinaryOperator<'a>, +} + +impl<'a> Display for NonExistingUnaryOperator<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!",)?; + self.negated_op.fmt(f)?; + Ok(()) + } +} + +impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { + fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { + todo!() + } +} diff --git a/graphannis/src/annis/db/exec/filter.rs b/graphannis/src/annis/db/exec/filter.rs index 660bc6d12..d3cbfc88c 100644 --- a/graphannis/src/annis/db/exec/filter.rs +++ b/graphannis/src/annis/db/exec/filter.rs @@ -81,7 +81,7 @@ impl<'a> Filter<'a> { pub fn new_unary( exec: Box + 'a>, idx: usize, - op_entry: UnaryOperatorEntry, + op_entry: UnaryOperatorEntry<'a>, ) -> Filter<'a> { let desc = if let Some(orig_desc) = exec.get_desc() { let cost_est = if let Some(ref orig_cost) = orig_desc.cost { diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index fca861299..2e6784df7 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -253,7 +253,10 @@ pub trait UnaryOperatorSpec: std::fmt::Debug { db: &AnnotationGraph, ) -> HashSet>; - fn create_operator(&self, db: &AnnotationGraph) -> Option>; + fn create_operator<'a>( + &'a self, + db: &'a AnnotationGraph, + ) -> Option>; } pub trait UnaryOperator: std::fmt::Display + Send + Sync { From 232745195c85ca7d60f3cc8c449d4165b77462a9 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 14:11:40 +0200 Subject: [PATCH 05/29] Implement some of the NonExistingUnaryOperator functionality --- .../annis/db/aql/operators/non_existing.rs | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index b2977ade8..1f234e744 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -1,20 +1,28 @@ -use std::fmt::Display; +use std::fmt::{Debug, Display}; + +use graphannis_core::{annostorage::AnnotationStorage, types::NodeID}; use crate::{ annis::{ - db::exec::nodesearch::NodeSearchSpec, + db::exec::MatchFilterFunc, operator::{BinaryOperator, BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec}, }, AnnotationGraph, }; -#[derive(Debug)] pub struct NonExistingUnaryOperatorSpec { - pub spec_left: NodeSearchSpec, - pub spec_right: NodeSearchSpec, + pub filter: Vec, pub negated_op: Box, } +impl Debug for NonExistingUnaryOperatorSpec { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NonExistingUnaryOperatorSpec") + .field("negated_op", &self.negated_op) + .finish() + } +} + impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { fn necessary_components( &self, @@ -25,12 +33,14 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { self.negated_op.necessary_components(g) } - fn create_operator<'a>( - &'a self, - g: &'a AnnotationGraph, - ) -> Option> { + fn create_operator<'b>( + &'b self, + g: &'b AnnotationGraph, + ) -> Option> { let op = NonExistingUnaryOperator { negated_op: self.negated_op.create_operator(g)?, + filter: &self.filter, + node_annos: g.get_node_annos(), }; Some(Box::new(op)) } @@ -38,6 +48,8 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { struct NonExistingUnaryOperator<'a> { negated_op: BinaryOperator<'a>, + node_annos: &'a dyn AnnotationStorage, + filter: &'a Vec, } impl<'a> Display for NonExistingUnaryOperator<'a> { @@ -50,6 +62,11 @@ impl<'a> Display for NonExistingUnaryOperator<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - todo!() + match &self.negated_op { + BinaryOperator::Base(_) => todo!(), + BinaryOperator::Index(op) => !op + .retrieve_matches(&m) + .any(|m| self.filter.iter().all(|f| f(&m, self.node_annos))), + } } } From dfbf7d01f09a670a225ce14d476016f005aa114b Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 14:31:27 +0200 Subject: [PATCH 06/29] Only support binary operators with a retrieve_matches() function --- .../annis/db/aql/operators/non_existing.rs | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 1f234e744..b05a1b631 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -5,7 +5,10 @@ use graphannis_core::{annostorage::AnnotationStorage, types::NodeID}; use crate::{ annis::{ db::exec::MatchFilterFunc, - operator::{BinaryOperator, BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec}, + operator::{ + BinaryOperator, BinaryOperatorIndex, BinaryOperatorSpec, UnaryOperator, + UnaryOperatorSpec, + }, }, AnnotationGraph, }; @@ -37,17 +40,19 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { &'b self, g: &'b AnnotationGraph, ) -> Option> { - let op = NonExistingUnaryOperator { - negated_op: self.negated_op.create_operator(g)?, - filter: &self.filter, - node_annos: g.get_node_annos(), - }; - Some(Box::new(op)) + match self.negated_op.create_operator(g)? { + BinaryOperator::Base(_) => None, + BinaryOperator::Index(negated_op) => Some(Box::new(NonExistingUnaryOperator { + negated_op, + filter: &self.filter, + node_annos: g.get_node_annos(), + })), + } } } struct NonExistingUnaryOperator<'a> { - negated_op: BinaryOperator<'a>, + negated_op: Box, node_annos: &'a dyn AnnotationStorage, filter: &'a Vec, } @@ -62,11 +67,10 @@ impl<'a> Display for NonExistingUnaryOperator<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - match &self.negated_op { - BinaryOperator::Base(_) => todo!(), - BinaryOperator::Index(op) => !op - .retrieve_matches(&m) - .any(|m| self.filter.iter().all(|f| f(&m, self.node_annos))), - } + // Only return true of no match was found which matches the operator and node filter + !self + .negated_op + .retrieve_matches(&m) + .any(|m| self.filter.iter().all(|f| f(&m, self.node_annos))) } } From 86351d24dbe56da0cf5619d563221ec4bde72657 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 15:33:01 +0200 Subject: [PATCH 07/29] More consistent names for the node search related helper structs --- graphannis/src/annis/db/aql/conjunction.rs | 14 +++++----- graphannis/src/annis/db/exec/filter.rs | 10 +++---- graphannis/src/annis/db/exec/indexjoin.rs | 10 +++---- graphannis/src/annis/db/exec/mod.rs | 28 +++++++++++-------- graphannis/src/annis/db/exec/nestedloop.rs | 10 +++---- graphannis/src/annis/db/exec/nodesearch.rs | 23 ++++++++------- .../src/annis/db/exec/parallel/indexjoin.rs | 10 +++---- .../src/annis/db/exec/parallel/nestedloop.rs | 10 +++---- graphannis/src/annis/db/exec/tokensearch.rs | 6 ++-- graphannis/src/annis/db/plan.rs | 4 +-- 10 files changed, 66 insertions(+), 59 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index 3b93ed6be..b97fec555 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -8,7 +8,7 @@ use crate::annis::db::exec::indexjoin::IndexJoin; use crate::annis::db::exec::nestedloop::NestedLoop; use crate::annis::db::exec::nodesearch::{NodeSearch, NodeSearchSpec}; use crate::annis::db::exec::parallel; -use crate::annis::db::exec::{CostEstimate, Desc, ExecutionNode, NodeSearchDesc}; +use crate::annis::db::exec::{CostEstimate, ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; use crate::annis::db::{aql::model::AnnotationComponentType, AnnotationStorage}; use crate::annis::errors::*; use crate::annis::operator::{ @@ -62,7 +62,7 @@ pub struct UnaryOperatorEntry<'a> { } #[derive(Debug, Clone)] -pub struct Node { +pub struct NodeSearchSpecEntry { pub var: String, pub spec: NodeSearchSpec, pub optional: bool, @@ -70,7 +70,7 @@ pub struct Node { #[derive(Debug)] pub struct Conjunction { - nodes: Vec, + nodes: Vec, binary_operators: Vec, unary_operators: Vec, variables: HashMap, @@ -279,7 +279,7 @@ impl Conjunction { } else { (idx + 1).to_string() }; - self.nodes.push(Node { + self.nodes.push(NodeSearchSpecEntry { var: variable.clone(), spec: node, optional, @@ -379,7 +379,7 @@ impl Conjunction { &self, variable: &str, location: Option, - ) -> Result { + ) -> Result { let idx = self.resolve_variable_pos(variable, location.clone())?; if let Some(pos) = idx.checked_sub(self.var_idx_offset) { if pos < self.nodes.len() { @@ -513,7 +513,7 @@ impl Conjunction { fn optimize_node_search_by_operator<'a>( &'a self, node_search_desc: Arc, - desc: Option<&Desc>, + desc: Option<&ExecutionNodeDesc>, op_spec_entries: Box + 'a>, db: &'a AnnotationGraph, ) -> Option + 'a>> { @@ -608,7 +608,7 @@ impl Conjunction { // make sure the description is correct let mut node_pos = BTreeMap::new(); node_pos.insert(node_nr, 0); - let new_desc = Desc { + let new_desc = ExecutionNodeDesc { component_nr: node_nr, lhs: None, rhs: None, diff --git a/graphannis/src/annis/db/exec/filter.rs b/graphannis/src/annis/db/exec/filter.rs index d3cbfc88c..ed7c7a0a5 100644 --- a/graphannis/src/annis/db/exec/filter.rs +++ b/graphannis/src/annis/db/exec/filter.rs @@ -1,12 +1,12 @@ use graphannis_core::annostorage::MatchGroup; -use super::{CostEstimate, Desc, ExecutionNode}; +use super::{CostEstimate, ExecutionNode, ExecutionNodeDesc}; use crate::annis::db::aql::conjunction::{BinaryOperatorEntry, UnaryOperatorEntry}; use crate::annis::operator::{BinaryOperatorBase, EstimationType, UnaryOperator}; pub struct Filter<'a> { it: Box + 'a>, - desc: Option, + desc: Option, } fn calculate_binary_outputsize(op: &dyn BinaryOperatorBase, num_tuples: usize) -> usize { @@ -55,7 +55,7 @@ impl<'a> Filter<'a> { None }; - Some(Desc { + Some(ExecutionNodeDesc { component_nr: orig_desc.component_nr, node_pos: orig_desc.node_pos.clone(), impl_description: String::from("filter"), @@ -94,7 +94,7 @@ impl<'a> Filter<'a> { None }; - Some(Desc { + Some(ExecutionNodeDesc { component_nr: orig_desc.component_nr, node_pos: orig_desc.node_pos.clone(), impl_description: String::from("filter"), @@ -119,7 +119,7 @@ impl<'a> ExecutionNode for Filter<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { self.desc.as_ref() } } diff --git a/graphannis/src/annis/db/exec/indexjoin.rs b/graphannis/src/annis/db/exec/indexjoin.rs index e02c3c0cd..30de5729c 100644 --- a/graphannis/src/annis/db/exec/indexjoin.rs +++ b/graphannis/src/annis/db/exec/indexjoin.rs @@ -1,4 +1,4 @@ -use super::{Desc, ExecutionNode, NodeSearchDesc}; +use super::{ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; use crate::annis::db::aql::conjunction::BinaryOperatorArguments; use crate::annis::db::AnnotationStorage; use crate::annis::operator::BinaryOperatorIndex; @@ -18,7 +18,7 @@ pub struct IndexJoin<'a> { lhs_idx: usize, node_search_desc: Arc, node_annos: &'a dyn AnnotationStorage, - desc: Desc, + desc: ExecutionNodeDesc, global_reflexivity: bool, } @@ -38,7 +38,7 @@ impl<'a> IndexJoin<'a> { op_args: &BinaryOperatorArguments, node_search_desc: Arc, node_annos: &'a dyn AnnotationStorage, - rhs_desc: Option<&Desc>, + rhs_desc: Option<&ExecutionNodeDesc>, ) -> IndexJoin<'a> { let lhs_desc = lhs.get_desc().cloned(); let lhs_peek = lhs.peekable(); @@ -65,7 +65,7 @@ impl<'a> IndexJoin<'a> { }; IndexJoin { - desc: Desc::join( + desc: ExecutionNodeDesc::join( op.as_ref(), lhs_desc.as_ref(), rhs_desc, @@ -107,7 +107,7 @@ impl<'a> ExecutionNode for IndexJoin<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { Some(&self.desc) } } diff --git a/graphannis/src/annis/db/exec/mod.rs b/graphannis/src/annis/db/exec/mod.rs index cb7715d51..47c7748c3 100644 --- a/graphannis/src/annis/db/exec/mod.rs +++ b/graphannis/src/annis/db/exec/mod.rs @@ -19,11 +19,12 @@ pub struct CostEstimate { pub processed_in_step: usize, } +/// Representation of the execution node tree. #[derive(Debug, Clone)] -pub struct Desc { +pub struct ExecutionNodeDesc { pub component_nr: usize, - pub lhs: Option>, - pub rhs: Option>, + pub lhs: Option>, + pub rhs: Option>, /// Maps the index of the node in the actual result to the index in the internal execution plan intermediate result. pub node_pos: BTreeMap, pub impl_description: String, @@ -56,8 +57,11 @@ pub struct NodeDescArg { node_nr: usize, } -impl Desc { - pub fn empty_with_fragment(node_desc_arg: NodeDescArg, est_size: Option) -> Desc { +impl ExecutionNodeDesc { + pub fn empty_with_fragment( + node_desc_arg: NodeDescArg, + est_size: Option, + ) -> ExecutionNodeDesc { let mut node_pos = BTreeMap::new(); node_pos.insert(node_desc_arg.node_nr, 0); @@ -67,7 +71,7 @@ impl Desc { processed_in_step: 0, }); - Desc { + ExecutionNodeDesc { component_nr: 0, lhs: None, rhs: None, @@ -80,12 +84,12 @@ impl Desc { pub fn join( op: &Op, - lhs: Option<&Desc>, - rhs: Option<&Desc>, + lhs: Option<&ExecutionNodeDesc>, + rhs: Option<&ExecutionNodeDesc>, impl_description: &str, query_fragment: &str, processed_func: &dyn Fn(EstimationType, usize, usize) -> usize, - ) -> Desc { + ) -> ExecutionNodeDesc { let component_nr = if let Some(d) = lhs { d.component_nr } else if let Some(d) = rhs { @@ -131,7 +135,7 @@ impl Desc { None }; - Desc { + ExecutionNodeDesc { component_nr, lhs: lhs.map(|x| Box::new(x.clone())), rhs: rhs.map(|x| Box::new(x.clone())), @@ -198,7 +202,7 @@ pub trait ExecutionNode: Iterator { None } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { None } @@ -225,7 +229,7 @@ impl ExecutionNode for EmptyResultSet { None } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { None } } diff --git a/graphannis/src/annis/db/exec/nestedloop.rs b/graphannis/src/annis/db/exec/nestedloop.rs index 1538e4ca5..79a81a0c7 100644 --- a/graphannis/src/annis/db/exec/nestedloop.rs +++ b/graphannis/src/annis/db/exec/nestedloop.rs @@ -1,6 +1,6 @@ use graphannis_core::annostorage::MatchGroup; -use super::{Desc, ExecutionNode}; +use super::{ExecutionNode, ExecutionNodeDesc}; use crate::annis::db::aql::conjunction::BinaryOperatorEntry; use crate::annis::operator::{BinaryOperator, BinaryOperatorBase}; use std::iter::Peekable; @@ -15,7 +15,7 @@ pub struct NestedLoop<'a> { pos_inner_cache: Option, left_is_outer: bool, - desc: Desc, + desc: ExecutionNodeDesc, global_reflexivity: bool, } @@ -49,7 +49,7 @@ impl<'a> NestedLoop<'a> { if left_is_outer { NestedLoop { - desc: Desc::join( + desc: ExecutionNodeDesc::join( &op_entry.op, lhs.get_desc(), rhs.get_desc(), @@ -73,7 +73,7 @@ impl<'a> NestedLoop<'a> { } } else { NestedLoop { - desc: Desc::join( + desc: ExecutionNodeDesc::join( &op_entry.op, rhs.get_desc(), lhs.get_desc(), @@ -104,7 +104,7 @@ impl<'a> ExecutionNode for NestedLoop<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { Some(&self.desc) } } diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 9a6b624bc..64304b89c 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -1,5 +1,5 @@ use super::MatchFilterFunc; -use super::{Desc, ExecutionNode, NodeSearchDesc}; +use super::{ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; use crate::annis::db::exec::tokensearch; use crate::annis::db::exec::tokensearch::AnyTokenSearch; use crate::annis::db::{aql::model::AnnotationComponentType, AnnotationStorage}; @@ -28,7 +28,7 @@ pub struct NodeSearch<'a> { /// The actual search implementation it: Box + 'a>, - desc: Option, + desc: Option, node_search_desc: Arc, is_sorted: bool, } @@ -360,7 +360,7 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), - desc: Some(Desc::empty_with_fragment( + desc: Some(ExecutionNodeDesc::empty_with_fragment( super::NodeDescArg { query_fragment, node_nr, @@ -478,7 +478,7 @@ impl<'a> NodeSearch<'a> { } Ok(NodeSearch { it: Box::new(it), - desc: Some(Desc::empty_with_fragment( + desc: Some(ExecutionNodeDesc::empty_with_fragment( super::NodeDescArg { query_fragment: query_fragment.to_owned(), node_nr, @@ -589,7 +589,10 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), - desc: Some(Desc::empty_with_fragment(node_desc_arg, Some(est_output))), + desc: Some(ExecutionNodeDesc::empty_with_fragment( + node_desc_arg, + Some(est_output), + )), node_search_desc: Arc::new(NodeSearchDesc { qname: (qname.0, Some(qname.1)), cond: filters, @@ -829,7 +832,7 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), - desc: Some(Desc::empty_with_fragment( + desc: Some(ExecutionNodeDesc::empty_with_fragment( super::NodeDescArg { query_fragment: query_fragment.to_owned(), node_nr, @@ -888,7 +891,7 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), - desc: Some(Desc::empty_with_fragment( + desc: Some(ExecutionNodeDesc::empty_with_fragment( super::NodeDescArg { query_fragment: query_fragment.to_owned(), node_nr, @@ -910,7 +913,7 @@ impl<'a> NodeSearch<'a> { pub fn new_partofcomponentsearch( db: &'a AnnotationGraph, node_search_desc: Arc, - desc: Option<&Desc>, + desc: Option<&ExecutionNodeDesc>, components: HashSet>, edge_anno_spec: Option, ) -> Result> { @@ -981,7 +984,7 @@ impl<'a> NodeSearch<'a> { }) } - pub fn set_desc(&mut self, desc: Option) { + pub fn set_desc(&mut self, desc: Option) { self.desc = desc; } @@ -995,7 +998,7 @@ impl<'a> ExecutionNode for NodeSearch<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { self.desc.as_ref() } diff --git a/graphannis/src/annis/db/exec/parallel/indexjoin.rs b/graphannis/src/annis/db/exec/parallel/indexjoin.rs index d14739676..74ff362a9 100644 --- a/graphannis/src/annis/db/exec/parallel/indexjoin.rs +++ b/graphannis/src/annis/db/exec/parallel/indexjoin.rs @@ -1,4 +1,4 @@ -use super::super::{Desc, ExecutionNode, NodeSearchDesc}; +use super::super::{ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; use crate::annis::db::aql::conjunction::BinaryOperatorArguments; use crate::annis::db::AnnotationStorage; use crate::annis::operator::BinaryOperatorIndex; @@ -21,7 +21,7 @@ pub struct IndexJoin<'a> { lhs_idx: usize, node_search_desc: Arc, node_annos: &'a dyn AnnotationStorage, - desc: Desc, + desc: ExecutionNodeDesc, global_reflexivity: bool, } @@ -41,7 +41,7 @@ impl<'a> IndexJoin<'a> { op_args: &BinaryOperatorArguments, node_search_desc: Arc, node_annos: &'a dyn AnnotationStorage, - rhs_desc: Option<&Desc>, + rhs_desc: Option<&ExecutionNodeDesc>, ) -> IndexJoin<'a> { let lhs_desc = lhs.get_desc().cloned(); let lhs_peek = lhs.peekable(); @@ -68,7 +68,7 @@ impl<'a> IndexJoin<'a> { }; IndexJoin { - desc: Desc::join( + desc: ExecutionNodeDesc::join( op.as_binary_operator(), lhs_desc.as_ref(), rhs_desc, @@ -196,7 +196,7 @@ impl<'a> ExecutionNode for IndexJoin<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { Some(&self.desc) } } diff --git a/graphannis/src/annis/db/exec/parallel/nestedloop.rs b/graphannis/src/annis/db/exec/parallel/nestedloop.rs index f8c2ed3bd..f1eceabac 100644 --- a/graphannis/src/annis/db/exec/parallel/nestedloop.rs +++ b/graphannis/src/annis/db/exec/parallel/nestedloop.rs @@ -1,4 +1,4 @@ -use super::super::{Desc, ExecutionNode}; +use super::super::{ExecutionNode, ExecutionNodeDesc}; use crate::annis::db::aql::conjunction::BinaryOperatorEntry; use crate::annis::operator::BinaryOperatorBase; use graphannis_core::annostorage::MatchGroup; @@ -22,7 +22,7 @@ pub struct NestedLoop<'a> { pos_inner_cache: Option, left_is_outer: bool, - desc: Desc, + desc: ExecutionNodeDesc, global_reflexivity: bool, } @@ -58,7 +58,7 @@ impl<'a> NestedLoop<'a> { if left_is_outer { NestedLoop { - desc: Desc::join( + desc: ExecutionNodeDesc::join( &op_entry.op, lhs.get_desc(), rhs.get_desc(), @@ -85,7 +85,7 @@ impl<'a> NestedLoop<'a> { } } else { NestedLoop { - desc: Desc::join( + desc: ExecutionNodeDesc::join( &op_entry.op, rhs.get_desc(), lhs.get_desc(), @@ -228,7 +228,7 @@ impl<'a> ExecutionNode for NestedLoop<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { Some(&self.desc) } } diff --git a/graphannis/src/annis/db/exec/tokensearch.rs b/graphannis/src/annis/db/exec/tokensearch.rs index f177bf579..063a48872 100644 --- a/graphannis/src/annis/db/exec/tokensearch.rs +++ b/graphannis/src/annis/db/exec/tokensearch.rs @@ -1,5 +1,5 @@ -use crate::annis::db::exec::Desc; use crate::annis::db::exec::ExecutionNode; +use crate::annis::db::exec::ExecutionNodeDesc; use crate::annis::db::sort_matches; use crate::annis::db::sort_matches::CollationType; use crate::annis::db::token_helper; @@ -21,7 +21,7 @@ use std::sync::Arc; /// An [ExecutionNode](#impl-ExecutionNode) which wraps the search for *all* token in a corpus. pub struct AnyTokenSearch<'a> { - desc: Option, + desc: Option, node_type_key: Arc, db: &'a AnnotationGraph, token_helper: Option>, @@ -127,7 +127,7 @@ impl<'a> ExecutionNode for AnyTokenSearch<'a> { self } - fn get_desc(&self) -> Option<&Desc> { + fn get_desc(&self) -> Option<&ExecutionNodeDesc> { self.desc.as_ref() } } diff --git a/graphannis/src/annis/db/plan.rs b/graphannis/src/annis/db/plan.rs index ba446a11c..646e1c067 100644 --- a/graphannis/src/annis/db/plan.rs +++ b/graphannis/src/annis/db/plan.rs @@ -1,6 +1,6 @@ use crate::annis::db::aql::disjunction::Disjunction; use crate::annis::db::aql::Config; -use crate::annis::db::exec::{Desc, EmptyResultSet, ExecutionNode}; +use crate::annis::db::exec::{EmptyResultSet, ExecutionNode, ExecutionNodeDesc}; use crate::AnnotationGraph; use crate::{annis::errors::*, graph::Match}; use graphannis_core::{ @@ -15,7 +15,7 @@ use std::sync::Arc; pub struct ExecutionPlan<'a> { plans: Vec + 'a>>, current_plan: usize, - descriptions: Vec>, + descriptions: Vec>, inverse_node_pos: Vec>>, proxy_mode: bool, unique_result_set: HashSet)>>, From 2b75c24e274f246652afa1a881c31d190e364674 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 16:16:59 +0200 Subject: [PATCH 08/29] Move NodeDescArg as private struct into nodesearch package --- graphannis/src/annis/db/corpusstorage.rs | 7 +++- graphannis/src/annis/db/exec/mod.rs | 12 +++---- graphannis/src/annis/db/exec/nodesearch.rs | 38 ++++++++++------------ 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/graphannis/src/annis/db/corpusstorage.rs b/graphannis/src/annis/db/corpusstorage.rs index 726c79007..e0b939498 100644 --- a/graphannis/src/annis/db/corpusstorage.rs +++ b/graphannis/src/annis/db/corpusstorage.rs @@ -2182,7 +2182,12 @@ impl CorpusStorage { let mut query = Conjunction::new(); query.add_node( - NodeSearchSpec::new_exact(Some(ANNIS_NS), NODE_TYPE, Some("corpus"), false), + NodeSearchSpec::ExactValue { + ns: Some(ANNIS_NS.into()), + name: NODE_TYPE.into(), + val: Some("corpus".into()), + is_meta: false, + }, None, ); diff --git a/graphannis/src/annis/db/exec/mod.rs b/graphannis/src/annis/db/exec/mod.rs index 47c7748c3..ab3232e24 100644 --- a/graphannis/src/annis/db/exec/mod.rs +++ b/graphannis/src/annis/db/exec/mod.rs @@ -52,18 +52,14 @@ fn calculate_outputsize( std::cmp::max(output, 1) } -pub struct NodeDescArg { - query_fragment: String, - node_nr: usize, -} - impl ExecutionNodeDesc { pub fn empty_with_fragment( - node_desc_arg: NodeDescArg, + node_nr: usize, + query_fragment: String, est_size: Option, ) -> ExecutionNodeDesc { let mut node_pos = BTreeMap::new(); - node_pos.insert(node_desc_arg.node_nr, 0); + node_pos.insert(node_nr, 0); let cost = est_size.map(|output| CostEstimate { output, @@ -77,7 +73,7 @@ impl ExecutionNodeDesc { rhs: None, node_pos, impl_description: String::from(""), - query_fragment: node_desc_arg.query_fragment, + query_fragment, cost, } } diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 64304b89c..ae5e92afb 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -32,6 +32,11 @@ pub struct NodeSearch<'a> { node_search_desc: Arc, is_sorted: bool, } +struct NodeDescArg { + query_fragment: String, + node_nr: usize, +} + #[derive(Clone, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)] pub enum NodeSearchSpec { ExactValue { @@ -242,7 +247,7 @@ impl<'a> NodeSearch<'a> { &val, false, is_meta, - super::NodeDescArg { + NodeDescArg { query_fragment, node_nr, }, @@ -274,7 +279,7 @@ impl<'a> NodeSearch<'a> { &val, true, is_meta, - super::NodeDescArg { + NodeDescArg { query_fragment, node_nr, }, @@ -361,10 +366,8 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( - super::NodeDescArg { - query_fragment, - node_nr, - }, + node_nr, + query_fragment, Some(est_output), )), node_search_desc: Arc::new(NodeSearchDesc { @@ -479,10 +482,8 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( - super::NodeDescArg { - query_fragment: query_fragment.to_owned(), - node_nr, - }, + node_nr, + query_fragment.to_owned(), Some(est_output), )), node_search_desc: Arc::new(NodeSearchDesc { @@ -500,7 +501,7 @@ impl<'a> NodeSearch<'a> { pattern: &str, negated: bool, is_meta: bool, - node_desc_arg: super::NodeDescArg, + node_desc_arg: NodeDescArg, location_in_query: Option, ) -> Result> { // match_regex works only with values @@ -590,7 +591,8 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( - node_desc_arg, + node_desc_arg.node_nr, + node_desc_arg.query_fragment, Some(est_output), )), node_search_desc: Arc::new(NodeSearchDesc { @@ -833,10 +835,8 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( - super::NodeDescArg { - query_fragment: query_fragment.to_owned(), - node_nr, - }, + node_nr, + query_fragment.to_owned(), Some(est_output), )), node_search_desc: Arc::new(NodeSearchDesc { @@ -892,10 +892,8 @@ impl<'a> NodeSearch<'a> { Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( - super::NodeDescArg { - query_fragment: query_fragment.to_owned(), - node_nr, - }, + node_nr, + query_fragment.to_owned(), Some(est_output), )), node_search_desc: Arc::new(NodeSearchDesc { From a449030f449d1dca2337f63559edf3d1707b932f Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 16:18:27 +0200 Subject: [PATCH 09/29] Remove now unused function --- graphannis/src/annis/db/exec/nodesearch.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index ae5e92afb..8c0360a94 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -82,20 +82,6 @@ pub enum NodeSearchSpec { } impl NodeSearchSpec { - pub fn new_exact( - ns: Option<&str>, - name: &str, - val: Option<&str>, - is_meta: bool, - ) -> NodeSearchSpec { - NodeSearchSpec::ExactValue { - ns: ns.map(String::from), - name: String::from(name), - val: val.map(String::from), - is_meta, - } - } - pub fn necessary_components( &self, db: &AnnotationGraph, From 18829a0075089575e38d8d426a8e326116b437ab Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 17:55:01 +0200 Subject: [PATCH 10/29] Refactor NodeSearch to make it easier to extract the filter functions without re-implementing the complex logic --- core/src/annostorage/inmemory.rs | 11 +- core/src/annostorage/mod.rs | 4 +- core/src/annostorage/ondisk.rs | 9 +- .../annis/db/aql/operators/non_existing.rs | 34 +- graphannis/src/annis/db/aql/parser.lalrpop | 2 +- graphannis/src/annis/db/exec/indexjoin.rs | 3 +- graphannis/src/annis/db/exec/mod.rs | 6 +- graphannis/src/annis/db/exec/nodesearch.rs | 386 ++++++++++-------- .../src/annis/db/exec/parallel/indexjoin.rs | 3 +- 9 files changed, 271 insertions(+), 187 deletions(-) diff --git a/core/src/annostorage/inmemory.rs b/core/src/annostorage/inmemory.rs index 5ae5c79c7..54c71ad62 100644 --- a/core/src/annostorage/inmemory.rs +++ b/core/src/annostorage/inmemory.rs @@ -1,4 +1,4 @@ -use super::{AnnotationStorage, Match, MatchGroup}; +use super::{AnnotationStorage, Match}; use crate::annostorage::ValueSearch; use crate::errors::Result; use crate::malloc_size_of::MallocSizeOf; @@ -8,6 +8,7 @@ use crate::{annostorage::symboltable::SymbolTable, errors::GraphAnnisCoreError}; use core::ops::Bound::*; use itertools::Itertools; use rustc_hash::{FxHashMap, FxHashSet}; +use smallvec::SmallVec; use smartstring::alias::String; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; @@ -407,11 +408,11 @@ where ns: Option<&str>, name: Option<&str>, it: Box>, - ) -> MatchGroup { + ) -> SmallVec<[Match; 8]> { if let Some(name) = name { if let Some(ns) = ns { // return the only possible annotation for each node - let mut matches = MatchGroup::new(); + let mut matches = SmallVec::<[Match; 8]>::new(); let key = Arc::from(AnnoKey { ns: ns.into(), name: name.into(), @@ -441,7 +442,7 @@ where }) .collect(); // return all annotations with the correct name for each node - let mut matches = MatchGroup::new(); + let mut matches = SmallVec::<[Match; 8]>::new(); for item in it { for (key_symbol, key) in matching_key_symbols.iter() { if let Some(all_annos) = self.by_container.get(&item) { @@ -458,7 +459,7 @@ where } } else { // return all annotations for each node - let mut matches = MatchGroup::new(); + let mut matches = SmallVec::<[Match; 8]>::new(); for item in it { let all_keys = self.get_all_keys_for_item(&item, None, None); for anno_key in all_keys { diff --git a/core/src/annostorage/mod.rs b/core/src/annostorage/mod.rs index 7e62e7bd7..5420bc69a 100644 --- a/core/src/annostorage/mod.rs +++ b/core/src/annostorage/mod.rs @@ -155,13 +155,13 @@ where /// Get the matching annotation keys for each item in the iterator. /// - /// This function allows to filter the received annotation keys by the specifying the namespace and name. + /// This function allows to filter the received annotation keys by specifying the namespace and name. fn get_keys_for_iterator( &self, ns: Option<&str>, name: Option<&str>, it: Box>, - ) -> MatchGroup; + ) -> SmallVec<[Match; 8]>; /// Return the total number of annotations contained in this `AnnotationStorage`. fn number_of_annotations(&self) -> usize; diff --git a/core/src/annostorage/ondisk.rs b/core/src/annostorage/ondisk.rs index 986e5662a..e7cb9d27a 100644 --- a/core/src/annostorage/ondisk.rs +++ b/core/src/annostorage/ondisk.rs @@ -8,6 +8,7 @@ use crate::util::disk_collections::{DiskMap, EvictionStrategy}; use crate::util::{self, memory_estimation}; use core::ops::Bound::*; use rand::seq::IteratorRandom; +use smallvec::SmallVec; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::{Path, PathBuf}; @@ -15,8 +16,6 @@ use std::sync::Arc; use smartstring::alias::String as SmartString; -use super::MatchGroup; - pub const SUBFOLDER_NAME: &str = "nodes_diskmap_v1"; const UTF_8_MSG: &str = "String must be valid UTF-8 but was corrupted"; @@ -464,7 +463,7 @@ where ns: Option<&str>, name: Option<&str>, it: Box>, - ) -> MatchGroup { + ) -> SmallVec<[Match; 8]> { if let Some(name) = name { if let Some(ns) = ns { // return the only possible annotation for each node @@ -472,7 +471,7 @@ where ns: ns.into(), name: name.into(), }); - let mut matches = MatchGroup::new(); + let mut matches = SmallVec::<[Match; 8]>::new(); if let Some(symbol_id) = self.anno_key_symbols.get_symbol(&key) { // create a template key let mut container_key = create_by_container_key(T::default(), symbol_id); @@ -500,7 +499,7 @@ where }) .collect(); // return all annotations with the correct name for each node - let mut matches = MatchGroup::new(); + let mut matches = SmallVec::<[Match; 8]>::new(); for item in it { for (container_key, anno_key) in matching_qnames.iter_mut() { // Set the first bytes to the ID of the item. diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index b05a1b631..ee8d6639a 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -4,17 +4,19 @@ use graphannis_core::{annostorage::AnnotationStorage, types::NodeID}; use crate::{ annis::{ - db::exec::MatchFilterFunc, + db::exec::{nodesearch::NodeSearchSpec, MatchValueFilterFunc}, operator::{ BinaryOperator, BinaryOperatorIndex, BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec, }, + types::LineColumnRange, }, AnnotationGraph, }; pub struct NonExistingUnaryOperatorSpec { - pub filter: Vec, + pub node_search: NodeSearchSpec, + pub node_location: Option, pub negated_op: Box, } @@ -40,12 +42,18 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { &'b self, g: &'b AnnotationGraph, ) -> Option> { + let value_filter = self + .node_search + .get_value_filter(g, self.node_location.clone()) + .ok()?; + let node_search_qname = self.node_search.get_anno_qname(); match self.negated_op.create_operator(g)? { BinaryOperator::Base(_) => None, BinaryOperator::Index(negated_op) => Some(Box::new(NonExistingUnaryOperator { negated_op, - filter: &self.filter, node_annos: g.get_node_annos(), + node_search_qname, + value_filter, })), } } @@ -54,7 +62,8 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { struct NonExistingUnaryOperator<'a> { negated_op: Box, node_annos: &'a dyn AnnotationStorage, - filter: &'a Vec, + node_search_qname: (Option, Option), + value_filter: Vec, } impl<'a> Display for NonExistingUnaryOperator<'a> { @@ -67,10 +76,17 @@ impl<'a> Display for NonExistingUnaryOperator<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - // Only return true of no match was found which matches the operator and node filter - !self - .negated_op - .retrieve_matches(&m) - .any(|m| self.filter.iter().all(|f| f(&m, self.node_annos))) + // Extract the annotation keys for the matches + let it = self.negated_op.retrieve_matches(&m).map(|m| m.node); + let candidates = self.node_annos.get_keys_for_iterator( + self.node_search_qname.0.as_deref(), + self.node_search_qname.1.as_deref(), + Box::new(it), + ); + + // Only return true of no match was found which matches the operator and node value filter + !candidates + .into_iter() + .any(|m| self.value_filter.iter().all(|f| f(&m, self.node_annos))) } } diff --git a/graphannis/src/annis/db/aql/parser.lalrpop b/graphannis/src/annis/db/aql/parser.lalrpop index 5ffdddcd2..1f7c38983 100644 --- a/graphannis/src/annis/db/aql/parser.lalrpop +++ b/graphannis/src/annis/db/aql/parser.lalrpop @@ -233,7 +233,7 @@ NodeSearch : NodeSearchSpec = { }; spec }, - // searching for a span value on any tokenization/segmentation layer abc" + // searching for a span value on any tokenization/segmentation layer, e.g. "abc" => { let spec = match val.1 { ast::StringMatchType::Exact => { diff --git a/graphannis/src/annis/db/exec/indexjoin.rs b/graphannis/src/annis/db/exec/indexjoin.rs index 30de5729c..1ea8a4fb1 100644 --- a/graphannis/src/annis/db/exec/indexjoin.rs +++ b/graphannis/src/annis/db/exec/indexjoin.rs @@ -4,6 +4,7 @@ use crate::annis::db::AnnotationStorage; use crate::annis::operator::BinaryOperatorIndex; use crate::{annis::operator::EstimationType, graph::Match}; use graphannis_core::{annostorage::MatchGroup, types::NodeID}; +use smallvec::SmallVec; use std::boxed::Box; use std::iter::Peekable; use std::sync::Arc; @@ -83,7 +84,7 @@ impl<'a> IndexJoin<'a> { } } - fn next_candidates(&mut self) -> Option { + fn next_candidates(&mut self) -> Option> { if let Some(m_lhs) = self.lhs.peek().cloned() { let it_nodes = Box::from( self.op diff --git a/graphannis/src/annis/db/exec/mod.rs b/graphannis/src/annis/db/exec/mod.rs index ab3232e24..5605359ca 100644 --- a/graphannis/src/annis/db/exec/mod.rs +++ b/graphannis/src/annis/db/exec/mod.rs @@ -183,12 +183,14 @@ impl ExecutionNodeDesc { } } -pub type MatchFilterFunc = +/// Filter function for the value of a given match, but assumes the given match has already the +/// correct annotation namespace/name. +pub type MatchValueFilterFunc = Box) -> bool + Send + Sync>; pub struct NodeSearchDesc { pub qname: (Option, Option), - pub cond: Vec, + pub cond: Vec, pub const_output: Option>, } diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 8c0360a94..69efbcd1f 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -1,4 +1,4 @@ -use super::MatchFilterFunc; +use super::MatchValueFilterFunc; use super::{ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; use crate::annis::db::exec::tokensearch; use crate::annis::db::exec::tokensearch::AnyTokenSearch; @@ -91,6 +91,187 @@ impl NodeSearchSpec { } HashSet::default() } + + /// Get the annotatio qualified name needed to execute a search with this specification. + pub fn get_anno_qname(&self) -> (Option, Option) { + match self { + NodeSearchSpec::ExactValue { ns, name, .. } + | NodeSearchSpec::NotExactValue { ns, name, .. } + | NodeSearchSpec::RegexValue { ns, name, .. } + | NodeSearchSpec::NotRegexValue { ns, name, .. } => { + (ns.to_owned(), Some(name.to_owned())) + } + NodeSearchSpec::ExactTokenValue { .. } + | NodeSearchSpec::NotExactTokenValue { .. } + | NodeSearchSpec::RegexTokenValue { .. } + | NodeSearchSpec::NotRegexTokenValue { .. } + | NodeSearchSpec::AnyToken { .. } => ( + Some(TOKEN_KEY.ns.clone().into()), + Some(TOKEN_KEY.name.clone().into()), + ), + NodeSearchSpec::AnyNode => ( + Some(NODE_TYPE_KEY.ns.clone().into()), + Some(NODE_TYPE_KEY.name.clone().into()), + ), + } + } + + /// Creates a vector of value filter functions for this node annotation search. + pub fn get_value_filter( + &self, + g: &AnnotationGraph, + location_in_query: Option, + ) -> Result> { + let mut filters: Vec = Vec::new(); + match self { + NodeSearchSpec::ExactValue { val, .. } => { + if let Some(val) = val { + let val: String = val.to_owned(); + filters.push(Box::new(move |m, node_annos| { + if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) + { + anno_val == val.as_str() + } else { + false + } + })); + } + } + NodeSearchSpec::NotExactValue { val, .. } => { + let val: String = val.to_owned(); + filters.push(Box::new(move |m, node_annos| { + if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + anno_val != val.as_str() + } else { + false + } + })); + } + NodeSearchSpec::RegexValue { val, .. } => { + let full_match_pattern = graphannis_core::util::regex_full_match(val); + let re = regex::Regex::new(&full_match_pattern); + match re { + Ok(re) => { + filters.push(Box::new(move |m, node_annos| { + if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + re.is_match(&val) + } else { + false + } + })); + } + Err(e) => { + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!("/{}/ -> {}", val, e), + location: location_in_query, + })); + } + } + } + NodeSearchSpec::NotRegexValue { val, .. } => { + let full_match_pattern = graphannis_core::util::regex_full_match(val); + let re = regex::Regex::new(&full_match_pattern); + match re { + Ok(re) => { + filters.push(Box::new(move |m, node_annos| { + if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + !re.is_match(&val) + } else { + false + } + })); + } + Err(e) => { + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!("/{}/ -> {}", val, e), + location: location_in_query, + })); + } + } + } + NodeSearchSpec::ExactTokenValue { val, leafs_only } => { + let val = val.clone(); + filters.push(Box::new(move |m, node_annos| { + if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + anno_val == val.as_str() + } else { + false + } + })); + if *leafs_only { + filters.push(create_token_leaf_filter(g)); + } + } + NodeSearchSpec::NotExactTokenValue { val } => { + let val = val.clone(); + filters.push(Box::new(move |m, node_annos| { + if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + anno_val != val.as_str() + } else { + false + } + })); + } + NodeSearchSpec::RegexTokenValue { val, leafs_only } => { + let full_match_pattern = graphannis_core::util::regex_full_match(val); + let re = regex::Regex::new(&full_match_pattern); + match re { + Ok(re) => filters.push(Box::new(move |m, node_annos| { + if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + re.is_match(&val) + } else { + false + } + })), + Err(e) => { + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!("/{}/ -> {}", val, e), + location: location_in_query, + })); + } + }; + if *leafs_only { + filters.push(create_token_leaf_filter(g)); + } + } + NodeSearchSpec::NotRegexTokenValue { val } => { + let full_match_pattern = graphannis_core::util::regex_full_match(val); + let re = regex::Regex::new(&full_match_pattern); + match re { + Ok(re) => filters.push(Box::new(move |m, node_annos| { + if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + !re.is_match(&val) + } else { + false + } + })), + Err(e) => { + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!("/{}/ -> {}", val, e), + location: location_in_query, + })); + } + }; + } + NodeSearchSpec::AnyToken => { + filters.push(create_token_leaf_filter(g)); + } + NodeSearchSpec::AnyNode => { + let filter_func: Box< + dyn Fn(&Match, &dyn AnnotationStorage) -> bool + Send + Sync, + > = Box::new(move |m, node_annos| { + if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { + val == "node" + } else { + false + } + }); + filters.push(filter_func) + } + } + + Ok(filters) + } } impl fmt::Display for NodeSearchSpec { @@ -182,6 +363,32 @@ impl fmt::Display for NodeSearchSpec { } } +fn create_token_leaf_filter(g: &AnnotationGraph) -> MatchValueFilterFunc { + let cov_gs: Vec> = g + .get_all_components(Some(AnnotationComponentType::Coverage), None) + .into_iter() + .filter_map(|c| g.get_graphstorage(&c)) + .filter(|gs| { + if let Some(stats) = gs.get_statistics() { + stats.nodes > 0 + } else { + true + } + }) + .collect(); + + let filter_func: Box) -> bool + Send + Sync> = + Box::new(move |m, _| { + for cov in cov_gs.iter() { + if cov.get_outgoing_edges(m.node).next().is_some() { + return false; + } + } + true + }); + filter_func +} + impl<'a> NodeSearch<'a> { pub fn from_spec( spec: NodeSearchSpec, @@ -191,6 +398,8 @@ impl<'a> NodeSearch<'a> { ) -> Result> { let query_fragment = format!("{}", spec); + let filters = spec.get_value_filter(db, location_in_query.to_owned())?; + match spec { NodeSearchSpec::ExactValue { ns, @@ -201,6 +410,7 @@ impl<'a> NodeSearch<'a> { db, (ns, name), val.into(), + filters, is_meta, &query_fragment, node_nr, @@ -214,6 +424,7 @@ impl<'a> NodeSearch<'a> { db, (ns, name), ValueSearch::NotSome(val), + filters, is_meta, &query_fragment, node_nr, @@ -232,18 +443,19 @@ impl<'a> NodeSearch<'a> { (ns, name), &val, false, + filters, is_meta, NodeDescArg { query_fragment, node_nr, }, - location_in_query, ) } else { NodeSearch::new_annosearch_exact( db, (ns, name), ValueSearch::Some(val), + filters, is_meta, &query_fragment, node_nr, @@ -264,18 +476,19 @@ impl<'a> NodeSearch<'a> { (ns, name), &val, true, + filters, is_meta, NodeDescArg { query_fragment, node_nr, }, - location_in_query, ) } else { NodeSearch::new_annosearch_exact( db, (ns, name), ValueSearch::NotSome(val), + filters, is_meta, &query_fragment, node_nr, @@ -285,38 +498,38 @@ impl<'a> NodeSearch<'a> { NodeSearchSpec::ExactTokenValue { val, leafs_only } => NodeSearch::new_tokensearch( db, ValueSearch::Some(val), + filters, leafs_only, false, &query_fragment, node_nr, - location_in_query, ), NodeSearchSpec::NotExactTokenValue { val } => NodeSearch::new_tokensearch( db, ValueSearch::NotSome(val), + filters, true, false, &query_fragment, node_nr, - location_in_query, ), NodeSearchSpec::RegexTokenValue { val, leafs_only } => NodeSearch::new_tokensearch( db, ValueSearch::Some(val), + filters, leafs_only, true, &query_fragment, node_nr, - location_in_query, ), NodeSearchSpec::NotRegexTokenValue { val } => NodeSearch::new_tokensearch( db, ValueSearch::NotSome(val), + filters, true, true, &query_fragment, node_nr, - location_in_query, ), NodeSearchSpec::AnyToken => { NodeSearch::new_anytoken_search(db, &query_fragment, node_nr) @@ -374,6 +587,7 @@ impl<'a> NodeSearch<'a> { db: &'a AnnotationGraph, qname: (Option, String), val: ValueSearch, + filters: Vec, is_meta: bool, query_fragment: &str, node_nr: usize, @@ -442,29 +656,6 @@ impl<'a> NodeSearch<'a> { let it = base_it.map(|n| smallvec![n]); - let mut filters: Vec = Vec::new(); - - match val { - ValueSearch::Any => {} - ValueSearch::Some(val) => { - filters.push(Box::new(move |m, node_annos| { - if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - anno_val == val.as_str() - } else { - false - } - })); - } - ValueSearch::NotSome(val) => { - filters.push(Box::new(move |m, node_annos| { - if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - anno_val != val.as_str() - } else { - false - } - })); - } - } Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( @@ -486,9 +677,9 @@ impl<'a> NodeSearch<'a> { qname: (Option, String), pattern: &str, negated: bool, + filters: Vec, is_meta: bool, node_desc_arg: NodeDescArg, - location_in_query: Option, ) -> Result> { // match_regex works only with values let base_it = @@ -542,38 +733,6 @@ impl<'a> NodeSearch<'a> { let it = base_it.map(|n| smallvec![n]); - let mut filters: Vec = Vec::new(); - - let full_match_pattern = graphannis_core::util::regex_full_match(pattern); - let re = regex::Regex::new(&full_match_pattern); - match re { - Ok(re) => { - if negated { - filters.push(Box::new(move |m, node_annos| { - if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - !re.is_match(&val) - } else { - false - } - })); - } else { - filters.push(Box::new(move |m, node_annos| { - if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - re.is_match(&val) - } else { - false - } - })); - } - } - Err(e) => { - return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("/{}/ -> {}", pattern, e), - location: location_in_query, - })); - } - } - Ok(NodeSearch { it: Box::new(it), desc: Some(ExecutionNodeDesc::empty_with_fragment( @@ -593,11 +752,11 @@ impl<'a> NodeSearch<'a> { fn new_tokensearch( db: &'a AnnotationGraph, val: ValueSearch, + filters: Vec, leafs_only: bool, match_regex: bool, query_fragment: &str, node_nr: usize, - location_in_query: Option, ) -> Result> { let it_base: Box> = match val { ValueSearch::Any => { @@ -677,101 +836,6 @@ impl<'a> NodeSearch<'a> { anno_key: NODE_TYPE_KEY.clone(), }] }); - // create filter functions - let mut filters: Vec = Vec::new(); - - match val { - ValueSearch::Some(ref val) => { - if match_regex { - let full_match_pattern = graphannis_core::util::regex_full_match(val); - let re = regex::Regex::new(&full_match_pattern); - match re { - Ok(re) => filters.push(Box::new(move |m, node_annos| { - if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - re.is_match(&val) - } else { - false - } - })), - Err(e) => { - return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("/{}/ -> {}", val, e), - location: location_in_query, - })); - } - }; - } else { - let val = val.clone(); - filters.push(Box::new(move |m, node_annos| { - if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) - { - anno_val == val.as_str() - } else { - false - } - })); - }; - } - ValueSearch::NotSome(ref val) => { - if match_regex { - let full_match_pattern = graphannis_core::util::regex_full_match(val); - let re = regex::Regex::new(&full_match_pattern); - match re { - Ok(re) => filters.push(Box::new(move |m, node_annos| { - if let Some(val) = node_annos.get_value_for_item(&m.node, &m.anno_key) { - !re.is_match(&val) - } else { - false - } - })), - Err(e) => { - return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!("/{}/ -> {}", val, e), - location: location_in_query, - })); - } - }; - } else { - let val = val.clone(); - filters.push(Box::new(move |m, node_annos| { - if let Some(anno_val) = node_annos.get_value_for_item(&m.node, &m.anno_key) - { - anno_val != val.as_str() - } else { - false - } - })); - }; - } - ValueSearch::Any => {} - }; - - if leafs_only { - let cov_gs: Vec> = db - .get_all_components(Some(AnnotationComponentType::Coverage), None) - .into_iter() - .filter_map(|c| db.get_graphstorage(&c)) - .filter(|gs| { - if let Some(stats) = gs.get_statistics() { - stats.nodes > 0 - } else { - true - } - }) - .collect(); - - let filter_func: Box< - dyn Fn(&Match, &dyn AnnotationStorage) -> bool + Send + Sync, - > = Box::new(move |m, _| { - for cov in cov_gs.iter() { - if cov.get_outgoing_edges(m.node).next().is_some() { - return false; - } - } - true - }); - filters.push(filter_func); - }; // TODO: is_leaf should be part of the estimation let est_output = match val { @@ -844,7 +908,7 @@ impl<'a> NodeSearch<'a> { ) -> Result> { let it: Box> = Box::from(AnyTokenSearch::new(db)?); // create filter functions - let mut filters: Vec = Vec::new(); + let mut filters: Vec = Vec::new(); let cov_gs: Vec> = db .get_all_components(Some(AnnotationComponentType::Coverage), None) @@ -859,7 +923,7 @@ impl<'a> NodeSearch<'a> { }) .collect(); - let filter_func: MatchFilterFunc = Box::new(move |m, _| { + let filter_func: MatchValueFilterFunc = Box::new(move |m, _| { for cov in cov_gs.iter() { if cov.get_outgoing_edges(m.node).next().is_some() { return false; diff --git a/graphannis/src/annis/db/exec/parallel/indexjoin.rs b/graphannis/src/annis/db/exec/parallel/indexjoin.rs index 74ff362a9..fb9e92127 100644 --- a/graphannis/src/annis/db/exec/parallel/indexjoin.rs +++ b/graphannis/src/annis/db/exec/parallel/indexjoin.rs @@ -5,6 +5,7 @@ use crate::annis::operator::BinaryOperatorIndex; use crate::{annis::operator::EstimationType, graph::Match}; use graphannis_core::{annostorage::MatchGroup, types::NodeID}; use rayon::prelude::*; +use smallvec::SmallVec; use std::iter::Peekable; use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::Arc; @@ -181,7 +182,7 @@ fn next_candidates( lhs_idx: usize, node_annos: &dyn AnnotationStorage, node_search_desc: &Arc, -) -> MatchGroup { +) -> SmallVec<[Match; 8]> { let it_nodes = Box::from(op.retrieve_matches(&m_lhs[lhs_idx]).map(|m| m.node).fuse()); node_annos.get_keys_for_iterator( From 058179ecd285dd5e9ec9a6e49b52a1b9afbdc665 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 18:13:31 +0200 Subject: [PATCH 11/29] Add an unary operator for negated operators refering to non-existing nodes --- graphannis/src/annis/db/aql/conjunction.rs | 2 + graphannis/src/annis/db/aql/mod.rs | 117 ++++++++++++--------- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index b97fec555..eb00100af 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -66,6 +66,7 @@ pub struct NodeSearchSpecEntry { pub var: String, pub spec: NodeSearchSpec, pub optional: bool, + pub location: Option, } #[derive(Debug)] @@ -283,6 +284,7 @@ impl Conjunction { var: variable.clone(), spec: node, optional, + location: location.clone(), }); self.variables.insert(variable.clone(), idx); diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 37d5e7df4..9b8003ac4 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -14,7 +14,8 @@ lalrpop_mod!( use crate::annis::db::aql::conjunction::Conjunction; use crate::annis::db::aql::disjunction::Disjunction; use crate::annis::db::aql::operators::{ - EqualValueSpec, IdenticalNodeSpec, NegatedOpSpec, PartOfSubCorpusSpec, RangeSpec, + EqualValueSpec, IdenticalNodeSpec, NegatedOpSpec, NonExistingUnaryOperatorSpec, + PartOfSubCorpusSpec, RangeSpec, }; use crate::annis::db::exec::nodesearch::NodeSearchSpec; use crate::annis::errors::*; @@ -105,63 +106,77 @@ fn map_conjunction( let node_left = q.resolve_variable(&var_left, op_pos.clone())?; let node_right = q.resolve_variable(&var_right, op_pos.clone())?; - if node_left.optional && node_right.optional { - // Not supported yet - return Err(GraphAnnisError::AQLSemanticError(AQLError { + if quirks_mode { + match op { + ast::BinaryOpSpec::Dominance(_) | ast::BinaryOpSpec::Pointing(_) => { + let entry_lhs = num_pointing_or_dominance_joins + .entry(var_left.clone()) + .or_insert(0); + *entry_lhs += 1; + let entry_rhs = num_pointing_or_dominance_joins + .entry(var_right.clone()) + .or_insert(0); + *entry_rhs += 1; + } + ast::BinaryOpSpec::Precedence(ref mut spec) => { + // limit unspecified .* precedence to 50 + spec.dist = if let RangeSpec::Unbound = spec.dist { + RangeSpec::Bound { + min_dist: 1, + max_dist: 50, + } + } else { + spec.dist.clone() + }; + } + ast::BinaryOpSpec::Near(ref mut spec) => { + // limit unspecified ^* near-by operator to 50 + spec.dist = if let RangeSpec::Unbound = spec.dist { + RangeSpec::Bound { + min_dist: 1, + max_dist: 50, + } + } else { + spec.dist.clone() + }; + } + _ => {} + } + } + let mut op_spec = + make_binary_operator_spec(op, node_left.spec.clone(), node_right.spec.clone())?; + if negated { + op_spec = Box::new(NegatedOpSpec { + spec_left: node_left.spec.clone(), + spec_right: node_right.spec.clone(), + negated_op: op_spec, + }); + if node_left.optional && node_right.optional { + // Not supported yet + return Err(GraphAnnisError::AQLSemanticError(AQLError { desc: format!( "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", var_left, var_right), location: op_pos, })); - } else if node_left.optional { - } else if node_right.optional { - } else { - if quirks_mode { - match op { - ast::BinaryOpSpec::Dominance(_) | ast::BinaryOpSpec::Pointing(_) => { - let entry_lhs = num_pointing_or_dominance_joins - .entry(var_left.clone()) - .or_insert(0); - *entry_lhs += 1; - let entry_rhs = num_pointing_or_dominance_joins - .entry(var_right.clone()) - .or_insert(0); - *entry_rhs += 1; - } - ast::BinaryOpSpec::Precedence(ref mut spec) => { - // limit unspecified .* precedence to 50 - spec.dist = if let RangeSpec::Unbound = spec.dist { - RangeSpec::Bound { - min_dist: 1, - max_dist: 50, - } - } else { - spec.dist.clone() - }; - } - ast::BinaryOpSpec::Near(ref mut spec) => { - // limit unspecified ^* near-by operator to 50 - spec.dist = if let RangeSpec::Unbound = spec.dist { - RangeSpec::Bound { - min_dist: 1, - max_dist: 50, - } - } else { - spec.dist.clone() - }; - } - _ => {} - } - } - let mut op_spec = - make_binary_operator_spec(op, node_left.spec.clone(), node_right.spec.clone())?; - if negated { - op_spec = Box::new(NegatedOpSpec { - spec_left: node_left.spec, - spec_right: node_right.spec, + } else if node_left.optional { + let spec = NonExistingUnaryOperatorSpec { + node_search: node_left.spec.clone(), + node_location: node_left.location, + negated_op: op_spec, + }; + q.add_unary_operator_from_query(Box::new(spec), &node_right.var, op_pos)?; + } else if node_right.optional { + let spec = NonExistingUnaryOperatorSpec { + node_search: node_right.spec.clone(), + node_location: node_right.location, negated_op: op_spec, - }); + }; + q.add_unary_operator_from_query(Box::new(spec), &node_left.var, op_pos)?; + } else { + q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)? } + } else { q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)?; } } From 66798e0594b643beab5d38bdb67575b7ef7a6a6e Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 18:37:26 +0200 Subject: [PATCH 12/29] Exclude all optional nodes from the component calculation --- graphannis/src/annis/db/aql/conjunction.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index eb00100af..c880dcdec 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -826,8 +826,17 @@ impl Conjunction { fn check_components_connected(&self) -> Result<()> { let mut node2component: BTreeMap = BTreeMap::new(); - node2component - .extend((self.var_idx_offset..self.nodes.len() + self.var_idx_offset).map(|i| (i, i))); + node2component.extend( + self.nodes + .iter() + .enumerate() + // Exclude all optional nodes from the component calculation + .filter(|(_i, n)| !n.optional) + // Use the global node number when there are several conjunctions + .map(|(i, _n)| self.var_idx_offset + i) + // Set the node position as initial unique component number + .map(|i| (i, i)), + ); for op_entry in self.binary_operators.iter() { if op_entry.op.is_binding() { From 179afae7df4c5c5d59dbd73580d56ed66652a76f Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 18:38:00 +0200 Subject: [PATCH 13/29] Add the original operator to the non-existing unary operator. The negation is handled by the unary operator logic. --- graphannis/src/annis/db/aql/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 9b8003ac4..9edc0fa8d 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -146,11 +146,6 @@ fn map_conjunction( let mut op_spec = make_binary_operator_spec(op, node_left.spec.clone(), node_right.spec.clone())?; if negated { - op_spec = Box::new(NegatedOpSpec { - spec_left: node_left.spec.clone(), - spec_right: node_right.spec.clone(), - negated_op: op_spec, - }); if node_left.optional && node_right.optional { // Not supported yet return Err(GraphAnnisError::AQLSemanticError(AQLError { @@ -172,8 +167,14 @@ fn map_conjunction( node_location: node_right.location, negated_op: op_spec, }; + q.add_unary_operator_from_query(Box::new(spec), &node_left.var, op_pos)?; } else { + op_spec = Box::new(NegatedOpSpec { + spec_left: node_left.spec.clone(), + spec_right: node_right.spec.clone(), + negated_op: op_spec, + }); q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)? } } else { From 974eb87f67759b4f6bfcdf4442fe163781e2fb30 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 18:46:13 +0200 Subject: [PATCH 14/29] Add a search test for negated dominance and non-existance --- graphannis/tests/searchtest_queries.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/graphannis/tests/searchtest_queries.csv b/graphannis/tests/searchtest_queries.csv index d3eeb63a7..0d103ed4e 100644 --- a/graphannis/tests/searchtest_queries.csv +++ b/graphannis/tests/searchtest_queries.csv @@ -30,6 +30,7 @@ NegationOverlap,"t#tok & m#""modern"" & ((#t ->dep #m)|(#m ->dep #t)) & r#ref _o NegationLeftAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_l_ #2",GUM,9 NegationRightAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_r_ #2",GUM,9 NegationNodeIdentity,"m#""modern"" & (n#claws5=""NN1"" | n#claws5=""NN2"") & #m . #n _r_ o#node & #n !_ident_ #o",GUM,16 +NonExistingDominance,"cat=""S"" > cat=""NP"" > ""Wikinews"" & #1 !>* /U\.?S\.?/?",GUM,19 NotDocument,"tok @* doc!=""maz-11299""",pcc2.1,34709 StructureInclusionSeed,"cat=""S"" _i_ cat=""AP""",pcc2.1,726 IsConnectedRange,"""Jugendlichen"" .3,10 ""Musikcafé""",pcc2.1,1 From 80ea59dbc03ee2dd2e136974353acd76446e4b31 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Mon, 13 Sep 2021 19:08:56 +0200 Subject: [PATCH 15/29] Explicitly mark this case as todo!() instead of failing silently --- graphannis/src/annis/db/aql/operators/non_existing.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index ee8d6639a..5f26c9a55 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -48,12 +48,13 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { .ok()?; let node_search_qname = self.node_search.get_anno_qname(); match self.negated_op.create_operator(g)? { - BinaryOperator::Base(_) => None, + BinaryOperator::Base(_) => todo!(), BinaryOperator::Index(negated_op) => Some(Box::new(NonExistingUnaryOperator { negated_op, node_annos: g.get_node_annos(), node_search_qname, value_filter, + node_search_spec: self.node_search.clone(), })), } } @@ -63,13 +64,15 @@ struct NonExistingUnaryOperator<'a> { negated_op: Box, node_annos: &'a dyn AnnotationStorage, node_search_qname: (Option, Option), + node_search_spec: NodeSearchSpec, value_filter: Vec, } impl<'a> Display for NonExistingUnaryOperator<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "!",)?; + write!(f, " !",)?; self.negated_op.fmt(f)?; + write!(f, " {}", self.node_search_spec)?; Ok(()) } } From efa066f30920734df4dc26ea2f966124f393872e Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 14:50:33 +0200 Subject: [PATCH 16/29] Begin to refactor code to use a more general internal subquery instead of trying to replicate the optimization logic --- graphannis/src/annis/db/aql/conjunction.rs | 14 +- .../src/annis/db/aql/conjunction/test.rs | 14 +- graphannis/src/annis/db/aql/mod.rs | 92 ++++++------- .../src/annis/db/aql/operators/edge_op.rs | 24 +++- .../src/annis/db/aql/operators/equal_value.rs | 7 +- .../annis/db/aql/operators/identical_cov.rs | 6 +- .../annis/db/aql/operators/identical_node.rs | 7 +- .../src/annis/db/aql/operators/inclusion.rs | 6 +- .../annis/db/aql/operators/leftalignment.rs | 7 +- graphannis/src/annis/db/aql/operators/near.rs | 6 +- .../src/annis/db/aql/operators/negated_op.rs | 10 +- .../annis/db/aql/operators/non_existing.rs | 127 +++++++++++------- .../src/annis/db/aql/operators/overlap.rs | 6 +- .../src/annis/db/aql/operators/precedence.rs | 6 +- .../annis/db/aql/operators/rightalignment.rs | 7 +- graphannis/src/annis/db/corpusstorage.rs | 18 +-- graphannis/src/annis/operator.rs | 6 +- 17 files changed, 226 insertions(+), 137 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index c880dcdec..109f37d72 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -41,13 +41,13 @@ pub struct BinaryOperatorArguments { #[derive(Debug)] struct BinaryOperatorSpecEntry { - op: Box, + op: Arc, args: BinaryOperatorArguments, } #[derive(Debug)] struct UnaryOperatorSpecEntry { - op: Box, + op: Arc, idx: usize, } @@ -299,7 +299,7 @@ impl Conjunction { pub fn add_unary_operator_from_query( &mut self, - op: Box, + op: Arc, var: &str, location: Option, ) -> Result<()> { @@ -317,7 +317,7 @@ impl Conjunction { pub fn add_operator( &mut self, - op: Box, + op: Arc, var_left: &str, var_right: &str, global_reflexivity: bool, @@ -327,7 +327,7 @@ impl Conjunction { pub fn add_operator_from_query( &mut self, - op: Box, + op: Arc, var_left: &str, var_right: &str, location: Option, @@ -417,7 +417,7 @@ impl Conjunction { result } - fn optimize_join_order_heuristics( + pub fn optimize_join_order_heuristics( &self, db: &AnnotationGraph, config: &Config, @@ -740,7 +740,7 @@ impl Conjunction { Ok(()) } - fn make_exec_plan_with_order<'a>( + pub fn make_exec_plan_with_order<'a>( &'a self, db: &'a AnnotationGraph, config: &Config, diff --git a/graphannis/src/annis/db/aql/conjunction/test.rs b/graphannis/src/annis/db/aql/conjunction/test.rs index f6073cdf8..6cb272520 100644 --- a/graphannis/src/annis/db/aql/conjunction/test.rs +++ b/graphannis/src/annis/db/aql/conjunction/test.rs @@ -20,12 +20,14 @@ fn parse_negation_filter_expression() { let op2 = op_entry2.op.into_any(); assert_eq!(true, op1.is::()); + assert_eq!(true, op2.is::()); - let op2 = op2.downcast::().unwrap(); let negated_op = op2 + .downcast_ref::() + .unwrap() .negated_op - .into_any() - .downcast::() + .any_ref() + .downcast_ref::() .unwrap(); assert_eq!("", negated_op.name); @@ -51,11 +53,11 @@ fn parse_negation_between_ops_expression() { let op_entry1 = alt.binary_operators.remove(1); let op1 = op_entry1.op.into_any(); - let op1 = op1.downcast::().unwrap(); + let op1 = op1.downcast_ref::().unwrap(); let negated_op = op1 .negated_op - .into_any() - .downcast::() + .any_ref() + .downcast_ref::() .unwrap(); assert_eq!(None, negated_op.segmentation); diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 9edc0fa8d..3c86049d6 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -24,6 +24,7 @@ use crate::annis::types::{LineColumn, LineColumnRange}; use lalrpop_util::ParseError; use std::collections::BTreeMap; use std::collections::HashMap; +use std::sync::Arc; thread_local! { static AQL_PARSER: parser::DisjunctionParser = parser::DisjunctionParser::new(); @@ -146,36 +147,35 @@ fn map_conjunction( let mut op_spec = make_binary_operator_spec(op, node_left.spec.clone(), node_right.spec.clone())?; if negated { - if node_left.optional && node_right.optional { - // Not supported yet - return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!( - "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", - var_left, var_right), - location: op_pos, - })); - } else if node_left.optional { - let spec = NonExistingUnaryOperatorSpec { - node_search: node_left.spec.clone(), - node_location: node_left.location, - negated_op: op_spec, - }; - q.add_unary_operator_from_query(Box::new(spec), &node_right.var, op_pos)?; - } else if node_right.optional { - let spec = NonExistingUnaryOperatorSpec { - node_search: node_right.spec.clone(), - node_location: node_right.location, - negated_op: op_spec, - }; - - q.add_unary_operator_from_query(Box::new(spec), &node_left.var, op_pos)?; - } else { - op_spec = Box::new(NegatedOpSpec { + if !node_left.optional && !node_right.optional { + op_spec = Arc::new(NegatedOpSpec { spec_left: node_left.spec.clone(), spec_right: node_right.spec.clone(), negated_op: op_spec, }); q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)? + } else { + if node_left.optional && node_right.optional { + // Not supported yet + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!( + "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", + var_left, var_right), + location: op_pos, + })); + } else { + let target_left = node_left.optional; + let spec = NonExistingUnaryOperatorSpec { + op: op_spec, + target: if target_left { + node_left.spec + } else { + node_right.spec + }, + target_left, + }; + q.add_unary_operator_from_query(Arc::new(spec), &node_right.var, op_pos)?; + } } } else { q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)?; @@ -193,7 +193,7 @@ fn map_conjunction( for _ in 1..*num_joins { if let Ok(node) = q.resolve_variable(orig_var, None) { let new_var = q.add_node(node.spec, None); - q.add_operator(Box::new(IdenticalNodeSpec {}), orig_var, &new_var, false)?; + q.add_operator(Arc::new(IdenticalNodeSpec {}), orig_var, &new_var, false)?; } } } @@ -314,7 +314,7 @@ fn add_legacy_metadata_constraints( if let Some(first_meta_idx) = first_meta_idx.clone() { // avoid nested loops by joining additional meta nodes with a "identical node" q.add_operator( - Box::new(IdenticalNodeSpec {}), + Arc::new(IdenticalNodeSpec {}), &first_meta_idx, &meta_node_idx, true, @@ -323,7 +323,7 @@ fn add_legacy_metadata_constraints( first_meta_idx = Some(meta_node_idx.clone()); // add a special join to the first node of the query q.add_operator( - Box::new(PartOfSubCorpusSpec { + Arc::new(PartOfSubCorpusSpec { dist: RangeSpec::Unbound, }), &first_node_pos, @@ -345,7 +345,7 @@ fn add_legacy_metadata_constraints( false, ); q.add_operator( - Box::new(IdenticalNodeSpec {}), + Arc::new(IdenticalNodeSpec {}), &meta_node_idx, &doc_anno_idx, true, @@ -477,26 +477,26 @@ fn make_binary_operator_spec( op: ast::BinaryOpSpec, spec_left: NodeSearchSpec, spec_right: NodeSearchSpec, -) -> Result> { - let op_spec: Box = match op { - ast::BinaryOpSpec::Dominance(spec) => Box::new(spec), - ast::BinaryOpSpec::Pointing(spec) => Box::new(spec), - ast::BinaryOpSpec::Precedence(spec) => Box::new(spec), - ast::BinaryOpSpec::Near(spec) => Box::new(spec), - ast::BinaryOpSpec::Overlap(spec) => Box::new(spec), - ast::BinaryOpSpec::IdenticalCoverage(spec) => Box::new(spec), - ast::BinaryOpSpec::PartOfSubCorpus(spec) => Box::new(spec), - ast::BinaryOpSpec::Inclusion(spec) => Box::new(spec), - ast::BinaryOpSpec::LeftAlignment(spec) => Box::new(spec), - ast::BinaryOpSpec::RightAlignment(spec) => Box::new(spec), - ast::BinaryOpSpec::IdenticalNode(spec) => Box::new(spec), +) -> Result> { + let op_spec: Arc = match op { + ast::BinaryOpSpec::Dominance(spec) => Arc::new(spec), + ast::BinaryOpSpec::Pointing(spec) => Arc::new(spec), + ast::BinaryOpSpec::Precedence(spec) => Arc::new(spec), + ast::BinaryOpSpec::Near(spec) => Arc::new(spec), + ast::BinaryOpSpec::Overlap(spec) => Arc::new(spec), + ast::BinaryOpSpec::IdenticalCoverage(spec) => Arc::new(spec), + ast::BinaryOpSpec::PartOfSubCorpus(spec) => Arc::new(spec), + ast::BinaryOpSpec::Inclusion(spec) => Arc::new(spec), + ast::BinaryOpSpec::LeftAlignment(spec) => Arc::new(spec), + ast::BinaryOpSpec::RightAlignment(spec) => Arc::new(spec), + ast::BinaryOpSpec::IdenticalNode(spec) => Arc::new(spec), ast::BinaryOpSpec::ValueComparison(cmp) => match cmp { - ast::ComparisonOperator::Equal => Box::new(EqualValueSpec { + ast::ComparisonOperator::Equal => Arc::new(EqualValueSpec { spec_left, spec_right, negated: false, }), - ast::ComparisonOperator::NotEqual => Box::new(EqualValueSpec { + ast::ComparisonOperator::NotEqual => Arc::new(EqualValueSpec { spec_left, spec_right, negated: true, @@ -506,9 +506,9 @@ fn make_binary_operator_spec( Ok(op_spec) } -fn make_unary_operator_spec(op: ast::UnaryOpSpec) -> Box { +fn make_unary_operator_spec(op: ast::UnaryOpSpec) -> Arc { match op { - ast::UnaryOpSpec::Arity(spec) => Box::new(spec), + ast::UnaryOpSpec::Arity(spec) => Arc::new(spec), } } diff --git a/graphannis/src/annis/db/aql/operators/edge_op.rs b/graphannis/src/annis/db/aql/operators/edge_op.rs index 3a28f9cc4..b6b8f6e4c 100644 --- a/graphannis/src/annis/db/aql/operators/edge_op.rs +++ b/graphannis/src/annis/db/aql/operators/edge_op.rs @@ -68,7 +68,11 @@ impl BinaryOperatorSpec for BaseEdgeOpSpec { self.edge_anno.clone() } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } @@ -535,7 +539,11 @@ impl BinaryOperatorSpec for DominanceSpec { base.create_operator(db) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } @@ -576,7 +584,11 @@ impl BinaryOperatorSpec for PointingSpec { base.create_operator(db) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } @@ -617,7 +629,11 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec { base.create_operator(db) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/equal_value.rs b/graphannis/src/annis/db/aql/operators/equal_value.rs index 2d34b2e9a..d66ec730f 100644 --- a/graphannis/src/annis/db/aql/operators/equal_value.rs +++ b/graphannis/src/annis/db/aql/operators/equal_value.rs @@ -16,6 +16,7 @@ use graphannis_core::{ use std::any::Any; use std::borrow::Cow; use std::collections::HashSet; +use std::sync::Arc; #[derive(Debug, Clone, PartialOrd, Ord, Hash, PartialEq, Eq)] pub struct EqualValueSpec { @@ -45,7 +46,11 @@ impl BinaryOperatorSpec for EqualValueSpec { false } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/identical_cov.rs b/graphannis/src/annis/db/aql/operators/identical_cov.rs index d8c0bff07..d53d19fb5 100644 --- a/graphannis/src/annis/db/aql/operators/identical_cov.rs +++ b/graphannis/src/annis/db/aql/operators/identical_cov.rs @@ -61,7 +61,11 @@ impl BinaryOperatorSpec for IdenticalCoverageSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/identical_node.rs b/graphannis/src/annis/db/aql/operators/identical_node.rs index 5d859b527..049fb0b64 100644 --- a/graphannis/src/annis/db/aql/operators/identical_node.rs +++ b/graphannis/src/annis/db/aql/operators/identical_node.rs @@ -6,6 +6,7 @@ use crate::{ use graphannis_core::{graph::DEFAULT_ANNO_KEY, types::Component}; use std::any::Any; use std::collections::HashSet; +use std::sync::Arc; #[derive(Debug, Clone, PartialOrd, Ord, Hash, PartialEq, Eq)] pub struct IdenticalNodeSpec; @@ -22,7 +23,11 @@ impl BinaryOperatorSpec for IdenticalNodeSpec { Some(BinaryOperator::Index(Box::new(IdenticalNode {}))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/inclusion.rs b/graphannis/src/annis/db/aql/operators/inclusion.rs index e4d7b0419..3f7d40199 100644 --- a/graphannis/src/annis/db/aql/operators/inclusion.rs +++ b/graphannis/src/annis/db/aql/operators/inclusion.rs @@ -50,7 +50,11 @@ impl BinaryOperatorSpec for InclusionSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/leftalignment.rs b/graphannis/src/annis/db/aql/operators/leftalignment.rs index 2eebc8ae8..8a82fb253 100644 --- a/graphannis/src/annis/db/aql/operators/leftalignment.rs +++ b/graphannis/src/annis/db/aql/operators/leftalignment.rs @@ -7,6 +7,7 @@ use crate::{annis::operator::EstimationType, graph::Match}; use graphannis_core::{graph::DEFAULT_ANNO_KEY, types::Component}; use std::any::Any; use std::collections::HashSet; +use std::sync::Arc; #[derive(Clone, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)] pub struct LeftAlignmentSpec; @@ -31,7 +32,11 @@ impl BinaryOperatorSpec for LeftAlignmentSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/near.rs b/graphannis/src/annis/db/aql/operators/near.rs index 2669fa737..5a679a12f 100644 --- a/graphannis/src/annis/db/aql/operators/near.rs +++ b/graphannis/src/annis/db/aql/operators/near.rs @@ -54,7 +54,11 @@ impl BinaryOperatorSpec for NearSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/negated_op.rs b/graphannis/src/annis/db/aql/operators/negated_op.rs index 1bda5f446..9df7f1581 100644 --- a/graphannis/src/annis/db/aql/operators/negated_op.rs +++ b/graphannis/src/annis/db/aql/operators/negated_op.rs @@ -1,4 +1,4 @@ -use std::{any::Any, fmt::Display}; +use std::{any::Any, fmt::Display, sync::Arc}; use crate::{ annis::{ @@ -13,7 +13,7 @@ use graphannis_core::annostorage::Match; pub struct NegatedOpSpec { pub spec_left: NodeSearchSpec, pub spec_right: NodeSearchSpec, - pub negated_op: Box, + pub negated_op: Arc, } impl BinaryOperatorSpec for NegatedOpSpec { @@ -39,7 +39,11 @@ impl BinaryOperatorSpec for NegatedOpSpec { } } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 5f26c9a55..5b5a90f38 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -1,30 +1,50 @@ -use std::fmt::{Debug, Display}; +use std::{ + collections::HashSet, + fmt::{Debug, Display}, + sync::Arc, +}; -use graphannis_core::{annostorage::AnnotationStorage, types::NodeID}; +use graphannis_core::graph::{ANNIS_NS, NODE_NAME}; use crate::{ annis::{ - db::exec::{nodesearch::NodeSearchSpec, MatchValueFilterFunc}, - operator::{ - BinaryOperator, BinaryOperatorIndex, BinaryOperatorSpec, UnaryOperator, - UnaryOperatorSpec, + db::{ + aql::{conjunction::Conjunction, Config}, + exec::nodesearch::NodeSearchSpec, }, - types::LineColumnRange, + errors::Result, + operator::{BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec}, }, AnnotationGraph, }; +#[derive(Debug)] pub struct NonExistingUnaryOperatorSpec { - pub node_search: NodeSearchSpec, - pub node_location: Option, - pub negated_op: Box, + pub op: Arc, + pub target: NodeSearchSpec, + pub target_left: bool, } -impl Debug for NonExistingUnaryOperatorSpec { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("NonExistingUnaryOperatorSpec") - .field("negated_op", &self.negated_op) - .finish() +impl NonExistingUnaryOperatorSpec { + fn create_subquery_conjunction(&self) -> Result { + let mut subquery = Conjunction::new(); + let var_flex = subquery.add_node( + NodeSearchSpec::ExactValue { + ns: Some(ANNIS_NS.into()), + name: NODE_NAME.into(), + val: None, + is_meta: false, + }, + None, + ); + let var_target = subquery.add_node(self.target.clone(), None); + if self.target_left { + subquery.add_operator(self.op.clone(), &var_target, &var_flex, true)?; + } else { + subquery.add_operator(self.op.clone(), &var_flex, &var_target, true)?; + } + + Ok(subquery) } } @@ -35,61 +55,66 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { ) -> std::collections::HashSet< graphannis_core::types::Component, > { - self.negated_op.necessary_components(g) + if let Ok(subquery) = self.create_subquery_conjunction() { + subquery.necessary_components(g) + } else { + HashSet::default() + } } fn create_operator<'b>( &'b self, g: &'b AnnotationGraph, ) -> Option> { - let value_filter = self - .node_search - .get_value_filter(g, self.node_location.clone()) - .ok()?; - let node_search_qname = self.node_search.get_anno_qname(); - match self.negated_op.create_operator(g)? { - BinaryOperator::Base(_) => todo!(), - BinaryOperator::Index(negated_op) => Some(Box::new(NonExistingUnaryOperator { - negated_op, - node_annos: g.get_node_annos(), - node_search_qname, - value_filter, - node_search_spec: self.node_search.clone(), - })), - } + let config = Config { + use_parallel_joins: false, + }; + // Create a conjunction from the definition + let subquery = self.create_subquery_conjunction().ok()?; + // make an initial plan and store the operator order and apply it every time + // (this should aoid costly optimization in each step) + let operator_order = subquery.optimize_join_order_heuristics(g, &config).ok()?; + + let op = NonExistingUnaryOperator { + // subquery: Arc::new(Mutex::new(subquery)), + operator_order, + graph: g, + config, + }; + Some(Box::new(op)) } } struct NonExistingUnaryOperator<'a> { - negated_op: Box, - node_annos: &'a dyn AnnotationStorage, - node_search_qname: (Option, Option), - node_search_spec: NodeSearchSpec, - value_filter: Vec, + // TODO: this could be replaced with an execution plan and a *mutable* reference to the node ID search + //subquery: Arc, + operator_order: Vec, + graph: &'a AnnotationGraph, + config: Config, } impl<'a> Display for NonExistingUnaryOperator<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, " !",)?; - self.negated_op.fmt(f)?; - write!(f, " {}", self.node_search_spec)?; + write!(f, " !(?)")?; Ok(()) } } impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - // Extract the annotation keys for the matches - let it = self.negated_op.retrieve_matches(&m).map(|m| m.node); - let candidates = self.node_annos.get_keys_for_iterator( - self.node_search_qname.0.as_deref(), - self.node_search_qname.1.as_deref(), - Box::new(it), - ); - - // Only return true of no match was found which matches the operator and node value filter - !candidates - .into_iter() - .any(|m| self.value_filter.iter().all(|f| f(&m, self.node_annos))) + todo!() + // let subquery = self.subquery.lock().unwrap(); + // // TODO: Replace the first node annotation value with the ID of our match + // // Create an execution plan from the conjunction + // if let Ok(plan) = subquery.make_exec_plan_with_order( + // self.graph, + // &self.config, + // self.operator_order.clone(), + // ) { + // // Only return true of no match was found + // plan.next().is_none() + // } else { + // false + // } } } diff --git a/graphannis/src/annis/db/aql/operators/overlap.rs b/graphannis/src/annis/db/aql/operators/overlap.rs index 49e88a03e..64f5250ec 100644 --- a/graphannis/src/annis/db/aql/operators/overlap.rs +++ b/graphannis/src/annis/db/aql/operators/overlap.rs @@ -53,7 +53,11 @@ impl BinaryOperatorSpec for OverlapSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/precedence.rs b/graphannis/src/annis/db/aql/operators/precedence.rs index 6db0f287a..991e112db 100644 --- a/graphannis/src/annis/db/aql/operators/precedence.rs +++ b/graphannis/src/annis/db/aql/operators/precedence.rs @@ -71,7 +71,11 @@ impl BinaryOperatorSpec for PrecedenceSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/aql/operators/rightalignment.rs b/graphannis/src/annis/db/aql/operators/rightalignment.rs index 6c12fc59c..649ff04cd 100644 --- a/graphannis/src/annis/db/aql/operators/rightalignment.rs +++ b/graphannis/src/annis/db/aql/operators/rightalignment.rs @@ -9,6 +9,7 @@ use crate::{annis::operator::EstimationType, graph::Match, model::AnnotationComp use graphannis_core::graph::DEFAULT_ANNO_KEY; use std::any::Any; use std::collections::HashSet; +use std::sync::Arc; #[derive(Clone, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)] pub struct RightAlignmentSpec; @@ -30,7 +31,11 @@ impl BinaryOperatorSpec for RightAlignmentSpec { optional_op.map(|op| BinaryOperator::Index(Box::new(op))) } - fn into_any(self: Box) -> Box { + fn into_any(self: Arc) -> Arc { + self + } + + fn any_ref(&self) -> &dyn Any { self } } diff --git a/graphannis/src/annis/db/corpusstorage.rs b/graphannis/src/annis/db/corpusstorage.rs index e0b939498..9cf30b1ff 100644 --- a/graphannis/src/annis/db/corpusstorage.rs +++ b/graphannis/src/annis/db/corpusstorage.rs @@ -374,13 +374,13 @@ fn add_subgraph_precedence( let tok_idx = q.add_node(NodeSearchSpec::AnyToken, None); let m_idx = q.add_node(m.clone(), None); q.add_operator( - Box::new(operators::OverlapSpec { reflexive: true }), + Arc::new(operators::OverlapSpec { reflexive: true }), &node_idx, &tok_idx, true, )?; q.add_operator( - Box::new(operators::PrecedenceSpec { + Arc::new(operators::PrecedenceSpec { segmentation: None, dist: RangeSpec::Bound { min_dist: 0, @@ -416,21 +416,21 @@ fn add_subgraph_precedence_with_segmentation( let m_idx = q.add_node(m.clone(), None); q.add_operator( - Box::new(operators::OverlapSpec { reflexive: true }), + Arc::new(operators::OverlapSpec { reflexive: true }), &m_node_idx, &m_idx, false, )?; q.add_operator( - Box::new(operators::OverlapSpec { reflexive: true }), + Arc::new(operators::OverlapSpec { reflexive: true }), &target_idx, &node_idx, false, )?; q.add_operator( - Box::new(operators::PrecedenceSpec { + Arc::new(operators::PrecedenceSpec { segmentation: Some(segmentation.to_string()), dist: RangeSpec::Bound { min_dist: 0, @@ -1996,7 +1996,7 @@ impl CorpusStorage { let node_idx = q.add_node(NodeSearchSpec::AnyNode, None); let m_idx = q.add_node(m.clone(), None); q.add_operator( - Box::new(operators::OverlapSpec { reflexive: true }), + Arc::new(operators::OverlapSpec { reflexive: true }), &m_idx, &node_idx, false, @@ -2039,7 +2039,7 @@ impl CorpusStorage { ); let m_idx = q.add_node(m.clone(), None); q.add_operator( - Box::new(operators::PartOfSubCorpusSpec { + Arc::new(operators::PartOfSubCorpusSpec { dist: RangeSpec::Bound { min_dist: 1, max_dist: 1, @@ -2122,7 +2122,7 @@ impl CorpusStorage { ); let any_node_idx = q.add_node(NodeSearchSpec::AnyNode, None); q.add_operator( - Box::new(operators::PartOfSubCorpusSpec { + Arc::new(operators::PartOfSubCorpusSpec { dist: RangeSpec::Unbound, }), &any_node_idx, @@ -2153,7 +2153,7 @@ impl CorpusStorage { None, ); q.add_operator( - Box::new(operators::PartOfSubCorpusSpec { + Arc::new(operators::PartOfSubCorpusSpec { dist: RangeSpec::Unbound, }), &any_node_idx, diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index 2e6784df7..509c0a071 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -1,7 +1,7 @@ use super::db::aql::model::AnnotationComponentType; use crate::{annis::db::AnnotationStorage, graph::Match, AnnotationGraph}; use graphannis_core::types::{Component, Edge}; -use std::{any::Any, collections::HashSet, fmt::Display}; +use std::{any::Any, collections::HashSet, fmt::Display, sync::Arc}; #[derive(Clone, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)] #[allow(clippy::enum_variant_names)] @@ -244,7 +244,9 @@ pub trait BinaryOperatorSpec: std::fmt::Debug { fn is_binding(&self) -> bool { true } - fn into_any(self: Box) -> Box; + fn into_any(self: Arc) -> Arc; + + fn any_ref(&self) -> &dyn Any; } pub trait UnaryOperatorSpec: std::fmt::Debug { From 70fea0ce2adc0f0b907c868dc8b5950022a71256 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 14:57:29 +0200 Subject: [PATCH 17/29] Create a subquery on each filter execution --- .../annis/db/aql/operators/non_existing.rs | 39 ++++++++++--------- graphannis/src/annis/db/exec/nodesearch.rs | 24 ------------ graphannis/src/annis/operator.rs | 2 +- 3 files changed, 21 insertions(+), 44 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 5b5a90f38..91bc4562c 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -18,7 +18,7 @@ use crate::{ AnnotationGraph, }; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct NonExistingUnaryOperatorSpec { pub op: Arc, pub target: NodeSearchSpec, @@ -71,12 +71,12 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { }; // Create a conjunction from the definition let subquery = self.create_subquery_conjunction().ok()?; - // make an initial plan and store the operator order and apply it every time + // Make an initial plan and store the operator order and apply it every time // (this should aoid costly optimization in each step) let operator_order = subquery.optimize_join_order_heuristics(g, &config).ok()?; let op = NonExistingUnaryOperator { - // subquery: Arc::new(Mutex::new(subquery)), + spec: self.clone(), operator_order, graph: g, config, @@ -86,8 +86,7 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { } struct NonExistingUnaryOperator<'a> { - // TODO: this could be replaced with an execution plan and a *mutable* reference to the node ID search - //subquery: Arc, + spec: NonExistingUnaryOperatorSpec, operator_order: Vec, graph: &'a AnnotationGraph, config: Config, @@ -102,19 +101,21 @@ impl<'a> Display for NonExistingUnaryOperator<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - todo!() - // let subquery = self.subquery.lock().unwrap(); - // // TODO: Replace the first node annotation value with the ID of our match - // // Create an execution plan from the conjunction - // if let Ok(plan) = subquery.make_exec_plan_with_order( - // self.graph, - // &self.config, - // self.operator_order.clone(), - // ) { - // // Only return true of no match was found - // plan.next().is_none() - // } else { - // false - // } + if let Ok(subquery) = self.spec.create_subquery_conjunction() { + // TODO: Replace the first node annotation value with the ID of our match + // Create an execution plan from the conjunction + if let Ok(mut plan) = subquery.make_exec_plan_with_order( + self.graph, + &self.config, + self.operator_order.clone(), + ) { + // Only return true of no match was found + plan.next().is_none() + } else { + false + } + } else { + false + } } } diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 69efbcd1f..36ff947e0 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -92,30 +92,6 @@ impl NodeSearchSpec { HashSet::default() } - /// Get the annotatio qualified name needed to execute a search with this specification. - pub fn get_anno_qname(&self) -> (Option, Option) { - match self { - NodeSearchSpec::ExactValue { ns, name, .. } - | NodeSearchSpec::NotExactValue { ns, name, .. } - | NodeSearchSpec::RegexValue { ns, name, .. } - | NodeSearchSpec::NotRegexValue { ns, name, .. } => { - (ns.to_owned(), Some(name.to_owned())) - } - NodeSearchSpec::ExactTokenValue { .. } - | NodeSearchSpec::NotExactTokenValue { .. } - | NodeSearchSpec::RegexTokenValue { .. } - | NodeSearchSpec::NotRegexTokenValue { .. } - | NodeSearchSpec::AnyToken { .. } => ( - Some(TOKEN_KEY.ns.clone().into()), - Some(TOKEN_KEY.name.clone().into()), - ), - NodeSearchSpec::AnyNode => ( - Some(NODE_TYPE_KEY.ns.clone().into()), - Some(NODE_TYPE_KEY.name.clone().into()), - ), - } - } - /// Creates a vector of value filter functions for this node annotation search. pub fn get_value_filter( &self, diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index 509c0a071..bf1c6ba2c 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -261,7 +261,7 @@ pub trait UnaryOperatorSpec: std::fmt::Debug { ) -> Option>; } -pub trait UnaryOperator: std::fmt::Display + Send + Sync { +pub trait UnaryOperator: std::fmt::Display { fn filter_match(&self, m: &Match) -> bool; fn estimation_type(&self) -> EstimationType { From 7bfb9b1c00f3e0847e30c72dd695ce43a8584ac1 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 15:46:49 +0200 Subject: [PATCH 18/29] Search for the matched node name --- .../annis/db/aql/operators/non_existing.rs | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 91bc4562c..6d0fb84b1 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -4,7 +4,7 @@ use std::{ sync::Arc, }; -use graphannis_core::graph::{ANNIS_NS, NODE_NAME}; +use graphannis_core::graph::{ANNIS_NS, NODE_NAME, NODE_NAME_KEY}; use crate::{ annis::{ @@ -26,13 +26,13 @@ pub struct NonExistingUnaryOperatorSpec { } impl NonExistingUnaryOperatorSpec { - fn create_subquery_conjunction(&self) -> Result { + fn create_subquery_conjunction(&self, node_name: String) -> Result { let mut subquery = Conjunction::new(); let var_flex = subquery.add_node( NodeSearchSpec::ExactValue { ns: Some(ANNIS_NS.into()), name: NODE_NAME.into(), - val: None, + val: Some(node_name), is_meta: false, }, None, @@ -55,7 +55,7 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { ) -> std::collections::HashSet< graphannis_core::types::Component, > { - if let Ok(subquery) = self.create_subquery_conjunction() { + if let Ok(subquery) = self.create_subquery_conjunction(String::default()) { subquery.necessary_components(g) } else { HashSet::default() @@ -70,16 +70,27 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { use_parallel_joins: false, }; // Create a conjunction from the definition - let subquery = self.create_subquery_conjunction().ok()?; + let subquery = self.create_subquery_conjunction(String::default()).ok()?; // Make an initial plan and store the operator order and apply it every time // (this should aoid costly optimization in each step) let operator_order = subquery.optimize_join_order_heuristics(g, &config).ok()?; + let query_fragment = if let Some(op) = self.op.create_operator(g) { + if self.target_left { + format!("{} {} x", self.target, op) + } else { + format!("x {} {}", op, self.target) + } + } else { + "unknown subquery".into() + }; + let op = NonExistingUnaryOperator { spec: self.clone(), operator_order, graph: g, config, + query_fragment, }; Some(Box::new(op)) } @@ -90,32 +101,34 @@ struct NonExistingUnaryOperator<'a> { operator_order: Vec, graph: &'a AnnotationGraph, config: Config, + query_fragment: String, } impl<'a> Display for NonExistingUnaryOperator<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, " !(?)")?; - Ok(()) + write!(f, " !({})", self.query_fragment) } } impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - if let Ok(subquery) = self.spec.create_subquery_conjunction() { - // TODO: Replace the first node annotation value with the ID of our match - // Create an execution plan from the conjunction - if let Ok(mut plan) = subquery.make_exec_plan_with_order( - self.graph, - &self.config, - self.operator_order.clone(), - ) { - // Only return true of no match was found - plan.next().is_none() - } else { - false + if let Some(match_name) = self + .graph + .get_node_annos() + .get_value_for_item(&m.node, &NODE_NAME_KEY) + { + if let Ok(subquery) = self.spec.create_subquery_conjunction(match_name.into()) { + // Create an execution plan from the conjunction + if let Ok(mut plan) = subquery.make_exec_plan_with_order( + self.graph, + &self.config, + self.operator_order.clone(), + ) { + // Only return true of no match was found + return plan.next().is_none(); + } } - } else { - false } + false } } From 940412b61bdfa70a7a98f14d399577629cdd1c5e Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 16:01:21 +0200 Subject: [PATCH 19/29] Actually use the filtered variable name instead of always the right side --- graphannis/src/annis/db/aql/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 3c86049d6..76d5ee608 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -165,6 +165,11 @@ fn map_conjunction( })); } else { let target_left = node_left.optional; + let filtered_var = if target_left { + node_right.var + } else { + node_left.var + }; let spec = NonExistingUnaryOperatorSpec { op: op_spec, target: if target_left { @@ -174,7 +179,7 @@ fn map_conjunction( }, target_left, }; - q.add_unary_operator_from_query(Arc::new(spec), &node_right.var, op_pos)?; + q.add_unary_operator_from_query(Arc::new(spec), &filtered_var, op_pos)?; } } } else { From e14bfeeae8b0169b04abdec4c1e6eca19c5a707d Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 16:20:06 +0200 Subject: [PATCH 20/29] Use the operator selectivity to estimate the non-existance selectivity --- .../annis/db/aql/operators/non_existing.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 6d0fb84b1..d179f8ed8 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -13,7 +13,10 @@ use crate::{ exec::nodesearch::NodeSearchSpec, }, errors::Result, - operator::{BinaryOperatorSpec, UnaryOperator, UnaryOperatorSpec}, + operator::{ + BinaryOperatorBase, BinaryOperatorSpec, EstimationType, UnaryOperator, + UnaryOperatorSpec, + }, }, AnnotationGraph, }; @@ -75,15 +78,16 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { // (this should aoid costly optimization in each step) let operator_order = subquery.optimize_join_order_heuristics(g, &config).ok()?; - let query_fragment = if let Some(op) = self.op.create_operator(g) { - if self.target_left { + let mut query_fragment = "".into(); + let mut op_estimation = EstimationType::Min; + if let Some(op) = self.op.create_operator(g) { + query_fragment = if self.target_left { format!("{} {} x", self.target, op) } else { format!("x {} {}", op, self.target) - } - } else { - "unknown subquery".into() - }; + }; + op_estimation = op.estimation_type(); + } let op = NonExistingUnaryOperator { spec: self.clone(), @@ -91,6 +95,7 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { graph: g, config, query_fragment, + op_estimation, }; Some(Box::new(op)) } @@ -102,6 +107,7 @@ struct NonExistingUnaryOperator<'a> { graph: &'a AnnotationGraph, config: Config, query_fragment: String, + op_estimation: EstimationType, } impl<'a> Display for NonExistingUnaryOperator<'a> { @@ -131,4 +137,11 @@ impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { } false } + + fn estimation_type(&self) -> EstimationType { + match self.op_estimation { + EstimationType::Selectivity(s) => EstimationType::Selectivity(1.0 - s), + EstimationType::Min => EstimationType::Min, + } + } } From 8e6d7feb7b89d0acd9f2fc320faffdf7ff73b573 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 16:35:40 +0200 Subject: [PATCH 21/29] Mark the operators as Send+Sync again --- graphannis/src/annis/operator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index bf1c6ba2c..901be87b4 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -229,7 +229,7 @@ pub trait BinaryOperatorIndex: BinaryOperatorBase { fn as_binary_operator(&self) -> &dyn BinaryOperatorBase; } -pub trait BinaryOperatorSpec: std::fmt::Debug { +pub trait BinaryOperatorSpec: std::fmt::Debug + Send + Sync { fn necessary_components( &self, db: &AnnotationGraph, @@ -261,7 +261,7 @@ pub trait UnaryOperatorSpec: std::fmt::Debug { ) -> Option>; } -pub trait UnaryOperator: std::fmt::Display { +pub trait UnaryOperator: std::fmt::Display + Send + Sync { fn filter_match(&self, m: &Match) -> bool; fn estimation_type(&self) -> EstimationType { From ef5c6a3205892b4ea863229f00a4a8e0d343bf79 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 18:10:18 +0200 Subject: [PATCH 22/29] Use the direct operator retrieve_matches/filter approach again, but be more specific about when retrieve_matches is actually possible --- .../annis/db/aql/operators/non_existing.rs | 133 ++++++++++++------ graphannis/src/annis/db/exec/nodesearch.rs | 24 ++++ graphannis/src/annis/operator.rs | 3 +- 3 files changed, 115 insertions(+), 45 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index d179f8ed8..03f56a4ed 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -4,18 +4,18 @@ use std::{ sync::Arc, }; -use graphannis_core::graph::{ANNIS_NS, NODE_NAME, NODE_NAME_KEY}; +use graphannis_core::graph::{ANNIS_NS, NODE_NAME}; use crate::{ annis::{ db::{ - aql::{conjunction::Conjunction, Config}, - exec::nodesearch::NodeSearchSpec, + aql::conjunction::Conjunction, + exec::nodesearch::{NodeSearch, NodeSearchSpec}, }, errors::Result, operator::{ - BinaryOperatorBase, BinaryOperatorSpec, EstimationType, UnaryOperator, - UnaryOperatorSpec, + BinaryOperator, BinaryOperatorBase, BinaryOperatorIndex, BinaryOperatorSpec, + EstimationType, UnaryOperator, UnaryOperatorSpec, }, }, AnnotationGraph, @@ -69,15 +69,6 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { &'b self, g: &'b AnnotationGraph, ) -> Option> { - let config = Config { - use_parallel_joins: false, - }; - // Create a conjunction from the definition - let subquery = self.create_subquery_conjunction(String::default()).ok()?; - // Make an initial plan and store the operator order and apply it every time - // (this should aoid costly optimization in each step) - let operator_order = subquery.optimize_join_order_heuristics(g, &config).ok()?; - let mut query_fragment = "".into(); let mut op_estimation = EstimationType::Min; if let Some(op) = self.op.create_operator(g) { @@ -89,53 +80,107 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { op_estimation = op.estimation_type(); } - let op = NonExistingUnaryOperator { - spec: self.clone(), - operator_order, - graph: g, - config, - query_fragment, - op_estimation, - }; - Some(Box::new(op)) + let orig_op = self.op.create_operator(g)?; + let mut negated_op: Box = + Box::new(NonExistingUnaryOperatorFilter { + spec: self.clone(), + graph: g, + query_fragment: query_fragment.clone(), + op_estimation: op_estimation.clone(), + negated_op: orig_op, + }); + if !self.target_left { + if let BinaryOperator::Index(orig_op) = self.op.create_operator(g)? { + negated_op = Box::new(NonExistingUnaryOperatorIndex { + spec: self.clone(), + graph: g, + query_fragment, + op_estimation, + negated_op: orig_op, + }) + } + } + Some(negated_op) + } +} + +struct NonExistingUnaryOperatorIndex<'a> { + spec: NonExistingUnaryOperatorSpec, + negated_op: Box, + graph: &'a AnnotationGraph, + query_fragment: String, + op_estimation: EstimationType, +} + +impl<'a> Display for NonExistingUnaryOperatorIndex<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, " !({})", self.query_fragment) + } +} + +impl<'a> UnaryOperator for NonExistingUnaryOperatorIndex<'a> { + fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { + // Extract the annotation keys for the matches + let it = self.negated_op.retrieve_matches(&m).map(|m| m.node); + let qname = self.spec.target.get_anno_qname(); + let candidates = self.graph.get_node_annos().get_keys_for_iterator( + qname.0.as_deref(), + qname.1.as_deref(), + Box::new(it), + ); + + let value_filter = self + .spec + .target + .get_value_filter(self.graph, None) + .unwrap_or_default(); + + // Only return true of no match was found which matches the operator and node value filter + !candidates.into_iter().any(|m| { + value_filter + .iter() + .all(|f| f(&m, self.graph.get_node_annos())) + }) + } + + fn estimation_type(&self) -> EstimationType { + match self.op_estimation { + EstimationType::Selectivity(s) => EstimationType::Selectivity(1.0 - s), + EstimationType::Min => EstimationType::Min, + } } } -struct NonExistingUnaryOperator<'a> { +struct NonExistingUnaryOperatorFilter<'a> { spec: NonExistingUnaryOperatorSpec, - operator_order: Vec, + negated_op: BinaryOperator<'a>, graph: &'a AnnotationGraph, - config: Config, query_fragment: String, op_estimation: EstimationType, } -impl<'a> Display for NonExistingUnaryOperator<'a> { +impl<'a> Display for NonExistingUnaryOperatorFilter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, " !({})", self.query_fragment) } } -impl<'a> UnaryOperator for NonExistingUnaryOperator<'a> { +impl<'a> UnaryOperator for NonExistingUnaryOperatorFilter<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { - if let Some(match_name) = self - .graph - .get_node_annos() - .get_value_for_item(&m.node, &NODE_NAME_KEY) + // The candidate match is on one side and we need to get an iterator of all possible matches for the other side + if let Ok(mut node_search) = + NodeSearch::from_spec(self.spec.target.clone(), 0, self.graph, None) { - if let Ok(subquery) = self.spec.create_subquery_conjunction(match_name.into()) { - // Create an execution plan from the conjunction - if let Ok(mut plan) = subquery.make_exec_plan_with_order( - self.graph, - &self.config, - self.operator_order.clone(), - ) { - // Only return true of no match was found - return plan.next().is_none(); - } - } + // Include if no nodes matches the conditions + let has_any_match = if self.spec.target_left { + node_search.any(|lhs| self.negated_op.filter_match(&lhs[0], m)) + } else { + node_search.any(|rhs| self.negated_op.filter_match(m, &rhs[0])) + }; + !has_any_match + } else { + false } - false } fn estimation_type(&self) -> EstimationType { diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 36ff947e0..03a08c63b 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -92,6 +92,30 @@ impl NodeSearchSpec { HashSet::default() } + /// Get the annotatiom qualified name needed to execute a search with this specification. + pub fn get_anno_qname(&self) -> (Option, Option) { + match self { + NodeSearchSpec::ExactValue { ns, name, .. } + | NodeSearchSpec::NotExactValue { ns, name, .. } + | NodeSearchSpec::RegexValue { ns, name, .. } + | NodeSearchSpec::NotRegexValue { ns, name, .. } => { + (ns.to_owned(), Some(name.to_owned())) + } + NodeSearchSpec::ExactTokenValue { .. } + | NodeSearchSpec::NotExactTokenValue { .. } + | NodeSearchSpec::RegexTokenValue { .. } + | NodeSearchSpec::NotRegexTokenValue { .. } + | NodeSearchSpec::AnyToken { .. } => ( + Some(TOKEN_KEY.ns.clone().into()), + Some(TOKEN_KEY.name.clone().into()), + ), + NodeSearchSpec::AnyNode => ( + Some(NODE_TYPE_KEY.ns.clone().into()), + Some(NODE_TYPE_KEY.name.clone().into()), + ), + } + } + /// Creates a vector of value filter functions for this node annotation search. pub fn get_value_filter( &self, diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index 901be87b4..f14b4a33c 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -136,6 +136,7 @@ impl EdgeAnnoSearchSpec { } /// Represents the different strategies to estimate the output of size of applying an operator. +#[derive(Clone)] pub enum EstimationType { /// Estimate using the given selectivity. /// This means the cross product of the input sizes is multiplied with this factor to get the output size. @@ -261,7 +262,7 @@ pub trait UnaryOperatorSpec: std::fmt::Debug { ) -> Option>; } -pub trait UnaryOperator: std::fmt::Display + Send + Sync { +pub trait UnaryOperator: std::fmt::Display { fn filter_match(&self, m: &Match) -> bool; fn estimation_type(&self) -> EstimationType { From da2df8a42b7212fc9a1db5bf507eeb1af519d63f Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 18:20:14 +0200 Subject: [PATCH 23/29] Improvde planner output for non-existance queries --- .../annis/db/aql/operators/non_existing.rs | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 03f56a4ed..6a6d8951a 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -69,14 +69,8 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { &'b self, g: &'b AnnotationGraph, ) -> Option> { - let mut query_fragment = "".into(); let mut op_estimation = EstimationType::Min; if let Some(op) = self.op.create_operator(g) { - query_fragment = if self.target_left { - format!("{} {} x", self.target, op) - } else { - format!("x {} {}", op, self.target) - }; op_estimation = op.estimation_type(); } @@ -85,7 +79,6 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { Box::new(NonExistingUnaryOperatorFilter { spec: self.clone(), graph: g, - query_fragment: query_fragment.clone(), op_estimation: op_estimation.clone(), negated_op: orig_op, }); @@ -94,7 +87,6 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { negated_op = Box::new(NonExistingUnaryOperatorIndex { spec: self.clone(), graph: g, - query_fragment, op_estimation, negated_op: orig_op, }) @@ -108,13 +100,13 @@ struct NonExistingUnaryOperatorIndex<'a> { spec: NonExistingUnaryOperatorSpec, negated_op: Box, graph: &'a AnnotationGraph, - query_fragment: String, op_estimation: EstimationType, } impl<'a> Display for NonExistingUnaryOperatorIndex<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, " !({})", self.query_fragment) + write!(f, " !{} {}", self.negated_op, self.spec.target)?; + Ok(()) } } @@ -155,13 +147,17 @@ struct NonExistingUnaryOperatorFilter<'a> { spec: NonExistingUnaryOperatorSpec, negated_op: BinaryOperator<'a>, graph: &'a AnnotationGraph, - query_fragment: String, op_estimation: EstimationType, } impl<'a> Display for NonExistingUnaryOperatorFilter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, " !({})", self.query_fragment) + if self.spec.target_left { + write!(f, " !({} {} x)", self.spec.target, self.negated_op)?; + } else { + write!(f, " !(x {} {})", self.negated_op, self.spec.target,)?; + } + Ok(()) } } From 151beaa8eba1d4ed156976e9712312ae08bb7071 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 18:41:06 +0200 Subject: [PATCH 24/29] Try to avoid creating the operator twice --- .../annis/db/aql/operators/non_existing.rs | 64 +++++++++++++------ 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 6a6d8951a..c5cfc49a5 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -49,6 +49,22 @@ impl NonExistingUnaryOperatorSpec { Ok(subquery) } + + fn create_filter_operator<'b>( + &'b self, + g: &'b AnnotationGraph, + negated_op: BinaryOperator<'b>, + target_left: bool, + op_estimation: EstimationType, + ) -> Box { + Box::new(NonExistingUnaryOperatorFilter { + target: self.target.clone(), + target_left, + graph: g, + op_estimation, + negated_op, + }) + } } impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { @@ -73,25 +89,33 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { if let Some(op) = self.op.create_operator(g) { op_estimation = op.estimation_type(); } - - let orig_op = self.op.create_operator(g)?; - let mut negated_op: Box = - Box::new(NonExistingUnaryOperatorFilter { - spec: self.clone(), - graph: g, - op_estimation: op_estimation.clone(), - negated_op: orig_op, - }); - if !self.target_left { - if let BinaryOperator::Index(orig_op) = self.op.create_operator(g)? { - negated_op = Box::new(NonExistingUnaryOperatorIndex { + let mut target_left = self.target_left; + let mut orig_op = self.op.create_operator(g)?; + + if target_left { + // Check if we can avoid a costly filter operation by switching operands + if let Some(inverted_op) = orig_op.get_inverse_operator(g) { + orig_op = inverted_op; + target_left = false; + } + } + // When possible, use the index-based implementation, use the filter + // version as a fallback + let negated_op: Box = if !target_left { + if let BinaryOperator::Index(orig_op) = orig_op { + Box::new(NonExistingUnaryOperatorIndex { spec: self.clone(), graph: g, op_estimation, negated_op: orig_op, }) + } else { + self.create_filter_operator(g, orig_op, target_left, op_estimation) } - } + } else { + self.create_filter_operator(g, orig_op, target_left, op_estimation) + }; + Some(negated_op) } } @@ -144,7 +168,8 @@ impl<'a> UnaryOperator for NonExistingUnaryOperatorIndex<'a> { } struct NonExistingUnaryOperatorFilter<'a> { - spec: NonExistingUnaryOperatorSpec, + target: NodeSearchSpec, + target_left: bool, negated_op: BinaryOperator<'a>, graph: &'a AnnotationGraph, op_estimation: EstimationType, @@ -152,10 +177,10 @@ struct NonExistingUnaryOperatorFilter<'a> { impl<'a> Display for NonExistingUnaryOperatorFilter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.spec.target_left { - write!(f, " !({} {} x)", self.spec.target, self.negated_op)?; + if self.target_left { + write!(f, " !({} {} x)", self.target, self.negated_op)?; } else { - write!(f, " !(x {} {})", self.negated_op, self.spec.target,)?; + write!(f, " !(x {} {})", self.negated_op, self.target,)?; } Ok(()) } @@ -164,11 +189,10 @@ impl<'a> Display for NonExistingUnaryOperatorFilter<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperatorFilter<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { // The candidate match is on one side and we need to get an iterator of all possible matches for the other side - if let Ok(mut node_search) = - NodeSearch::from_spec(self.spec.target.clone(), 0, self.graph, None) + if let Ok(mut node_search) = NodeSearch::from_spec(self.target.clone(), 0, self.graph, None) { // Include if no nodes matches the conditions - let has_any_match = if self.spec.target_left { + let has_any_match = if self.target_left { node_search.any(|lhs| self.negated_op.filter_match(&lhs[0], m)) } else { node_search.any(|rhs| self.negated_op.filter_match(m, &rhs[0])) From ef8746654524dc31e9a4f0d7c7bd2f0af6bd3aa5 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Tue, 14 Sep 2021 18:45:05 +0200 Subject: [PATCH 25/29] Re-use already created operator for output estimation --- .../src/annis/db/aql/operators/non_existing.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index c5cfc49a5..dbbda5c94 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -85,10 +85,6 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { &'b self, g: &'b AnnotationGraph, ) -> Option> { - let mut op_estimation = EstimationType::Min; - if let Some(op) = self.op.create_operator(g) { - op_estimation = op.estimation_type(); - } let mut target_left = self.target_left; let mut orig_op = self.op.create_operator(g)?; @@ -99,12 +95,14 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { target_left = false; } } + let op_estimation = orig_op.estimation_type(); + // When possible, use the index-based implementation, use the filter // version as a fallback let negated_op: Box = if !target_left { if let BinaryOperator::Index(orig_op) = orig_op { Box::new(NonExistingUnaryOperatorIndex { - spec: self.clone(), + target: self.target.clone(), graph: g, op_estimation, negated_op: orig_op, @@ -121,7 +119,7 @@ impl UnaryOperatorSpec for NonExistingUnaryOperatorSpec { } struct NonExistingUnaryOperatorIndex<'a> { - spec: NonExistingUnaryOperatorSpec, + target: NodeSearchSpec, negated_op: Box, graph: &'a AnnotationGraph, op_estimation: EstimationType, @@ -129,7 +127,7 @@ struct NonExistingUnaryOperatorIndex<'a> { impl<'a> Display for NonExistingUnaryOperatorIndex<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, " !{} {}", self.negated_op, self.spec.target)?; + write!(f, " !{} {}", self.negated_op, self.target)?; Ok(()) } } @@ -138,7 +136,7 @@ impl<'a> UnaryOperator for NonExistingUnaryOperatorIndex<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { // Extract the annotation keys for the matches let it = self.negated_op.retrieve_matches(&m).map(|m| m.node); - let qname = self.spec.target.get_anno_qname(); + let qname = self.target.get_anno_qname(); let candidates = self.graph.get_node_annos().get_keys_for_iterator( qname.0.as_deref(), qname.1.as_deref(), @@ -146,7 +144,6 @@ impl<'a> UnaryOperator for NonExistingUnaryOperatorIndex<'a> { ); let value_filter = self - .spec .target .get_value_filter(self.graph, None) .unwrap_or_default(); From fb89e21eebe544ef79196c35337d775f1fb7e40d Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Wed, 15 Sep 2021 08:49:08 +0200 Subject: [PATCH 26/29] Add search tests for negation without existence --- CHANGELOG.md | 7 ++++++- graphannis/tests/searchtest_queries.csv | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a95c33cae..e3cece60b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Added generic operator negation without existence assumption, + if only one side of the negated operator is optional (#187). + ## [1.1.0] - 2021-09-09 ### Added -- Added generic operator negation with existance assumption by adding `!` before the binary operator (#186) +- Added generic operator negation with existence assumption by adding `!` before the binary operator (#186) ### Changed diff --git a/graphannis/tests/searchtest_queries.csv b/graphannis/tests/searchtest_queries.csv index 0d103ed4e..39d48ad3f 100644 --- a/graphannis/tests/searchtest_queries.csv +++ b/graphannis/tests/searchtest_queries.csv @@ -31,6 +31,21 @@ NegationLeftAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_l_ NegationRightAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_r_ #2",GUM,9 NegationNodeIdentity,"m#""modern"" & (n#claws5=""NN1"" | n#claws5=""NN2"") & #m . #n _r_ o#node & #n !_ident_ #o",GUM,16 NonExistingDominance,"cat=""S"" > cat=""NP"" > ""Wikinews"" & #1 !>* /U\.?S\.?/?",GUM,19 +NonExistingDominanceLeft,"cat=""S""? !>* ""issues""",GUM,1 +NonExistingDepAnno,"""issues"" !->dep[func=""conj""] tok?",GUM,3 +NonExistingPrecedence,"""issues"" !.1,10 pos=/JJ.*/?",GUM,2 +NonExistingNear,"""issues"" !^1,5 pos=/JJ.*/?",GUM,1 +NonExistingPartOf,"""Greece""? !@* type=""voyage""",GUM,10 +NonExistingIdentCoverage,"cat=""S"" !_=_ tok?",GUM,5128 +NonExistingIdentCoverageInv,"tok? !_=_ cat=""S""",GUM,5128 +NonExistingInclusion,"quote !_i_ ""amazing""?",GUM,49 +NonExistingOverlap,"quote !_o_ ""amazing""?",GUM,49 +NonExistingOverlapInv,"""amazing""? !_o_ quote",GUM,49 +NonExistingLeftAligned,"quote !_l_ /""/?",GUM,11 +NonExistingLeftInv,"/""/? !_l_ quote",GUM,11 +NonExistingRight,"quote & /""/? & #1 !_r_ #2",GUM,19 +NonExistingRightInv,"quote & /""/? & #2 !_r_ #1",GUM,19 +InvalidNonExisting,"tok !. tok? !_o_ node?",GUM,0 NotDocument,"tok @* doc!=""maz-11299""",pcc2.1,34709 StructureInclusionSeed,"cat=""S"" _i_ cat=""AP""",pcc2.1,726 IsConnectedRange,"""Jugendlichen"" .3,10 ""Musikcafé""",pcc2.1,1 From dc844a27c2956af69c6b49cc3c2666b88e921f91 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Wed, 15 Sep 2021 08:53:59 +0200 Subject: [PATCH 27/29] Fix some code smell --- graphannis/src/annis/db/aql/conjunction.rs | 2 +- graphannis/src/annis/db/aql/mod.rs | 38 +++++++++---------- .../annis/db/aql/operators/non_existing.rs | 2 +- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index 109f37d72..3e569b514 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -660,7 +660,7 @@ impl Conjunction { let inverse_op = op.get_inverse_operator(g); if let Some(inverse_op) = inverse_op { - if should_switch_operand_order(op_spec_entry, &node2cost) { + if should_switch_operand_order(op_spec_entry, node2cost) { spec_idx_left = op_spec_entry.args.right; spec_idx_right = op_spec_entry.args.left; diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 76d5ee608..067de3cd5 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -154,33 +154,31 @@ fn map_conjunction( negated_op: op_spec, }); q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)? - } else { - if node_left.optional && node_right.optional { - // Not supported yet - return Err(GraphAnnisError::AQLSemanticError(AQLError { + } else if node_left.optional && node_right.optional { + // Not supported yet + return Err(GraphAnnisError::AQLSemanticError(AQLError { desc: format!( "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", var_left, var_right), location: op_pos, })); + } else { + let target_left = node_left.optional; + let filtered_var = if target_left { + node_right.var } else { - let target_left = node_left.optional; - let filtered_var = if target_left { - node_right.var + node_left.var + }; + let spec = NonExistingUnaryOperatorSpec { + op: op_spec, + target: if target_left { + node_left.spec } else { - node_left.var - }; - let spec = NonExistingUnaryOperatorSpec { - op: op_spec, - target: if target_left { - node_left.spec - } else { - node_right.spec - }, - target_left, - }; - q.add_unary_operator_from_query(Arc::new(spec), &filtered_var, op_pos)?; - } + node_right.spec + }, + target_left, + }; + q.add_unary_operator_from_query(Arc::new(spec), &filtered_var, op_pos)?; } } else { q.add_operator_from_query(op_spec, &var_left, &var_right, op_pos, !quirks_mode)?; diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index dbbda5c94..de6b86621 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs +++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -135,7 +135,7 @@ impl<'a> Display for NonExistingUnaryOperatorIndex<'a> { impl<'a> UnaryOperator for NonExistingUnaryOperatorIndex<'a> { fn filter_match(&self, m: &graphannis_core::annostorage::Match) -> bool { // Extract the annotation keys for the matches - let it = self.negated_op.retrieve_matches(&m).map(|m| m.node); + let it = self.negated_op.retrieve_matches(m).map(|m| m.node); let qname = self.target.get_anno_qname(); let candidates = self.graph.get_node_annos().get_keys_for_iterator( qname.0.as_deref(), From 99f572ae60c0f759f8feda232f52933501684f0c Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Wed, 15 Sep 2021 09:21:13 +0200 Subject: [PATCH 28/29] Remove unnnecessary clone --- graphannis/src/annis/db/exec/nodesearch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 03a08c63b..241895a4a 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -398,7 +398,7 @@ impl<'a> NodeSearch<'a> { ) -> Result> { let query_fragment = format!("{}", spec); - let filters = spec.get_value_filter(db, location_in_query.to_owned())?; + let filters = spec.get_value_filter(db, location_in_query)?; match spec { NodeSearchSpec::ExactValue { From 3defb77eed1cd4c258520df17e1ced7f077fa44d Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Wed, 15 Sep 2021 11:02:17 +0200 Subject: [PATCH 29/29] Add query to actually test the filter version of th non-existence operator --- graphannis/tests/searchtest_queries.csv | 2 +- misc/test_coverage.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100755 misc/test_coverage.sh diff --git a/graphannis/tests/searchtest_queries.csv b/graphannis/tests/searchtest_queries.csv index 39d48ad3f..21586c1b8 100644 --- a/graphannis/tests/searchtest_queries.csv +++ b/graphannis/tests/searchtest_queries.csv @@ -31,7 +31,7 @@ NegationLeftAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_l_ NegationRightAligned,"cat=""ROOT"" >* cat >* ""modern"" . ""campaigns"" & #1 !_r_ #2",GUM,9 NegationNodeIdentity,"m#""modern"" & (n#claws5=""NN1"" | n#claws5=""NN2"") & #m . #n _r_ o#node & #n !_ident_ #o",GUM,16 NonExistingDominance,"cat=""S"" > cat=""NP"" > ""Wikinews"" & #1 !>* /U\.?S\.?/?",GUM,19 -NonExistingDominanceLeft,"cat=""S""? !>* ""issues""",GUM,1 +NonExistingDominanceLeft,"cat=""NP""? !>* ""Wahlkampf""",pcc2.1,2 NonExistingDepAnno,"""issues"" !->dep[func=""conj""] tok?",GUM,3 NonExistingPrecedence,"""issues"" !.1,10 pos=/JJ.*/?",GUM,2 NonExistingNear,"""issues"" !^1,5 pos=/JJ.*/?",GUM,1 diff --git a/misc/test_coverage.sh b/misc/test_coverage.sh new file mode 100755 index 000000000..260c08d8d --- /dev/null +++ b/misc/test_coverage.sh @@ -0,0 +1,14 @@ +# Make sure you installed grcov via cargo first +# cargo install grcov + +# Set some environment variables needed by grcov +export CARGO_INCREMENTAL=0 +export RUSTFLAGS="-Zprofile -Ccodegen-units=1 -Copt-level=0 -Clink-dead-code -Coverflow-checks=off" + +# Run all tests +cargo +nightly clean +cargo +nightly test +cargo +nightly test -- --ignored + +# Generate HTML report in target/debug/coverage/index.html +grcov ./target/debug/ -s . -t html --llvm --branch --ignore-not-existing -o ./target/debug/coverage/