refactor: more improvements to the rules profiling logic

VirusTotal · Oct 18, 2024 · 443baa8 · 443baa8
1 parent 9a92dd0
commit 443baa8
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 72 deletions.
diff --git a/lib/src/compiler/emit.rs b/lib/src/compiler/emit.rs
@@ -252,17 +252,6 @@ pub(super) fn emit_rule_condition(
 ) {
     let mut instr = builder.start_rule(rule_id, ctx.current_rule.is_global);
 
-    // When the "logging" feature is enabled, print a log before the starting
-    // evaluating the rule's condition. In case of error during the evaluation
-    // this helps in determining the rule causing the issue.
-    #[cfg(feature = "logging")]
-    {
-        instr.i32_const(rule_id.0);
-        instr.call(
-            ctx.function_id(wasm::export__log_rule_eval_start.mangled_name),
-        );
-    }
-
     // Emit WASM code for the rule's condition.
     catch_undef(
         ctx,

diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs
@@ -1082,7 +1082,7 @@ impl<'a> Compiler<'a> {
 
         // The RuleId for the new rule is current length of `self.rules`. The
         // first rule has RuleId = 0.
-        let rule_id = RuleId(self.rules.len() as i32);
+        let rule_id = RuleId::from(self.rules.len());
 
         // Build a vector of pairs (IdentId, MetaValue) for every meta defined
         // in the rule.
@@ -2202,6 +2202,15 @@ pub(crate) struct NamespaceId(i32);
 #[derive(Copy, Clone, Debug, Default)]
 pub(crate) struct RuleId(i32);
 
+impl RuleId {
+    /// Returns the [`RuleId`] that comes after this one.
+    ///
+    /// This simply adds 1 to the ID.
+    pub(crate) fn next(&self) -> Self {
+        RuleId(self.0 + 1)
+    }
+}
+
 impl From<i32> for RuleId {
     #[inline]
     fn from(value: i32) -> Self {

diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs
@@ -169,6 +169,9 @@ impl Rules {
             .with_varint_encoding()
             .deserialize::<Self>(&bytes[magic.len()..])?;
 
+        #[cfg(feature = "logging")]
+        info!("Deserialization time: {:?}", Instant::elapsed(&start));
+
         // `rules.compiled_wasm_mod` can be `None` for two reasons:
         //
         //  1- The rules were serialized without compiled rules (i.e: the
@@ -182,14 +185,17 @@ impl Rules {
         // In both cases we try to build the module again from the data in
         // `rules.wasm_mode`.
         if rules.compiled_wasm_mod.is_none() {
+            #[cfg(feature = "logging")]
+            let start = Instant::now();
+
             rules.compiled_wasm_mod = Some(wasmtime::Module::from_binary(
                 &crate::wasm::ENGINE,
                 rules.wasm_mod.as_slice(),
             )?);
-        }
 
-        #[cfg(feature = "logging")]
-        info!("Deserialization time: {:?}", Instant::elapsed(&start));
+            #[cfg(feature = "logging")]
+            info!("WASM build time: {:?}", Instant::elapsed(&start));
+        }
 
         rules.build_ac_automaton();
 

diff --git a/lib/src/scanner/context.rs b/lib/src/scanner/context.rs
@@ -121,7 +121,10 @@ pub(crate) struct ScanContext<'r> {
     pub time_spent_in_rule: Vec<u64>,
     /// The time at which the evaluation of the current rule started.
     #[cfg(feature = "rules-profiling")]
-    pub rule_evaluation_start_time: u64,
+    pub rule_execution_start_time: u64,
+    /// The ID of the last rule whose condition was executed.
+    #[cfg(feature = "rules-profiling")]
+    pub last_executed_rule: Option<RuleId>,
     /// Clock used for measuring the time spend on each pattern.
     #[cfg(any(feature = "rules-profiling", feature = "logging"))]
     pub clock: quanta::Clock,
@@ -235,28 +238,6 @@ impl ScanContext<'_> {
         <dyn MessageDyn>::downcast_ref(m)
     }
 
-    /// Writes a log before starting evaluating the condition for the rule
-    /// identified by `rule_id`.
-    #[cfg(feature = "logging")]
-    pub(crate) fn log_rule_eval_start(&mut self, rule_id: RuleId) {
-        let rule = self.compiled_rules.get(rule_id);
-
-        let rule_name =
-            self.compiled_rules.ident_pool().get(rule.ident_id).unwrap();
-
-        let rule_namespace = self
-            .compiled_rules
-            .ident_pool()
-            .get(rule.namespace_ident_id)
-            .unwrap();
-
-        log::info!(
-            "Started rule evaluation: {}:{}",
-            rule_namespace,
-            rule_name
-        );
-    }
-
     pub(crate) fn console_log(&mut self, message: String) {
         if let Some(console_log) = &mut self.console_log {
             console_log(message)
@@ -293,20 +274,30 @@ impl ScanContext<'_> {
         obj_ref
     }
 
-    /// Called during the scan process when a rule didn't match.
-    pub(crate) fn track_rule_no_match(&mut self, rule_id: RuleId) {
-        // The time spent evaluating the rule that didn't match is the
-        // difference between the current time and the time stored in
-        // `self.rule_evaluation_start_time`.
-        #[cfg(feature = "rules-profiling")]
+    /// Update the time spent in the rule with the given ID, the time is
+    /// increased by the time elapsed since `rule_execution_start_time`.
+    pub(crate) fn update_time_spent_in_rule(&mut self, rule_id: RuleId) {
+        // SAFETY: it's safe to call `get_unchecked_mut`, the size of the
+        // `time_spent_in_rule` vector is guaranteed to be the number of
+        // existing rules. Therefore, the rule ID can be used as an index
+        // in this vector.
         unsafe {
             self.time_spent_in_rule
                 .get_unchecked_mut::<usize>(rule_id.into())
                 .add_assign(self.clock.delta_as_nanos(
-                    self.rule_evaluation_start_time,
+                    self.rule_execution_start_time,
                     self.clock.raw(),
                 ));
         }
+    }
+
+    /// Called during the scan process when a rule didn't match.
+    pub(crate) fn track_rule_no_match(&mut self, rule_id: RuleId) {
+        #[cfg(feature = "rules-profiling")]
+        {
+            self.last_executed_rule = Some(rule_id);
+            self.update_time_spent_in_rule(rule_id);
+        }
 
         let rule = self.compiled_rules.get(rule_id);
 
@@ -337,24 +328,17 @@ impl ScanContext<'_> {
         // Save the time in which the evaluation of the next rule started.
         #[cfg(feature = "rules-profiling")]
         {
-            self.rule_evaluation_start_time = self.clock.raw();
+            self.rule_execution_start_time = self.clock.raw();
         }
     }
 
     /// Called during the scan process when a rule has matched for tracking
     /// the matching rules.
     pub(crate) fn track_rule_match(&mut self, rule_id: RuleId) {
-        // The time spent evaluating the rule that matched is the
-        // difference between the current time and the time stored in
-        // `self.rule_evaluation_start_time`.
         #[cfg(feature = "rules-profiling")]
-        unsafe {
-            self.time_spent_in_rule
-                .get_unchecked_mut::<usize>(rule_id.into())
-                .add_assign(self.clock.delta_as_nanos(
-                    self.rule_evaluation_start_time,
-                    self.clock.raw(),
-                ));
+        {
+            self.last_executed_rule = Some(rule_id);
+            self.update_time_spent_in_rule(rule_id);
         }
 
         let rule = self.compiled_rules.get(rule_id);
@@ -390,7 +374,7 @@ impl ScanContext<'_> {
         // Save the time in which the evaluation of the next rule started.
         #[cfg(feature = "rules-profiling")]
         {
-            self.rule_evaluation_start_time = self.clock.raw();
+            self.rule_execution_start_time = self.clock.raw();
         }
     }
 
@@ -696,7 +680,7 @@ impl ScanContext<'_> {
         // `rule_evaluation_start_time` accordingly.
         #[cfg(feature = "rules-profiling")]
         {
-            self.rule_evaluation_start_time +=
+            self.rule_execution_start_time +=
                 scan_end.saturating_sub(scan_start);
         }
 

diff --git a/lib/src/scanner/mod.rs b/lib/src/scanner/mod.rs
@@ -206,7 +206,9 @@ impl<'r> Scanner<'r> {
                 #[cfg(feature = "rules-profiling")]
                 time_spent_in_rule: vec![0; num_rules as usize],
                 #[cfg(feature = "rules-profiling")]
-                rule_evaluation_start_time: 0,
+                rule_execution_start_time: 0,
+                #[cfg(feature = "rules-profiling")]
+                last_executed_rule: None,
                 #[cfg(any(feature = "rules-profiling", feature = "logging"))]
                 clock: quanta::Clock::new(),
             },
@@ -717,7 +719,7 @@ impl<'r> Scanner<'r> {
         // Save the time in which the evaluation of rules started.
         #[cfg(feature = "rules-profiling")]
         {
-            ctx.rule_evaluation_start_time = ctx.clock.raw();
+            ctx.rule_execution_start_time = ctx.clock.raw();
         }
 
         // Invoke the main function, which evaluates the rules' conditions. It
@@ -729,6 +731,22 @@ impl<'r> Scanner<'r> {
         let func_result =
             self.wasm_main_func.call(self.wasm_store.as_context_mut(), ());
 
+        #[cfg(feature = "rules-profiling")]
+        if func_result.is_err() {
+            let ctx = self.wasm_store.data_mut();
+            // When a timeout occurs, neither `ctx.track_rule_no_match` nor
+            // `ctx.track_rule_match` are invoked for the rule that was being
+            // executed at that moment. This implies that the time spent in
+            // that rule has not being updated yet, and we must do it here.
+            // The ID of the rule that was being executed is the one that
+            // comes after the last executed one. This assumes that rules are
+            // executed in strict ID increasing order.
+            ctx.update_time_spent_in_rule(
+                ctx.last_executed_rule
+                    .map_or(RuleId::from(0), |rule_id| rule_id.next()),
+            );
+        }
+
         #[cfg(all(feature = "rules-profiling", feature = "logging"))]
         {
             let most_expensive_rules = self.most_expensive_rules(10);

diff --git a/lib/src/wasm/mod.rs b/lib/src/wasm/mod.rs
@@ -747,18 +747,6 @@ pub(crate) fn new_linker<'r>() -> Linker<ScanContext<'r>> {
     linker
 }
 
-/// Invoked from WASM before starting the evaluation of the rule identified
-/// by the given [`RuleId`]. This only happens when the "logging" feature is
-/// enabled.
-#[wasm_export]
-#[cfg(feature = "logging")]
-pub(crate) fn log_rule_eval_start(
-    caller: &mut Caller<'_, ScanContext>,
-    rule_id: RuleId,
-) {
-    caller.data_mut().log_rule_eval_start(rule_id);
-}
-
 /// Invoked from WASM for triggering the pattern search phase.
 ///
 /// Returns `true` on success and `false` when a timeout occurs.