Skip to content

Commit

Permalink
refactor: more improvements to the rules profiling logic
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Oct 18, 2024
1 parent 9a92dd0 commit 443baa8
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 72 deletions.
11 changes: 0 additions & 11 deletions lib/src/compiler/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,6 @@ pub(super) fn emit_rule_condition(
) {
let mut instr = builder.start_rule(rule_id, ctx.current_rule.is_global);

// When the "logging" feature is enabled, print a log before the starting
// evaluating the rule's condition. In case of error during the evaluation
// this helps in determining the rule causing the issue.
#[cfg(feature = "logging")]
{
instr.i32_const(rule_id.0);
instr.call(
ctx.function_id(wasm::export__log_rule_eval_start.mangled_name),
);
}

// Emit WASM code for the rule's condition.
catch_undef(
ctx,
Expand Down
11 changes: 10 additions & 1 deletion lib/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ impl<'a> Compiler<'a> {

// The RuleId for the new rule is current length of `self.rules`. The
// first rule has RuleId = 0.
let rule_id = RuleId(self.rules.len() as i32);
let rule_id = RuleId::from(self.rules.len());

// Build a vector of pairs (IdentId, MetaValue) for every meta defined
// in the rule.
Expand Down Expand Up @@ -2202,6 +2202,15 @@ pub(crate) struct NamespaceId(i32);
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RuleId(i32);

impl RuleId {
/// Returns the [`RuleId`] that comes after this one.
///
/// This simply adds 1 to the ID.
pub(crate) fn next(&self) -> Self {
RuleId(self.0 + 1)
}
}

impl From<i32> for RuleId {
#[inline]
fn from(value: i32) -> Self {
Expand Down
12 changes: 9 additions & 3 deletions lib/src/compiler/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ impl Rules {
.with_varint_encoding()
.deserialize::<Self>(&bytes[magic.len()..])?;

#[cfg(feature = "logging")]
info!("Deserialization time: {:?}", Instant::elapsed(&start));

// `rules.compiled_wasm_mod` can be `None` for two reasons:
//
// 1- The rules were serialized without compiled rules (i.e: the
Expand All @@ -182,14 +185,17 @@ impl Rules {
// In both cases we try to build the module again from the data in
// `rules.wasm_mode`.
if rules.compiled_wasm_mod.is_none() {
#[cfg(feature = "logging")]
let start = Instant::now();

rules.compiled_wasm_mod = Some(wasmtime::Module::from_binary(
&crate::wasm::ENGINE,
rules.wasm_mod.as_slice(),
)?);
}

#[cfg(feature = "logging")]
info!("Deserialization time: {:?}", Instant::elapsed(&start));
#[cfg(feature = "logging")]
info!("WASM build time: {:?}", Instant::elapsed(&start));
}

rules.build_ac_automaton();

Expand Down
70 changes: 27 additions & 43 deletions lib/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ pub(crate) struct ScanContext<'r> {
pub time_spent_in_rule: Vec<u64>,
/// The time at which the evaluation of the current rule started.
#[cfg(feature = "rules-profiling")]
pub rule_evaluation_start_time: u64,
pub rule_execution_start_time: u64,
/// The ID of the last rule whose condition was executed.
#[cfg(feature = "rules-profiling")]
pub last_executed_rule: Option<RuleId>,
/// Clock used for measuring the time spend on each pattern.
#[cfg(any(feature = "rules-profiling", feature = "logging"))]
pub clock: quanta::Clock,
Expand Down Expand Up @@ -235,28 +238,6 @@ impl ScanContext<'_> {
<dyn MessageDyn>::downcast_ref(m)
}

/// Writes a log before starting evaluating the condition for the rule
/// identified by `rule_id`.
#[cfg(feature = "logging")]
pub(crate) fn log_rule_eval_start(&mut self, rule_id: RuleId) {
let rule = self.compiled_rules.get(rule_id);

let rule_name =
self.compiled_rules.ident_pool().get(rule.ident_id).unwrap();

let rule_namespace = self
.compiled_rules
.ident_pool()
.get(rule.namespace_ident_id)
.unwrap();

log::info!(
"Started rule evaluation: {}:{}",
rule_namespace,
rule_name
);
}

pub(crate) fn console_log(&mut self, message: String) {
if let Some(console_log) = &mut self.console_log {
console_log(message)
Expand Down Expand Up @@ -293,20 +274,30 @@ impl ScanContext<'_> {
obj_ref
}

/// Called during the scan process when a rule didn't match.
pub(crate) fn track_rule_no_match(&mut self, rule_id: RuleId) {
// The time spent evaluating the rule that didn't match is the
// difference between the current time and the time stored in
// `self.rule_evaluation_start_time`.
#[cfg(feature = "rules-profiling")]
/// Update the time spent in the rule with the given ID, the time is
/// increased by the time elapsed since `rule_execution_start_time`.
pub(crate) fn update_time_spent_in_rule(&mut self, rule_id: RuleId) {
// SAFETY: it's safe to call `get_unchecked_mut`, the size of the
// `time_spent_in_rule` vector is guaranteed to be the number of
// existing rules. Therefore, the rule ID can be used as an index
// in this vector.
unsafe {
self.time_spent_in_rule
.get_unchecked_mut::<usize>(rule_id.into())
.add_assign(self.clock.delta_as_nanos(
self.rule_evaluation_start_time,
self.rule_execution_start_time,
self.clock.raw(),
));
}
}

/// Called during the scan process when a rule didn't match.
pub(crate) fn track_rule_no_match(&mut self, rule_id: RuleId) {
#[cfg(feature = "rules-profiling")]
{
self.last_executed_rule = Some(rule_id);
self.update_time_spent_in_rule(rule_id);
}

let rule = self.compiled_rules.get(rule_id);

Expand Down Expand Up @@ -337,24 +328,17 @@ impl ScanContext<'_> {
// Save the time in which the evaluation of the next rule started.
#[cfg(feature = "rules-profiling")]
{
self.rule_evaluation_start_time = self.clock.raw();
self.rule_execution_start_time = self.clock.raw();
}
}

/// Called during the scan process when a rule has matched for tracking
/// the matching rules.
pub(crate) fn track_rule_match(&mut self, rule_id: RuleId) {
// The time spent evaluating the rule that matched is the
// difference between the current time and the time stored in
// `self.rule_evaluation_start_time`.
#[cfg(feature = "rules-profiling")]
unsafe {
self.time_spent_in_rule
.get_unchecked_mut::<usize>(rule_id.into())
.add_assign(self.clock.delta_as_nanos(
self.rule_evaluation_start_time,
self.clock.raw(),
));
{
self.last_executed_rule = Some(rule_id);
self.update_time_spent_in_rule(rule_id);
}

let rule = self.compiled_rules.get(rule_id);
Expand Down Expand Up @@ -390,7 +374,7 @@ impl ScanContext<'_> {
// Save the time in which the evaluation of the next rule started.
#[cfg(feature = "rules-profiling")]
{
self.rule_evaluation_start_time = self.clock.raw();
self.rule_execution_start_time = self.clock.raw();
}
}

Expand Down Expand Up @@ -696,7 +680,7 @@ impl ScanContext<'_> {
// `rule_evaluation_start_time` accordingly.
#[cfg(feature = "rules-profiling")]
{
self.rule_evaluation_start_time +=
self.rule_execution_start_time +=
scan_end.saturating_sub(scan_start);
}

Expand Down
22 changes: 20 additions & 2 deletions lib/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ impl<'r> Scanner<'r> {
#[cfg(feature = "rules-profiling")]
time_spent_in_rule: vec![0; num_rules as usize],
#[cfg(feature = "rules-profiling")]
rule_evaluation_start_time: 0,
rule_execution_start_time: 0,
#[cfg(feature = "rules-profiling")]
last_executed_rule: None,
#[cfg(any(feature = "rules-profiling", feature = "logging"))]
clock: quanta::Clock::new(),
},
Expand Down Expand Up @@ -717,7 +719,7 @@ impl<'r> Scanner<'r> {
// Save the time in which the evaluation of rules started.
#[cfg(feature = "rules-profiling")]
{
ctx.rule_evaluation_start_time = ctx.clock.raw();
ctx.rule_execution_start_time = ctx.clock.raw();
}

// Invoke the main function, which evaluates the rules' conditions. It
Expand All @@ -729,6 +731,22 @@ impl<'r> Scanner<'r> {
let func_result =
self.wasm_main_func.call(self.wasm_store.as_context_mut(), ());

#[cfg(feature = "rules-profiling")]
if func_result.is_err() {
let ctx = self.wasm_store.data_mut();
// When a timeout occurs, neither `ctx.track_rule_no_match` nor
// `ctx.track_rule_match` are invoked for the rule that was being
// executed at that moment. This implies that the time spent in
// that rule has not being updated yet, and we must do it here.
// The ID of the rule that was being executed is the one that
// comes after the last executed one. This assumes that rules are
// executed in strict ID increasing order.
ctx.update_time_spent_in_rule(
ctx.last_executed_rule
.map_or(RuleId::from(0), |rule_id| rule_id.next()),
);
}

#[cfg(all(feature = "rules-profiling", feature = "logging"))]
{
let most_expensive_rules = self.most_expensive_rules(10);
Expand Down
12 changes: 0 additions & 12 deletions lib/src/wasm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,18 +747,6 @@ pub(crate) fn new_linker<'r>() -> Linker<ScanContext<'r>> {
linker
}

/// Invoked from WASM before starting the evaluation of the rule identified
/// by the given [`RuleId`]. This only happens when the "logging" feature is
/// enabled.
#[wasm_export]
#[cfg(feature = "logging")]
pub(crate) fn log_rule_eval_start(
caller: &mut Caller<'_, ScanContext>,
rule_id: RuleId,
) {
caller.data_mut().log_rule_eval_start(rule_id);
}

/// Invoked from WASM for triggering the pattern search phase.
///
/// Returns `true` on success and `false` when a timeout occurs.
Expand Down

0 comments on commit 443baa8

Please sign in to comment.