Skip to content

Commit

Permalink
Minor: add with_estimated_selectivity to Precision (#8177)
Browse files Browse the repository at this point in the history
* Minor: add apply_filter to Precision

* fix: use inexact

* Rename to with_estimated_selectivity
  • Loading branch information
alamb authored Nov 17, 2023
1 parent c14a765 commit a2b9ab8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 17 deletions.
9 changes: 9 additions & 0 deletions datafusion/common/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,15 @@ impl Precision<usize> {
(_, _) => Precision::Absent,
}
}

/// Scale this value by the estimated fraction of rows a filter keeps.
///
/// `selectivity` is the estimated fraction that passes the filter:
/// `1.0` keeps every row, `0.5` keeps half of them. The scaled value is
/// rounded up to the next whole number before being converted back to
/// `usize`.
///
/// Because the selectivity is only an estimate, the result is always
/// converted with `to_inexact` and is never reported as exact.
pub fn with_estimated_selectivity(self, selectivity: f64) -> Self {
    let scaled = self.map(|count| {
        // Do the arithmetic in f64 and round up rather than truncate.
        let estimate = count as f64 * selectivity;
        estimate.ceil() as usize
    });
    scaled.to_inexact()
}
}

impl Precision<ScalarValue> {
Expand Down
25 changes: 8 additions & 17 deletions datafusion/physical-plan/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,15 +200,12 @@ impl ExecutionPlan for FilterExec {
// assume filter selects 20% of rows if we cannot do anything smarter
// tracking issue for making this configurable:
// https://github.com/apache/arrow-datafusion/issues/8133
let selectivity = 0.2_f32;
let mut stats = input_stats.into_inexact();
if let Precision::Inexact(n) = stats.num_rows {
stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize);
}
if let Precision::Inexact(n) = stats.total_byte_size {
stats.total_byte_size =
Precision::Inexact((selectivity * n as f32) as usize);
}
let selectivity = 0.2_f64;
let mut stats = input_stats.clone().into_inexact();
stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity);
stats.total_byte_size = stats
.total_byte_size
.with_estimated_selectivity(selectivity);
return Ok(stats);
}

Expand All @@ -222,14 +219,8 @@ impl ExecutionPlan for FilterExec {

// Estimate (inexact) selectivity of predicate
let selectivity = analysis_ctx.selectivity.unwrap_or(1.0);
let num_rows = match num_rows.get_value() {
Some(nr) => Precision::Inexact((*nr as f64 * selectivity).ceil() as usize),
None => Precision::Absent,
};
let total_byte_size = match total_byte_size.get_value() {
Some(tbs) => Precision::Inexact((*tbs as f64 * selectivity).ceil() as usize),
None => Precision::Absent,
};
let num_rows = num_rows.with_estimated_selectivity(selectivity);
let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity);

let column_statistics = collect_new_statistics(
&input_stats.column_statistics,
Expand Down

0 comments on commit a2b9ab8

Please sign in to comment.