From 89677ae66ec2238ffd58de8c36fe6030fad458fa Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 31 Jul 2024 20:31:59 +0800 Subject: [PATCH] Check hashes first during probing the aggr hash table (#11718) * check hashes first during probing. * fix style. --------- Co-authored-by: xikai.wxk --- .../physical-plan/src/aggregates/group_values/row.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 8c2a4ba5c497..9f05da7cff53 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -120,12 +120,13 @@ impl GroupValues for GroupValuesRows { batch_hashes.resize(n_rows, 0); create_hashes(cols, &self.random_state, batch_hashes)?; - for (row, &hash) in batch_hashes.iter().enumerate() { - let entry = self.map.get_mut(hash, |(_hash, group_idx)| { + for (row, &target_hash) in batch_hashes.iter().enumerate() { + let entry = self.map.get_mut(target_hash, |(exist_hash, group_idx)| { // verify that a group that we are inserting with hash is // actually the same key value as the group in // existing_idx (aka group_values @ row) - group_rows.row(row) == group_values.row(*group_idx) + target_hash == *exist_hash + && group_rows.row(row) == group_values.row(*group_idx) }); let group_idx = match entry { @@ -139,7 +140,7 @@ impl GroupValues for GroupValuesRows { // for hasher function, use precomputed hash value self.map.insert_accounted( - (hash, group_idx), + (target_hash, group_idx), |(hash, _group_index)| *hash, &mut self.map_size, );