From 0d7cc5f5d2b772644af43460bb37507ce15a41ab Mon Sep 17 00:00:00 2001 From: Alex Koshelev Date: Tue, 10 Dec 2024 10:35:18 -0800 Subject: [PATCH 1/3] Make hybrid_in_the_clear use iterators, rather than slices Same reason as for many other recent PRs - save memory for inputs larger than 200M, the footprint is significant --- ipa-core/src/bin/in_the_clear.rs | 4 +- ipa-core/src/test_fixture/hybrid.rs | 105 ++++++++++++++++++---------- 2 files changed, 70 insertions(+), 39 deletions(-) diff --git a/ipa-core/src/bin/in_the_clear.rs b/ipa-core/src/bin/in_the_clear.rs index 16b2235df..af6cb380d 100644 --- a/ipa-core/src/bin/in_the_clear.rs +++ b/ipa-core/src/bin/in_the_clear.rs @@ -49,9 +49,9 @@ fn main() -> Result<(), Box> { let input = InputSource::from(&args.input); - let input_rows = input.iter::().collect::>(); + let input_rows = input.iter::(); let expected = hybrid_in_the_clear( - &input_rows, + input_rows, usize::try_from(args.max_breakdown_key.get()).unwrap(), ); diff --git a/ipa-core/src/test_fixture/hybrid.rs b/ipa-core/src/test_fixture/hybrid.rs index ba1b19e2c..832f1e655 100644 --- a/ipa-core/src/test_fixture/hybrid.rs +++ b/ipa-core/src/test_fixture/hybrid.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, iter::zip}; +use std::{borrow::Borrow, collections::HashMap, iter::zip}; use crate::{ ff::{ @@ -191,67 +191,98 @@ where enum MatchEntry { Single(TestHybridRecord), - Pair(TestHybridRecord, TestHybridRecord), + Attributed(Option<(u32, u32)>), + // Pair(TestHybridRecord, TestHybridRecord), MoreThanTwo, } impl MatchEntry { - pub fn add_record(&mut self, new_record: TestHybridRecord) { - match self { - Self::Single(old_record) => { - *self = Self::Pair(old_record.clone(), new_record); - } - Self::Pair { .. } | Self::MoreThanTwo => *self = Self::MoreThanTwo, - } + pub fn add_record(&mut self, new_record: &TestHybridRecord) { + let new_v = if let Self::Single(old_record) = &self { + Self::attribute(old_record, new_record) + } else { + Self::MoreThanTwo + }; + + *self = new_v; + } + + fn attribute(old_record: &TestHybridRecord, new_record: &TestHybridRecord) -> Self { + let attr_result = match (old_record, new_record) { + ( + TestHybridRecord::TestImpression { breakdown_key, .. }, + TestHybridRecord::TestConversion { value, .. }, + ) + | ( + TestHybridRecord::TestConversion { value, .. }, + TestHybridRecord::TestImpression { breakdown_key, .. }, + ) => Some((*breakdown_key, *value)), + (TestHybridRecord::TestConversion { value: value1, .. }, TestHybridRecord::TestConversion { value: value2, .. }) => Some((0, *value1 + value2)), + _ => None + }; + + Self::Attributed(attr_result) } pub fn into_breakdown_key_and_value_tuple(self) -> Option<(u32, u32)> { match self { - Self::Pair(imp, conv) => match (imp, conv) { - ( - TestHybridRecord::TestImpression { breakdown_key, .. }, - TestHybridRecord::TestConversion { value, .. }, - ) - | ( - TestHybridRecord::TestConversion { value, .. }, - TestHybridRecord::TestImpression { breakdown_key, .. }, - ) => Some((breakdown_key, value)), - ( - TestHybridRecord::TestConversion { value: value1, .. }, - TestHybridRecord::TestConversion { value: value2, .. }, - ) => Some((0, value1 + value2)), - _ => None, - }, + Self::Attributed(Some((breakdown_key, value))) => Some((breakdown_key, value)), _ => None, } + // Self::Pair(imp, conv) => match (imp, conv) { + // ( + // TestHybridRecord::TestImpression { breakdown_key, .. }, + // TestHybridRecord::TestConversion { value, .. }, + // ) + // | ( + // TestHybridRecord::TestConversion { value, .. }, + // TestHybridRecord::TestImpression { breakdown_key, .. }, + // ) => Some((breakdown_key, value)), + // ( + // TestHybridRecord::TestConversion { value: value1, .. }, + // TestHybridRecord::TestConversion { value: value2, .. }, + // ) => Some((0, value1 + value2)), + // _ => None, + // }, + // _ => None, } } /// # Panics /// It won't, so long as you can convert a u32 to a usize #[must_use] -pub fn hybrid_in_the_clear(input_rows: &[TestHybridRecord], max_breakdown: usize) -> Vec { +pub fn hybrid_in_the_clear>>( + input_rows: I, + max_breakdown: usize, +) -> Vec { let mut attributed_conversions = HashMap::::new(); - for input in input_rows { - match input { - TestHybridRecord::TestConversion { match_key, .. } - | TestHybridRecord::TestImpression { match_key, .. } => { + for (cnt, input) in input_rows.into_iter().enumerate() { + if cnt % 1_000_000 == 0 { + tracing::info!("processed another 1M rows: {}, size of conversions: {}", cnt / 1_000_000, attributed_conversions.len()); + } + match input.borrow() { + r @ (TestHybridRecord::TestConversion { match_key, .. } + | TestHybridRecord::TestImpression { match_key, .. }) => { attributed_conversions .entry(*match_key) - .and_modify(|e| e.add_record(input.clone())) - .or_insert(MatchEntry::Single(input.clone())); + .and_modify(|e| e.add_record(r)) + .or_insert(MatchEntry::Single(r.clone())); } } } + tracing::info!("done attribution phase"); - let pairs = attributed_conversions - .into_values() - .filter_map(MatchEntry::into_breakdown_key_and_value_tuple) - .collect::>(); + // let pairs = attributed_conversions + // .into_values() + // .filter_map(MatchEntry::into_breakdown_key_and_value_tuple) + // .collect::>(); let mut output = vec![0; max_breakdown]; - for (breakdown_key, value) in pairs { - output[usize::try_from(breakdown_key).unwrap()] += value; + // for (breakdown_key, value) in attributed_conversions.into_values() { + for entry in attributed_conversions.into_values() { + if let Some((breakdown_key, value)) = entry.into_breakdown_key_and_value_tuple() { + output[usize::try_from(breakdown_key).unwrap()] += value; + } } output From 12c6e8cbff41f5ddfe38f751aa94e63f1323b34a Mon Sep 17 00:00:00 2001 From: Alex Koshelev Date: Tue, 10 Dec 2024 15:34:15 -0800 Subject: [PATCH 2/3] Only store what's needed for attribution logic --- ipa-core/src/test_fixture/hybrid.rs | 88 +++++++++++++++-------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/ipa-core/src/test_fixture/hybrid.rs b/ipa-core/src/test_fixture/hybrid.rs index 832f1e655..8609d7f62 100644 --- a/ipa-core/src/test_fixture/hybrid.rs +++ b/ipa-core/src/test_fixture/hybrid.rs @@ -190,61 +190,61 @@ where } enum MatchEntry { - Single(TestHybridRecord), + SingleImpression { breakdown_key: u32 }, + SingleConversion { value: u32 }, Attributed(Option<(u32, u32)>), - // Pair(TestHybridRecord, TestHybridRecord), MoreThanTwo, } impl MatchEntry { - pub fn add_record(&mut self, new_record: &TestHybridRecord) { - let new_v = if let Self::Single(old_record) = &self { - Self::attribute(old_record, new_record) - } else { - Self::MoreThanTwo - }; + pub fn from_record(record: &TestHybridRecord) -> Self { + match record { + TestHybridRecord::TestImpression { breakdown_key, .. } => Self::SingleImpression { + breakdown_key: *breakdown_key, + }, + TestHybridRecord::TestConversion { value, .. } => { + Self::SingleConversion { value: *value } + } + } + } - *self = new_v; + pub fn add_record(&mut self, new_record: &TestHybridRecord) { + match self { + MatchEntry::SingleImpression { breakdown_key, .. } => { + *self = Self::attribute_impression(*breakdown_key, new_record); + } + MatchEntry::SingleConversion { value } => { + *self = Self::attribute_conversion(*value, new_record); + } + _ => *self = Self::MoreThanTwo, + } } - fn attribute(old_record: &TestHybridRecord, new_record: &TestHybridRecord) -> Self { - let attr_result = match (old_record, new_record) { - ( - TestHybridRecord::TestImpression { breakdown_key, .. }, - TestHybridRecord::TestConversion { value, .. }, - ) - | ( - TestHybridRecord::TestConversion { value, .. }, - TestHybridRecord::TestImpression { breakdown_key, .. }, - ) => Some((*breakdown_key, *value)), - (TestHybridRecord::TestConversion { value: value1, .. }, TestHybridRecord::TestConversion { value: value2, .. }) => Some((0, *value1 + value2)), - _ => None - }; + fn attribute_impression(breakdown_key: u32, new_record: &TestHybridRecord) -> Self { + match new_record { + TestHybridRecord::TestImpression { .. } => Self::Attributed(None), + TestHybridRecord::TestConversion { value, .. } => { + Self::Attributed(Some((breakdown_key, *value))) + } + } + } - Self::Attributed(attr_result) + fn attribute_conversion(value: u32, new_record: &TestHybridRecord) -> Self { + match new_record { + TestHybridRecord::TestImpression { breakdown_key, .. } => { + Self::Attributed(Some((*breakdown_key, value))) + } + TestHybridRecord::TestConversion { + value: other_value, .. + } => Self::Attributed(Some((0, value + *other_value))), + } } pub fn into_breakdown_key_and_value_tuple(self) -> Option<(u32, u32)> { match self { - Self::Attributed(Some((breakdown_key, value))) => Some((breakdown_key, value)), + Self::Attributed(v) => v, _ => None, } - // Self::Pair(imp, conv) => match (imp, conv) { - // ( - // TestHybridRecord::TestImpression { breakdown_key, .. }, - // TestHybridRecord::TestConversion { value, .. }, - // ) - // | ( - // TestHybridRecord::TestConversion { value, .. }, - // TestHybridRecord::TestImpression { breakdown_key, .. }, - // ) => Some((breakdown_key, value)), - // ( - // TestHybridRecord::TestConversion { value: value1, .. }, - // TestHybridRecord::TestConversion { value: value2, .. }, - // ) => Some((0, value1 + value2)), - // _ => None, - // }, - // _ => None, } } @@ -258,7 +258,11 @@ pub fn hybrid_in_the_clear>>( let mut attributed_conversions = HashMap::::new(); for (cnt, input) in input_rows.into_iter().enumerate() { if cnt % 1_000_000 == 0 { - tracing::info!("processed another 1M rows: {}, size of conversions: {}", cnt / 1_000_000, attributed_conversions.len()); + tracing::info!( + "processed another 1M rows: {}, size of conversions: {}", + cnt / 1_000_000, + attributed_conversions.len() + ); } match input.borrow() { r @ (TestHybridRecord::TestConversion { match_key, .. } @@ -266,7 +270,7 @@ pub fn hybrid_in_the_clear>>( attributed_conversions .entry(*match_key) .and_modify(|e| e.add_record(r)) - .or_insert(MatchEntry::Single(r.clone())); + .or_insert(MatchEntry::from_record(r)); } } } From 1acf7bd5d8af9f44bec88b9e7be80054f30a7f23 Mon Sep 17 00:00:00 2001 From: Alex Koshelev Date: Tue, 10 Dec 2024 15:51:18 -0800 Subject: [PATCH 3/3] Final version --- ipa-core/src/test_fixture/hybrid.rs | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/ipa-core/src/test_fixture/hybrid.rs b/ipa-core/src/test_fixture/hybrid.rs index 8609d7f62..335c8bd5f 100644 --- a/ipa-core/src/test_fixture/hybrid.rs +++ b/ipa-core/src/test_fixture/hybrid.rs @@ -256,14 +256,7 @@ pub fn hybrid_in_the_clear>>( max_breakdown: usize, ) -> Vec { let mut attributed_conversions = HashMap::::new(); - for (cnt, input) in input_rows.into_iter().enumerate() { - if cnt % 1_000_000 == 0 { - tracing::info!( - "processed another 1M rows: {}, size of conversions: {}", - cnt / 1_000_000, - attributed_conversions.len() - ); - } + for input in input_rows { match input.borrow() { r @ (TestHybridRecord::TestConversion { match_key, .. } | TestHybridRecord::TestImpression { match_key, .. }) => { @@ -274,15 +267,8 @@ pub fn hybrid_in_the_clear>>( } } } - tracing::info!("done attribution phase"); - - // let pairs = attributed_conversions - // .into_values() - // .filter_map(MatchEntry::into_breakdown_key_and_value_tuple) - // .collect::>(); let mut output = vec![0; max_breakdown]; - // for (breakdown_key, value) in attributed_conversions.into_values() { for entry in attributed_conversions.into_values() { if let Some((breakdown_key, value)) = entry.into_breakdown_key_and_value_tuple() { output[usize::try_from(breakdown_key).unwrap()] += value;