From d8cd1d1c0115e2e3b06432f30451aa18b912719c Mon Sep 17 00:00:00 2001 From: Andy Leiserson Date: Wed, 3 Jan 2024 16:48:06 -0800 Subject: [PATCH] Vectorization for prime fields Semi-honest AdditiveShare can hold a vector of sharings instead of just one sharing. The semi-honest multiply can operate on these vectors. --- ipa-core/benches/ct/arithmetic_circuit.rs | 57 +++- ipa-core/benches/iai/arithmetic_circuit.rs | 4 +- .../benches/oneshot/arithmetic_circuit.rs | 4 +- ipa-core/src/ff/boolean.rs | 15 +- ipa-core/src/ff/boolean_array.rs | 20 +- ipa-core/src/ff/curve_points.rs | 6 +- ipa-core/src/ff/ec_prime_field.rs | 12 +- ipa-core/src/ff/field.rs | 7 +- ipa-core/src/ff/galois_field.rs | 38 ++- ipa-core/src/ff/mod.rs | 2 +- ipa-core/src/ff/prime_field.rs | 20 +- .../src/helpers/buffers/ordering_sender.rs | 21 +- ipa-core/src/helpers/gateway/send.rs | 9 +- .../src/helpers/gateway/stall_detection.rs | 3 +- ipa-core/src/protocol/basics/mul/mod.rs | 15 +- .../src/protocol/basics/mul/semi_honest.rs | 200 +++++++++-- ipa-core/src/protocol/basics/mul/sparse.rs | 24 +- .../modulus_conversion/convert_shares.rs | 2 +- ipa-core/src/secret_sharing/array.rs | 315 ++++++++++++++++++ ipa-core/src/secret_sharing/mod.rs | 208 +++++++++++- .../replicated/semi_honest/additive_share.rs | 257 +++++++++----- ipa-core/src/test_fixture/circuit.rs | 48 ++- ipa-core/src/test_fixture/mod.rs | 2 +- ipa-core/src/test_fixture/sharing.rs | 38 ++- 24 files changed, 1148 insertions(+), 179 deletions(-) create mode 100644 ipa-core/src/secret_sharing/array.rs diff --git a/ipa-core/benches/ct/arithmetic_circuit.rs b/ipa-core/benches/ct/arithmetic_circuit.rs index 1c11e8dd2..fac67dca9 100644 --- a/ipa-core/benches/ct/arithmetic_circuit.rs +++ b/ipa-core/benches/ct/arithmetic_circuit.rs @@ -1,8 +1,38 @@ use criterion::{ - black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput, + black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup, + BenchmarkId, Criterion, SamplingMode, Throughput, }; -use ipa_core::{ff::Fp31, test_fixture::circuit}; -use tokio::runtime::Builder; +use ipa_core::{ + ff::{Field, Fp31, Fp32BitPrime}, + protocol::{basics::SecureMul, context::SemiHonestContext}, + secret_sharing::{replicated::semi_honest::AdditiveShare as Replicated, FieldSimd, IntoShares}, + test_fixture::circuit, +}; +use rand::distributions::{Distribution, Standard}; +use tokio::runtime::{Builder, Runtime}; + +fn do_benchmark( + rt: &Runtime, + group: &mut BenchmarkGroup, + width: u32, + depth: u16, +) where + M: Measurement, + F: Field + FieldSimd, + for<'a> Replicated: SecureMul>, + [F; N]: IntoShares>, + Standard: Distribution, +{ + group.throughput(Throughput::Elements((width * depth as u32) as u64)); + group.bench_with_input( + BenchmarkId::new("circuit", format!("{width}:{depth}:{}x{}", F::NAME, N)), + &(width, depth), + |b, &(width, depth)| { + b.to_async(rt) + .iter(|| circuit::arithmetic::(black_box(width), black_box(depth))); + }, + ); +} pub fn criterion_benchmark(c: &mut Criterion) { let rt = Builder::new_multi_thread() @@ -16,19 +46,14 @@ pub fn criterion_benchmark(c: &mut Criterion) { group.sample_size(10); group.sampling_mode(SamplingMode::Flat); - for width in [5_000u32, 50_000, 500_000, 1_000_000] { - for depth in [1u8, 10, 64] { - group.throughput(Throughput::Elements((width * depth as u32) as u64)); - group.bench_with_input( - BenchmarkId::new("circuit", format!("{width}:{depth}")), - &(width, depth), - |b, &(width, depth)| { - b.to_async(&rt) - .iter(|| circuit::arithmetic::(black_box(width), black_box(depth))); - }, - ); - } - } + do_benchmark::<_, Fp31, 1>(&rt, &mut group, 512_000, 1); + do_benchmark::<_, Fp31, 1>(&rt, &mut group, 51_200, 10); + do_benchmark::<_, Fp31, 1>(&rt, &mut group, 8_000, 64); + + do_benchmark::<_, Fp32BitPrime, 1>(&rt, &mut group, 25_600, 10); + do_benchmark::<_, Fp32BitPrime, 1>(&rt, &mut group, 2_560, 100); + do_benchmark::<_, Fp32BitPrime, 32>(&rt, &mut group, 4_000, 64); + do_benchmark::<_, Fp32BitPrime, 32>(&rt, &mut group, 250, 1_024); } criterion_group!(benches, criterion_benchmark); diff --git a/ipa-core/benches/iai/arithmetic_circuit.rs b/ipa-core/benches/iai/arithmetic_circuit.rs index af4cc3c2c..ef43e70a0 100644 --- a/ipa-core/benches/iai/arithmetic_circuit.rs +++ b/ipa-core/benches/iai/arithmetic_circuit.rs @@ -10,10 +10,10 @@ pub fn iai_benchmark() { .expect("Creating runtime failed"); const CIRCUIT_WIDTH: u32 = 500_000; - const CIRCUIT_DEPTH: u8 = 1; + const CIRCUIT_DEPTH: u16 = 1; rt.block_on(async { - circuit::arithmetic::(black_box(CIRCUIT_WIDTH), black_box(CIRCUIT_DEPTH)).await; + circuit::arithmetic::(black_box(CIRCUIT_WIDTH), black_box(CIRCUIT_DEPTH)).await; }) } diff --git a/ipa-core/benches/oneshot/arithmetic_circuit.rs b/ipa-core/benches/oneshot/arithmetic_circuit.rs index e4fdeceab..c78a409ea 100644 --- a/ipa-core/benches/oneshot/arithmetic_circuit.rs +++ b/ipa-core/benches/oneshot/arithmetic_circuit.rs @@ -14,7 +14,7 @@ pub struct CircuitArgs { pub width: u32, #[arg(short, long, help = "depth of the circuit", default_value_t = 10)] - pub depth: u8, + pub depth: u16, /// Cargo passes the bench argument /// https://doc.rust-lang.org/cargo/commands/cargo-bench.html @@ -34,7 +34,7 @@ pub async fn main() { } let start = Instant::now(); - circuit::arithmetic::(args.width, args.depth).await; + circuit::arithmetic::(args.width, args.depth).await; let duration = start.elapsed().as_secs_f32(); println!("benchmark complete after {duration}s"); diff --git a/ipa-core/src/ff/boolean.rs b/ipa-core/src/ff/boolean.rs index 6937dd1a9..f577c5ac1 100644 --- a/ipa-core/src/ff/boolean.rs +++ b/ipa-core/src/ff/boolean.rs @@ -5,7 +5,10 @@ use super::Gf32Bit; use crate::{ ff::{Field, Serializable}, protocol::prss::FromRandomU128, - secret_sharing::{replicated::malicious::ExtendableField, Block, SharedValue}, + secret_sharing::{ + replicated::malicious::ExtendableField, Block, FieldVectorizable, SharedValue, StdArray, + Vectorizable, + }, }; impl Block for bool { @@ -40,6 +43,14 @@ impl SharedValue for Boolean { const ZERO: Self = Self(false); } +impl Vectorizable<1> for Boolean { + type Array = StdArray; +} + +impl FieldVectorizable<1> for Boolean { + type ArrayAlias = StdArray; +} + ///conversion to Scalar struct of `curve25519_dalek` impl From for bool { fn from(s: Boolean) -> Self { @@ -146,6 +157,8 @@ impl From for Boolean { ///implement Field because required by PRSS impl Field for Boolean { + const NAME: &'static str = "Boolean"; + const ONE: Boolean = Boolean(true); fn as_u128(&self) -> u128 { diff --git a/ipa-core/src/ff/boolean_array.rs b/ipa-core/src/ff/boolean_array.rs index 9d83b1286..e733264f7 100644 --- a/ipa-core/src/ff/boolean_array.rs +++ b/ipa-core/src/ff/boolean_array.rs @@ -8,7 +8,7 @@ use typenum::{U14, U2, U32, U8}; use crate::{ ff::{boolean::Boolean, ArrayAccess, Field, Serializable}, protocol::prss::{FromRandom, FromRandomU128}, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, FieldVectorizable, SharedValue, StdArray, Vectorizable}, }; /// The implementation below cannot be constrained without breaking Rust's @@ -42,6 +42,12 @@ impl<'a> Iterator for BAIterator<'a> { } } +impl<'a> ExactSizeIterator for BAIterator<'a> { + fn len(&self) -> usize { + self.iterator.len() + } +} + /// A value of ONE has a one in the first element of the bit array, followed by `$bits-1` zeros. /// This macro uses a bit of recursive repetition to produce those zeros. /// @@ -95,6 +101,8 @@ macro_rules! boolean_array_impl_small { // TODO(812): remove this impl; BAs are not field elements. impl Field for $name { + const NAME: &'static str = stringify!($name); + const ONE: Self = Self(bitarr_one!($bits)); fn as_u128(&self) -> u128 { @@ -153,6 +161,10 @@ macro_rules! boolean_array_impl_small { Field::truncate_from(src) } } + + impl FieldVectorizable<1> for $name { + type ArrayAlias = StdArray<$name, 1>; + } }; } @@ -358,6 +370,10 @@ macro_rules! boolean_array_impl { } } + impl Vectorizable<1> for $name { + type Array = StdArray<$name, 1>; + } + impl std::ops::Mul for $name { type Output = Self; fn mul(self, rhs: Self) -> Self::Output { @@ -394,7 +410,7 @@ macro_rules! boolean_array_impl { #[allow(clippy::into_iter_without_iter)] impl<'a> IntoIterator for &'a AdditiveShare<$name> { type Item = AdditiveShare; - type IntoIter = ASIterator>; + type IntoIter = ASIterator<'a, $name>; fn into_iter(self) -> Self::IntoIter { self.iter() diff --git a/ipa-core/src/ff/curve_points.rs b/ipa-core/src/ff/curve_points.rs index 499845f7b..f0db75b49 100644 --- a/ipa-core/src/ff/curve_points.rs +++ b/ipa-core/src/ff/curve_points.rs @@ -7,7 +7,7 @@ use typenum::U32; use crate::{ ff::{ec_prime_field::Fp25519, Serializable}, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, SharedValue, StdArray, Vectorizable}, }; impl Block for CompressedRistretto { @@ -35,6 +35,10 @@ impl SharedValue for RP25519 { const ZERO: Self = Self(CompressedRistretto([0_u8; 32])); } +impl Vectorizable<1> for RP25519 { + type Array = StdArray; +} + #[derive(thiserror::Error, Debug)] #[error("{0:?} is not the canonical encoding of a Ristretto point.")] pub struct NonCanonicalEncoding(CompressedRistretto); diff --git a/ipa-core/src/ff/ec_prime_field.rs b/ipa-core/src/ff/ec_prime_field.rs index 0e72024ab..4c03a6a6f 100644 --- a/ipa-core/src/ff/ec_prime_field.rs +++ b/ipa-core/src/ff/ec_prime_field.rs @@ -9,7 +9,7 @@ use typenum::U32; use crate::{ ff::{boolean_array::BA256, Field, Serializable}, protocol::prss::FromRandomU128, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, FieldVectorizable, SharedValue, StdArray, Vectorizable}, }; impl Block for Scalar { @@ -176,8 +176,18 @@ macro_rules! sc_hash_impl { #[cfg(test)] sc_hash_impl!(u64); +impl Vectorizable<1> for Fp25519 { + type Array = StdArray; +} + +impl FieldVectorizable<1> for Fp25519 { + type ArrayAlias = StdArray; +} + ///implement Field because required by PRSS impl Field for Fp25519 { + const NAME: &'static str = "Fp25519"; + const ONE: Fp25519 = Fp25519::ONE; ///both following methods are based on hashing and do not allow to actually convert elements in Fp25519 diff --git a/ipa-core/src/ff/field.rs b/ipa-core/src/ff/field.rs index 5535ed833..4e098cfda 100644 --- a/ipa-core/src/ff/field.rs +++ b/ipa-core/src/ff/field.rs @@ -8,7 +8,7 @@ use typenum::{U1, U4}; use crate::{ error, protocol::prss::FromRandomU128, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, FieldVectorizable, SharedValue, Vectorizable}, }; impl Block for u8 { @@ -29,7 +29,12 @@ pub trait Field: + FromRandomU128 + TryFrom + Into + + Vectorizable<1> + + FieldVectorizable<1, ArrayAlias = >::Array> { + // Name of the field + const NAME: &'static str; + /// Multiplicative identity element const ONE: Self; diff --git a/ipa-core/src/ff/galois_field.rs b/ipa-core/src/ff/galois_field.rs index fb7c9ae0a..64a345f6f 100644 --- a/ipa-core/src/ff/galois_field.rs +++ b/ipa-core/src/ff/galois_field.rs @@ -15,7 +15,7 @@ use crate::{ ff::{boolean_array::NonZeroPadding, Field, Serializable}, impl_serializable_trait, protocol::prss::FromRandomU128, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, FieldVectorizable, SharedValue, Vectorizable}, }; /// Trait for data types storing arbitrary number of bits. @@ -148,6 +148,12 @@ impl<'a> Iterator for BoolIterator<'a> { } } +impl<'a> ExactSizeIterator for BoolIterator<'a> { + fn len(&self) -> usize { + self.0.len() + } +} + macro_rules! bit_array_impl { ( $modname:ident, $name:ident, $store:ty, $bits:expr, $one:expr, $polynomial:expr, $deser_type: tt, $({$($extra:item)*})? ) => { #[allow(clippy::suspicious_arithmetic_impl)] @@ -169,7 +175,17 @@ macro_rules! bit_array_impl { const ZERO: Self = Self(<$store>::ZERO); } + impl Vectorizable<1> for $name { + type Array = crate::secret_sharing::StdArray<$name, 1>; + } + + impl FieldVectorizable<1> for $name { + type ArrayAlias = crate::secret_sharing::StdArray<$name, 1>; + } + impl Field for $name { + const NAME: &'static str = stringify!($field); + const ONE: Self = Self($one); fn as_u128(&self) -> u128 { @@ -693,5 +709,25 @@ bit_array_impl!( value != Gf2::ZERO } } + + impl From for Gf2 { + fn from(value: crate::ff::boolean::Boolean) -> Self { + bool::from(value).into() + } + } + + impl From for crate::ff::boolean::Boolean { + fn from(value: Gf2) -> Self { + bool::from(value).into() + } + } + + impl std::ops::Not for Gf2 { + type Output = Self; + + fn not(self) -> Self { + (!bool::from(self)).into() + } + } } ); diff --git a/ipa-core/src/ff/mod.rs b/ipa-core/src/ff/mod.rs index 96aecca00..b831fd707 100644 --- a/ipa-core/src/ff/mod.rs +++ b/ipa-core/src/ff/mod.rs @@ -82,7 +82,7 @@ pub trait Serializable: Sized { pub trait ArrayAccess { type Output; - type Iter<'a>: Iterator + Send + type Iter<'a>: Iterator + ExactSizeIterator + Send where Self: 'a; diff --git a/ipa-core/src/ff/prime_field.rs b/ipa-core/src/ff/prime_field.rs index b7ac911b9..2f9694969 100644 --- a/ipa-core/src/ff/prime_field.rs +++ b/ipa-core/src/ff/prime_field.rs @@ -6,7 +6,7 @@ use super::Field; use crate::{ ff::Serializable, protocol::prss::FromRandomU128, - secret_sharing::{Block, SharedValue}, + secret_sharing::{Block, FieldVectorizable, SharedValue, StdArray, Vectorizable}, }; pub trait PrimeField: Field { @@ -33,7 +33,17 @@ macro_rules! field_impl { const ZERO: Self = $field(0); } + impl Vectorizable<1> for $field { + type Array = StdArray<$field, 1>; + } + + impl FieldVectorizable<1> for $field { + type ArrayAlias = StdArray<$field, 1>; + } + impl Field for $field { + const NAME: &'static str = stringify!($field); + const ONE: Self = $field(1); fn as_u128(&self) -> u128 { @@ -317,6 +327,14 @@ mod fp31 { mod fp32bit { field_impl! { Fp32BitPrime, u32, 32, 4_294_967_291 } + impl Vectorizable<32> for Fp32BitPrime { + type Array = StdArray; + } + + impl FieldVectorizable<32> for Fp32BitPrime { + type ArrayAlias = StdArray; + } + #[cfg(all(test, unit_test))] mod specialized_tests { use super::*; diff --git a/ipa-core/src/helpers/buffers/ordering_sender.rs b/ipa-core/src/helpers/buffers/ordering_sender.rs index 943ee07f5..d7e383036 100644 --- a/ipa-core/src/helpers/buffers/ordering_sender.rs +++ b/ipa-core/src/helpers/buffers/ordering_sender.rs @@ -3,6 +3,7 @@ use std::{ cmp::Ordering, collections::VecDeque, fmt::Debug, + marker::PhantomData, mem::drop, num::NonZeroUsize, pin::Pin, @@ -330,8 +331,13 @@ impl OrderingSender { /// * the same index is provided more than once. /// /// [capacity]: OrderingSender#spare-capacity-configuration - pub fn send(&self, i: usize, m: M) -> Send<'_, M> { - Send { i, m, sender: self } + pub fn send>(&self, i: usize, m: B) -> Send<'_, M, B> { + Send { + i, + m, + sender: self, + phantom_data: PhantomData, + } } /// Close the sender at index `i`. @@ -433,13 +439,14 @@ impl OrderingSender { } /// A future for writing item `i` into an `OrderingSender`. -pub struct Send<'s, M: Message> { +pub struct Send<'a, M: Message, B: Borrow + 'a> { i: usize, - m: M, - sender: &'s OrderingSender, + m: B, + sender: &'a OrderingSender, + phantom_data: PhantomData, } -impl<'s, M: Message> Future for Send<'s, M> { +impl<'a, M: Message, B: Borrow + 'a> Future for Send<'a, M, B> { type Output = (); fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { @@ -447,7 +454,7 @@ impl<'s, M: Message> Future for Send<'s, M> { let res = this.sender.next_op(this.i, cx, |b| { assert!(!b.closed, "writing on a closed stream"); - b.write(&this.m, cx) + b.write(this.m.borrow(), cx) }); // A successful write: wake the next in line. // But not while holding the lock on state. diff --git a/ipa-core/src/helpers/gateway/send.rs b/ipa-core/src/helpers/gateway/send.rs index bdc51a4da..00d8de096 100644 --- a/ipa-core/src/helpers/gateway/send.rs +++ b/ipa-core/src/helpers/gateway/send.rs @@ -1,4 +1,5 @@ use std::{ + borrow::Borrow, marker::PhantomData, num::NonZeroUsize, pin::Pin, @@ -52,7 +53,11 @@ impl GatewaySender { } } - pub async fn send(&self, record_id: RecordId, msg: M) -> Result<(), Error> { + pub async fn send>( + &self, + record_id: RecordId, + msg: B, + ) -> Result<(), Error> { debug_assert!( self.total_records.is_specified(), "total_records cannot be unspecified when sending" @@ -109,7 +114,7 @@ impl SendingEnd { /// /// [`set_total_records`]: crate::protocol::context::Context::set_total_records #[tracing::instrument(level = "trace", "send", skip_all, fields(i = %record_id, total = %self.inner.total_records, to = ?self.channel_id.role, gate = ?self.channel_id.gate.as_ref()))] - pub async fn send(&self, record_id: RecordId, msg: M) -> Result<(), Error> { + pub async fn send>(&self, record_id: RecordId, msg: B) -> Result<(), Error> { let r = self.inner.send(record_id, msg).await; metrics::increment_counter!(RECORDS_SENT, STEP => self.channel_id.gate.as_ref().to_string(), diff --git a/ipa-core/src/helpers/gateway/stall_detection.rs b/ipa-core/src/helpers/gateway/stall_detection.rs index c2e288572..9a1b28732 100644 --- a/ipa-core/src/helpers/gateway/stall_detection.rs +++ b/ipa-core/src/helpers/gateway/stall_detection.rs @@ -270,6 +270,7 @@ mod receive { mod send { use std::{ + borrow::Borrow, collections::BTreeMap, fmt::{Debug, Formatter}, }; @@ -288,7 +289,7 @@ mod send { delegate::delegate! { to { self.advance(); self.inner() } { #[inline] - pub async fn send(&self, record_id: RecordId, msg: M) -> Result<(), Error>; + pub async fn send>(&self, record_id: RecordId, msg: B) -> Result<(), Error>; } } } diff --git a/ipa-core/src/protocol/basics/mul/mod.rs b/ipa-core/src/protocol/basics/mul/mod.rs index ed98e9d0b..b8924343a 100644 --- a/ipa-core/src/protocol/basics/mul/mod.rs +++ b/ipa-core/src/protocol/basics/mul/mod.rs @@ -7,9 +7,12 @@ use crate::{ context::{Context, UpgradedMaliciousContext}, RecordId, }, - secret_sharing::replicated::{ - malicious::{AdditiveShare as MaliciousReplicated, ExtendableField}, - semi_honest::AdditiveShare as Replicated, + secret_sharing::{ + replicated::{ + malicious::{AdditiveShare as MaliciousReplicated, ExtendableField}, + semi_honest::AdditiveShare as Replicated, + }, + FieldSimd, }, }; @@ -52,7 +55,11 @@ use {malicious::multiply as malicious_mul, semi_honest::multiply as semi_honest_ /// Implement secure multiplication for semi-honest contexts with replicated secret sharing. #[async_trait] -impl SecureMul for Replicated { +impl SecureMul for Replicated +where + C: Context, + F: Field + FieldSimd, +{ async fn multiply_sparse<'fut>( &self, rhs: &Self, diff --git a/ipa-core/src/protocol/basics/mul/semi_honest.rs b/ipa-core/src/protocol/basics/mul/semi_honest.rs index 25de86946..67171ff25 100644 --- a/ipa-core/src/protocol/basics/mul/semi_honest.rs +++ b/ipa-core/src/protocol/basics/mul/semi_honest.rs @@ -8,8 +8,9 @@ use crate::{ prss::SharedRandomness, RecordId, }, - secret_sharing::replicated::{ - semi_honest::AdditiveShare as Replicated, ReplicatedSecretSharing, + secret_sharing::{ + replicated::semi_honest::AdditiveShare as Replicated, FieldSimd, SharedValueArray, + Vectorizable, }, }; @@ -26,16 +27,16 @@ use crate::{ /// ## Errors /// Lots of things may go wrong here, from timeouts to bad output. They will be signalled /// back via the error response -pub async fn multiply( +pub async fn multiply( ctx: C, record_id: RecordId, - a: &Replicated, - b: &Replicated, + a: &Replicated, + b: &Replicated, zeros: MultiplyZeroPositions, -) -> Result, Error> +) -> Result, Error> where C: Context, - F: Field, + F: Field + FieldSimd, { let role = ctx.role(); let [need_to_recv, need_to_send, need_random_right] = zeros.work_for(role); @@ -43,19 +44,26 @@ where zeros.1.check(role, "b", b); // Shared randomness used to mask the values that are sent. - let (s0, s1) = ctx.prss().generate(record_id); + let (s0, s1) = ctx + .prss() + .generate::<(>::Array, _), _>(record_id); + + let mut rhs = a.right_arr().clone() * b.right_arr(); - let mut rhs = a.right() * b.right(); if need_to_send { // Compute the value (d_i) we want to send to the right helper (i+1). - let right_d = a.left() * b.right() + a.right() * b.left() - s0; + let right_d = + a.left_arr().clone() * b.right_arr() + a.right_arr().clone() * b.left_arr() - &s0; - ctx.send_channel(role.peer(Direction::Right)) - .send(record_id, right_d) + ctx.send_channel::<>::Array>(role.peer(Direction::Right)) + .send(record_id, &right_d) .await?; rhs += right_d; } else { - debug_assert_eq!(a.left() * b.right() + a.right() * b.left(), F::ZERO); + debug_assert_eq!( + a.left_arr().clone() * b.right_arr() + a.right_arr().clone() * b.left_arr(), + <>::Array as SharedValueArray>::ZERO + ); } // Add randomness to this value whether we sent or not, depending on whether the // peer to the right needed to send. If they send, they subtract randomness, @@ -65,9 +73,9 @@ where } // Sleep until helper on the left sends us their (d_i-1) value. - let mut lhs = a.left() * b.left(); + let mut lhs = a.left_arr().clone() * b.left_arr(); if need_to_recv { - let left_d = ctx + let left_d: >::Array = ctx .recv_channel(role.peer(Direction::Left)) .receive(record_id) .await?; @@ -78,21 +86,32 @@ where lhs += s0; } - Ok(Replicated::new(lhs, rhs)) + Ok(Replicated::new_arr(lhs, rhs)) } #[cfg(all(test, unit_test))] mod test { - use std::iter::{repeat, zip}; + use std::{ + array, + iter::{repeat, zip}, + time::Instant, + }; use rand::distributions::{Distribution, Standard}; + use super::multiply; use crate::{ - ff::{Field, Fp31}, - protocol::{basics::SecureMul, context::Context, RecordId}, + ff::{Field, Fp31, Fp32BitPrime}, + helpers::TotalRecords, + protocol::{ + basics::{SecureMul, ZeroPositions}, + context::Context, + RecordId, + }, rand::{thread_rng, Rng}, + secret_sharing::replicated::semi_honest::AdditiveShare, seq_join::SeqJoin, - test_fixture::{Reconstruct, Runner, TestWorld}, + test_fixture::{Reconstruct, ReconstructArr, Runner, TestWorld}, }; #[tokio::test] @@ -182,4 +201,145 @@ mod test { result.reconstruct().as_u128() } + + #[tokio::test] + pub async fn wide_mul() { + const COUNT: usize = 32; + let world = TestWorld::default(); + + let mut rng = thread_rng(); + let a: [Fp32BitPrime; COUNT] = (0..COUNT) + .map(|_| rng.gen::()) + .collect::>() + .try_into() + .unwrap(); + let b: [Fp32BitPrime; COUNT] = (0..COUNT) + .map(|_| rng.gen::()) + .collect::>() + .try_into() + .unwrap(); + let expected: [Fp32BitPrime; COUNT] = zip(a.iter(), b.iter()) + .map(|(&a, &b)| a * b) + .collect::>() + .try_into() + .unwrap(); + let results = world + .semi_honest((a, b), |ctx, (a_shares, b_shares)| async move { + multiply( + ctx.set_total_records(1), + RecordId::from(0), + &a_shares, + &b_shares, + ZeroPositions::NONE, + ) + .await + .unwrap() + }) + .await; + assert_eq!(expected, results.reconstruct_arr()); + } + + // The manymult test is a microbenchmark. The test generates a DxW matrix of field elements. The + // matrix is reduced to a single W-element row vector by taking the element-wise product of the + // D values in each column. The non-vectorized implementation (manymult_novec) simply does a + // parallel_join of W semi-honest multiplies. The vectorized implementation (manymult_vec) + // processes a row at a time. For manymult_vec, MANYMULT_WIDTH must match a supported + // vectorization width. + const MANYMULT_ITERS: usize = 512; + const MANYMULT_WIDTH: usize = 32; + + #[tokio::test] + pub async fn manymult_novec() { + let world = TestWorld::default(); + let mut rng = thread_rng(); + let mut inputs = Vec::>::new(); + for _ in 0..MANYMULT_ITERS { + inputs.push( + (0..MANYMULT_WIDTH) + .map(|_| Fp32BitPrime::try_from(u128::from(rng.gen_range(0u32..100))).unwrap()) + .collect::>(), + ); + } + let expected = inputs + .iter() + .fold(None, |acc: Option>, b| match acc { + Some(a) => Some(a.iter().zip(b.iter()).map(|(&a, &b)| a * b).collect()), + None => Some(b.clone()), + }) + .unwrap(); + + let begin = Instant::now(); + let result = world + .semi_honest( + inputs.into_iter().map(IntoIterator::into_iter), + |ctx, share: Vec>>| async move { + let ctx = ctx.set_total_records(MANYMULT_ITERS * MANYMULT_WIDTH); + let mut iter = share.iter(); + let mut val = iter.next().unwrap().clone(); + for i in 1..MANYMULT_ITERS { + let cur = iter.next().unwrap(); + let mut res = Vec::with_capacity(MANYMULT_WIDTH); + for j in 0..MANYMULT_WIDTH { + res.push(val[j].multiply( + &cur[j], + ctx.clone(), + RecordId::from(MANYMULT_WIDTH * (i - 1) + j), + )); + } + val = ctx.parallel_join(res).await.unwrap(); + } + val + }, + ) + .await; + tracing::debug!("Protocol execution time: {:?}", begin.elapsed()); + assert_eq!(expected, result.reconstruct()); + } + + #[tokio::test] + pub async fn manymult_vec() { + let world = TestWorld::default(); + let mut rng = thread_rng(); + let mut inputs = Vec::<[Fp32BitPrime; MANYMULT_WIDTH]>::new(); + for _ in 0..MANYMULT_ITERS { + inputs.push(array::from_fn(|_| rng.gen())); + } + let expected = inputs + .iter() + .fold(None, |acc: Option>, b| match acc { + Some(a) => Some(a.iter().zip(b.iter()).map(|(&a, &b)| a * b).collect()), + None => Some(b.to_vec()), + }) + .unwrap(); + + let begin = Instant::now(); + let result = world + .semi_honest( + inputs.into_iter(), + |ctx, share: Vec>| async move { + // The output of each row is input to the next row, so no parallelization + // across rows is possible. Thus we set TotalRecords::Indeterminate, which + // flushes after every record. If a row were larger than one record, we could + // instead configure the active work in TestWorld to match the row size. + let ctx = ctx.set_total_records(TotalRecords::Indeterminate); + let mut iter = share.iter(); + let mut val = iter.next().unwrap().clone(); + for i in 1..MANYMULT_ITERS { + val = multiply( + ctx.clone(), + RecordId::from(i - 1), + &val, + iter.next().unwrap(), + ZeroPositions::NONE, + ) + .await + .unwrap(); + } + val + }, + ) + .await; + tracing::debug!("Protocol execution time: {:?}", begin.elapsed()); + assert_eq!(expected, result.reconstruct_arr()); + } } diff --git a/ipa-core/src/protocol/basics/mul/sparse.rs b/ipa-core/src/protocol/basics/mul/sparse.rs index 9f1ad9943..878199b23 100644 --- a/ipa-core/src/protocol/basics/mul/sparse.rs +++ b/ipa-core/src/protocol/basics/mul/sparse.rs @@ -1,5 +1,8 @@ +#[cfg_attr(not(debug_assertions), allow(unused_variables))] +use crate::secret_sharing::Vectorizable; use crate::{ - ff::Field, helpers::Role, secret_sharing::replicated::semi_honest::AdditiveShare as Replicated, + helpers::Role, + secret_sharing::{replicated::semi_honest::AdditiveShare as Replicated, SharedValue}, }; /// A description of a replicated secret sharing, with zero values at known positions. @@ -105,25 +108,28 @@ impl ZeroPositions { /// # Panics /// When the input value includes a non-zero value in a position marked as having a zero. #[cfg_attr(not(debug_assertions), allow(unused_variables))] - pub fn check(self, role: Role, which: &str, v: &Replicated) { + pub fn check, const N: usize>( + self, + role: Role, + which: &str, + v: &Replicated, + ) { #[cfg(debug_assertions)] { - use crate::{ - helpers::Direction::Right, secret_sharing::replicated::ReplicatedSecretSharing, - }; + use crate::{helpers::Direction::Right, secret_sharing::SharedValueArray}; let flags = <[bool; 3]>::from(self); if flags[role as usize] { assert_eq!( - F::ZERO, - v.left(), + &>::Array::ZERO, + v.left_arr(), "expected a zero on the left for input {which}" ); } if flags[role.peer(Right) as usize] { assert_eq!( - F::ZERO, - v.right(), + &>::Array::ZERO, + v.right_arr(), "expected a zero on the right for input {which}" ); } diff --git a/ipa-core/src/protocol/modulus_conversion/convert_shares.rs b/ipa-core/src/protocol/modulus_conversion/convert_shares.rs index ce2a6a369..08a8dac75 100644 --- a/ipa-core/src/protocol/modulus_conversion/convert_shares.rs +++ b/ipa-core/src/protocol/modulus_conversion/convert_shares.rs @@ -70,7 +70,7 @@ impl BitConversionTriple> { /// /// # Panics /// If any bits in the bitwise shared input cannot be converted into the given field `F` - /// without truncation or if the bit index is out of range for `B`. + /// without truncation. #[must_use] pub fn new(helper_role: Role, left: bool, right: bool) -> Self { let left = F::try_from(u128::from(left)).unwrap(); diff --git a/ipa-core/src/secret_sharing/array.rs b/ipa-core/src/secret_sharing/array.rs new file mode 100644 index 000000000..f05cc30dc --- /dev/null +++ b/ipa-core/src/secret_sharing/array.rs @@ -0,0 +1,315 @@ +use std::{ + array, + borrow::Borrow, + fmt::Debug, + ops::{Add, AddAssign, Mul, Neg, Not, Sub, SubAssign}, +}; + +use generic_array::{ArrayLength, GenericArray}; +use typenum::{U1, U32}; + +use crate::{ + ff::{Field, Fp32BitPrime, Serializable}, + helpers::Message, + protocol::prss::{FromRandom, FromRandomU128}, + secret_sharing::{FieldArray, SharedValue, SharedValueArray}, +}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct StdArray([V; N]); + +impl PartialEq for StdArray +where + V: SharedValue, + T: Borrow<[V]>, +{ + fn eq(&self, other: &T) -> bool { + self.0.as_slice() == other.borrow() + } +} + +impl PartialEq> for Vec { + fn eq(&self, other: &StdArray) -> bool { + other.eq(self) + } +} + +impl PartialEq> for [V; N] { + fn eq(&self, other: &StdArray) -> bool { + other.eq(self) + } +} + +impl SharedValueArray for StdArray +where + Self: Serializable, +{ + const ZERO: Self = Self([V::ZERO; N]); + + fn from_fn V>(f: F) -> Self { + Self(array::from_fn(f)) + } + + fn get(&self, index: usize) -> V { + self.0[index] + } + + fn get_mut(&mut self, index: usize) -> &mut V { + &mut self.0[index] + } + + fn set(&mut self, index: usize, value: V) { + self.0[index] = value; + } +} + +impl FieldArray for StdArray where Self: FromRandom + Serializable +{} + +impl TryFrom> for StdArray { + type Error = (); + fn try_from(value: Vec) -> Result { + value.try_into().map(Self).map_err(|_| ()) + } +} + +// Panics if the iterator terminates before producing N items. +impl FromIterator for StdArray +where + Self: Serializable, +{ + fn from_iter>(iter: T) -> Self { + let mut res = Self::ZERO; + let mut iter = iter.into_iter(); + + for i in 0..N { + res.0[i] = iter.next().unwrap(); + } + + res + } +} + +impl IntoIterator for StdArray { + type Item = V; + type IntoIter = std::array::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<'a, 'b, V: SharedValue, const N: usize> Add<&'b StdArray> for &'a StdArray { + type Output = StdArray; + + fn add(self, rhs: &'b StdArray) -> Self::Output { + StdArray(array::from_fn(|i| self.0[i] + rhs.0[i])) + } +} + +impl Add for StdArray { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Add::add(&self, &rhs) + } +} + +// add(owned, ref) should be preferred over this. +impl Add> for &StdArray { + type Output = StdArray; + + fn add(self, rhs: StdArray) -> Self::Output { + Add::add(self, &rhs) + } +} + +impl Add<&StdArray> for StdArray { + type Output = Self; + + fn add(self, rhs: &Self) -> Self::Output { + Add::add(&self, rhs) + } +} + +impl AddAssign<&Self> for StdArray { + fn add_assign(&mut self, rhs: &Self) { + for (a, b) in self.0.iter_mut().zip(rhs.0.iter()) { + *a += *b; + } + } +} + +impl AddAssign for StdArray { + fn add_assign(&mut self, rhs: Self) { + AddAssign::add_assign(self, &rhs); + } +} + +impl Neg for &StdArray { + type Output = StdArray; + + fn neg(self) -> Self::Output { + StdArray(array::from_fn(|i| -self.0[i])) + } +} + +impl Neg for StdArray { + type Output = Self; + + fn neg(self) -> Self::Output { + Neg::neg(&self) + } +} + +impl Sub for &StdArray { + type Output = StdArray; + + fn sub(self, rhs: Self) -> Self::Output { + StdArray(array::from_fn(|i| self.0[i] - rhs.0[i])) + } +} + +impl Sub for StdArray { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Sub::sub(&self, &rhs) + } +} + +impl Sub<&Self> for StdArray { + type Output = Self; + + fn sub(self, rhs: &Self) -> Self::Output { + Sub::sub(&self, rhs) + } +} + +impl Sub> for &StdArray { + type Output = StdArray; + + fn sub(self, rhs: StdArray) -> Self::Output { + Sub::sub(self, &rhs) + } +} + +impl SubAssign<&Self> for StdArray { + fn sub_assign(&mut self, rhs: &Self) { + for (a, b) in self.0.iter_mut().zip(rhs.0.iter()) { + *a -= *b; + } + } +} + +impl SubAssign for StdArray { + fn sub_assign(&mut self, rhs: Self) { + SubAssign::sub_assign(self, &rhs); + } +} + +impl<'a, 'b, F: Field, const N: usize> Mul<&'b F> for &'a StdArray { + type Output = StdArray; + + fn mul(self, rhs: &'b F) -> Self::Output { + StdArray(array::from_fn(|i| self.0[i] * *rhs)) + } +} + +impl Mul for StdArray { + type Output = Self; + + fn mul(self, rhs: F) -> Self::Output { + Mul::mul(&self, &rhs) + } +} + +impl Mul<&F> for StdArray { + type Output = Self; + + fn mul(self, rhs: &F) -> Self::Output { + Mul::mul(&self, rhs) + } +} + +impl Mul for &StdArray { + type Output = StdArray; + + fn mul(self, rhs: F) -> Self::Output { + Mul::mul(self, &rhs) + } +} + +impl<'a, F: Field, const N: usize> Mul<&'a StdArray> for StdArray { + type Output = StdArray; + + fn mul(self, rhs: &'a StdArray) -> Self::Output { + StdArray(array::from_fn(|i| self.0[i] * rhs.0[i])) + } +} + +impl, const N: usize> Not for StdArray { + type Output = StdArray; + + fn not(self) -> Self::Output { + StdArray(array::from_fn(|i| !self.0[i])) + } +} + +impl> FromRandom for StdArray { + type SourceLength = U1; + fn from_random(src: GenericArray) -> Self { + Self([F::from_random(src)]) + } +} + +impl FromRandom for StdArray { + type SourceLength = U32; + + fn from_random(src: GenericArray) -> Self { + Self(array::from_fn(|i| Fp32BitPrime::from_random_u128(src[i]))) + } +} + +impl Serializable for StdArray { + type Size = ::Size; + type DeserializationError = ::DeserializationError; + + fn serialize(&self, buf: &mut GenericArray) { + self.0[0].serialize(buf); + } + + fn deserialize(buf: &GenericArray) -> Result { + Ok(StdArray([V::deserialize(buf)?])) + } +} + +impl Serializable for StdArray +where + V: SharedValue, + ::Size: Mul, + <::Size as Mul>::Output: ArrayLength, +{ + type Size = <::Size as Mul>::Output; + type DeserializationError = ::DeserializationError; + + fn serialize(&self, buf: &mut GenericArray) { + let sz: usize = (::BITS / 8).try_into().unwrap(); + for i in 0..32 { + self.0[i].serialize( + GenericArray::try_from_mut_slice(&mut buf[sz * i..sz * (i + 1)]).unwrap(), + ); + } + } + + fn deserialize(buf: &GenericArray) -> Result { + let sz: usize = (::BITS / 8).try_into().unwrap(); + let mut res = [V::ZERO; 32]; + for i in 0..32 { + res[i] = V::deserialize(GenericArray::from_slice(&buf[sz * i..sz * (i + 1)]))?; + } + Ok(StdArray(res)) + } +} + +impl Message for StdArray where Self: Serializable {} diff --git a/ipa-core/src/secret_sharing/mod.rs b/ipa-core/src/secret_sharing/mod.rs index 42a62ca55..3b4923854 100644 --- a/ipa-core/src/secret_sharing/mod.rs +++ b/ipa-core/src/secret_sharing/mod.rs @@ -1,14 +1,62 @@ +//! # Vectorization +//! +//! Vectorization refers to adapting an implementation that previously operated on one value at a +//! time, to instead operate on `N` values at a time. Vectorization improves performance in two ways: +//! +//! 1. Vectorized code can make use of special CPU instructions (Intel AVX, ARM NEON) that operate +//! on multiple values at a time. This reduces the CPU time required to perform computations. +//! We also use vectorization to refer to "bit packing" of boolean values, i.e., packing +//! 64 boolean values into a single u64 rather than using a byte (or even a word) for each +//! value. +//! 2. Aside from the core arithmetic operations that are involved in our MPC, a substantial +//! amount of other code is needed to send values between helpers, schedule futures for +//! execution, etc. Vectorization can result in a greater amount of arithmetic work being +//! performed for a given amount of overhead work, thus increasing the efficiency of the +//! implementation. +//! +//! ## Vectorization traits +//! +//! There are two sets of traits related to vectorization. +//! +//! If you are writing protocols, the trait of interest is `FieldSimd`, which can be specified in +//! a trait bound, something like `F: Field + FieldSimd`. +//! +//! The other traits are `Vectorizable` (for `SharedValue`s) and `FieldVectorizable`. These traits +//! are needed to work around a limitation in the rust type system. See the `FieldVectorizable` +//! documentation for details. +//! +//! We require that each supported vectorization configuration (i.e. combination of data type and +//! width) be explicitly identified, by implementing the `Vectorizable` and/or `FieldVectorizable` +//! traits for base data type (e.g. `Fp32BitPrime`). This is for two reasons: +//! 1. Rust doesn't yet support evaluating expressions involving const parameters at compile time, +//! which makes it difficult or impossible to write generic serialization routines for +//! arbitrary widths. +//! 2. As a measure of protection against inadvertently using a configuration that will not be +//! efficient (i.e. an excessive vector width). +//! +//! ## Adding a new supported vectorization +//! +//! To add a new supported vectorization: +//! +//! 1. Add `FieldSimd` impl (in `secret_sharing/mod.rs`) +//! 2. Add `FromRandom` impl (in `array.rs` or `boolean_array.rs`) +//! 3. Add `Serializable` impl (in `array.rs` or `boolean_array.rs`) +//! 4. Add `Vectorizable` and `FieldVectorizable` impls (with the primitive type def in e.g. `galois_field.rs` + pub mod replicated; +mod array; mod decomposed; mod into_shares; mod scheme; use std::{ fmt::Debug, - ops::{Mul, MulAssign, Neg}, + iter::once, + ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}, }; +pub use array::StdArray; pub use decomposed::BitDecomposed; use generic_array::ArrayLength; pub use into_shares::IntoShares; @@ -21,7 +69,11 @@ use rand::{ use replicated::{semi_honest::AdditiveShare, ReplicatedSecretSharing}; pub use scheme::{Bitwise, Linear, LinearRefOps, SecretSharing}; -use crate::ff::{AddSub, AddSubAssign, Serializable}; +use crate::{ + ff::{AddSub, AddSubAssign, Field, Fp32BitPrime, Serializable}, + helpers::Message, + protocol::prss::FromRandom, +}; /// Operations supported for weak shared values. pub trait Additive: @@ -57,13 +109,140 @@ pub trait Block: Sized + Copy + Debug { /// (capable of supporting addition and multiplication) is desired, the `Field` trait extends /// `SharedValue` to require multiplication. pub trait SharedValue: - Clone + Copy + Eq + Debug + Send + Sync + Sized + Additive + Serializable + 'static + Clone + + Copy + + Eq + + Debug + + Send + + Sync + + Sized + + Additive + + Serializable + + Vectorizable<1> + + 'static { type Storage: Block; const BITS: u32; const ZERO: Self; + + // Note the trait bound of `Vectorizable<1>` here, i.e., these + // helpers only apply to arrays of a single element. + fn into_array(self) -> A + where + Self: Vectorizable<1, Array = A>, + A: SharedValueArray, + { + once(self).collect::() + } + + fn from_array(array: &A) -> Self + where + Self: Vectorizable<1, Array = A>, + A: SharedValueArray, + { + array.get(0) + } + + fn from_array_mut(array: &mut A) -> &mut Self + where + Self: Vectorizable<1, Array = A>, + A: SharedValueArray, + { + array.get_mut(0) + } +} + +// Note that we can either make `trait Vectorizable: SharedValue`, or we can make `trait +// SharedValue: Vectorizable<1>`, but doing both creates a cycle. (Similarly for +// `FieldVectorizable` / `Field`.) +// +// Although it is somewhat unnatural, we choose to do the latter, because it allows existing +// high-level protocols unaware of vectorization to call vectorized versions of core protocols (with +// width of 1) without updating all of the trait bounds. This does mean that the trait definitions +// do not prevent implementing `Vectorizable` for something that is not a `SharedValue`, but please +// don't do that. + +/// Trait for `SharedValue`s supporting operations on `N`-wide vectors. +pub trait Vectorizable: Sized { + type Array: SharedValueArray; +} + +/// Trait for `Field`s supporting operations on `N`-wide vectors. +/// +/// We would like `F` to be `FieldVectorizable` if it satisfies all of the following: +/// 1. `F: Field`. +/// 2. `>::Array: FieldArray`. Rust does not support expressing a +/// constraint on a super-trait's associated type directly. Instead, this effect is achieved +/// by constraining the `ArrayAlias` associated type and then constraining that +/// `Vectorizable::Array == FieldVectorizable::ArrayAlias` where necessary (e.g. in the +/// definition and blanket impl of the `FieldSimd` trait. We call it `ArrayAlias` instead of +/// `Array` so that references to the `Array` associated type do not require qualification +/// with a trait name. +/// 3. `F: Vectorizable`. This is implied by the previous two, because `FieldArray` +/// is a sub-trait of `SharedValueArray`. +pub trait FieldVectorizable: SharedValue + Sized { + type ArrayAlias: FieldArray; +} + +// We could define a `SharedValueSimd` trait that is the analog of this for `SharedValue`s, but +// there are not currently any protocols that need it. +pub trait FieldSimd: + Field + Vectorizable>::ArrayAlias> + FieldVectorizable +{ +} + +// Portions of the implementation treat non-vectorized operations as a vector with `N = 1`. This +// blanket impl (and the fact that `F: Field` is the only trait bound) is important in allowing code +// that writes `F: Field` to continue working without modification. +impl FieldSimd<1> for F {} + +// Supported vectorizations + +impl FieldSimd<32> for Fp32BitPrime {} + +pub trait SharedValueArray: + Clone + + Eq + + Debug + + Send + + Sync + + Sized + + TryFrom, Error = ()> + + FromIterator + + IntoIterator + + Add + + for<'a> Add<&'a Self, Output = Self> + + AddAssign + + for<'a> AddAssign<&'a Self> + + Neg + + Sub + + for<'a> Sub<&'a Self, Output = Self> + + SubAssign + + for<'a> SubAssign<&'a Self> + + Message +{ + const ZERO: Self; + + fn from_fn V>(f: F) -> Self; + + fn get(&self, index: usize) -> V; + + fn get_mut(&mut self, index: usize) -> &mut V; + + fn set(&mut self, index: usize, value: V); +} + +// Some `SharedValue` types (and thus their arrays) implement `FromRandom`, but `RP25519` does not. +// We overload this distinction on `FieldArray` instead of creating a separate `ArrayFromRandom` trait, +// to avoid making the `Vectorizable` / `FieldVectorizable` situation that much more complicated. +pub trait FieldArray: + SharedValueArray + + FromRandom + + for<'a> Mul<&'a F, Output = Self> + + for<'a> Mul<&'a Self, Output = Self> +{ } #[cfg(any(test, feature = "test-fixture", feature = "cli"))] @@ -85,6 +264,29 @@ where } } +#[cfg(any(test, feature = "test-fixture", feature = "cli"))] +impl IntoShares> for [V; N] +where + V: SharedValue + Vectorizable, + Standard: Distribution, +{ + fn share_with(self, rng: &mut R) -> [AdditiveShare; 3] { + // For arrays large enough that the compiler doesn't just unroll everything, it might be + // more efficient to avoid the intermediate vector by implementing this as a specialized + // hybrid of the impls for `F as IntoShares>` and ` as + // IntoShares>`. Not bothering since this is test-support functionality. + let [v1, v2, v3] = self.into_iter().share_with(rng); + let (v1l, v1r): (Vec, Vec) = v1.iter().map(AdditiveShare::as_tuple).unzip(); + let (v2l, v2r): (Vec, Vec) = v2.iter().map(AdditiveShare::as_tuple).unzip(); + let (v3l, v3r): (Vec, Vec) = v3.iter().map(AdditiveShare::as_tuple).unzip(); + [ + AdditiveShare::new_arr(v1l.try_into().unwrap(), v1r.try_into().unwrap()), + AdditiveShare::new_arr(v2l.try_into().unwrap(), v2r.try_into().unwrap()), + AdditiveShare::new_arr(v3l.try_into().unwrap(), v3r.try_into().unwrap()), + ] + } +} + #[cfg(all(test, unit_test))] mod tests { use crate::{ diff --git a/ipa-core/src/secret_sharing/replicated/semi_honest/additive_share.rs b/ipa-core/src/secret_sharing/replicated/semi_honest/additive_share.rs index a1011fce6..319a438c7 100644 --- a/ipa-core/src/secret_sharing/replicated/semi_honest/additive_share.rs +++ b/ipa-core/src/secret_sharing/replicated/semi_honest/additive_share.rs @@ -1,6 +1,6 @@ use std::{ fmt::{Debug, Formatter}, - ops::{Add, AddAssign, Mul, Neg, Sub, SubAssign}, + ops::{Add, AddAssign, Mul, Neg, Range, Sub, SubAssign}, }; use generic_array::{ArrayLength, GenericArray}; @@ -9,23 +9,37 @@ use typenum::Unsigned; use crate::{ ff::{ArrayAccess, Expand, Field, Serializable}, secret_sharing::{ - replicated::ReplicatedSecretSharing, Linear as LinearSecretSharing, SecretSharing, - SharedValue, + replicated::ReplicatedSecretSharing, FieldSimd, Linear as LinearSecretSharing, + SecretSharing, SharedValue, SharedValueArray, Vectorizable, }, }; +/// Additive secret sharing. +/// +/// `AdditiveShare` holds two out of three shares of an additive secret sharing, either of a single +/// value with type `V`, or a vector of such values. #[derive(Clone, PartialEq, Eq)] -pub struct AdditiveShare(V, V); +pub struct AdditiveShare, const N: usize = 1>( + >::Array, + >::Array, +); #[derive(Clone, PartialEq, Eq)] -pub struct ASIterator(pub T, pub T); +pub struct ASIterator<'a, S: SharedValue + ArrayAccess> { + range: Range, + share: &'a AdditiveShare, +} -impl SecretSharing for AdditiveShare { - const ZERO: Self = AdditiveShare::ZERO; +impl, const N: usize> SecretSharing for AdditiveShare { + const ZERO: Self = Self( + >::Array::ZERO, + >::Array::ZERO, + ); } -impl LinearSecretSharing for AdditiveShare {} -impl Debug for AdditiveShare { +impl LinearSecretSharing for AdditiveShare where F: Field + FieldSimd {} + +impl + Debug, const N: usize> Debug for AdditiveShare { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "({:?}, {:?})", self.0, self.1) } @@ -37,26 +51,48 @@ impl Default for AdditiveShare { } } -impl AdditiveShare { - /// Replicated secret share where both left and right values are `F::ZERO` - pub const ZERO: Self = Self(V::ZERO, V::ZERO); +impl, const N: usize> AdditiveShare { + /// Replicated secret share where both left and right values are `V::ZERO` + pub const ZERO: Self = Self( + >::Array::ZERO, + >::Array::ZERO, + ); +} +impl AdditiveShare { pub fn as_tuple(&self) -> (V, V) { - (self.0, self.1) + (V::from_array(&self.0), V::from_array(&self.1)) } } -impl ReplicatedSecretSharing for AdditiveShare { +impl ReplicatedSecretSharing for AdditiveShare +where + V: SharedValue + Vectorizable<1>, +{ fn new(a: V, b: V) -> Self { - Self(a, b) + Self(a.into_array(), b.into_array()) } fn left(&self) -> V { - self.0 + V::from_array(&self.0) } fn right(&self) -> V { - self.1 + V::from_array(&self.1) + } +} + +impl, const N: usize> AdditiveShare { + pub fn new_arr(a: >::Array, b: >::Array) -> Self { + Self(a, b) + } + + pub fn left_arr(&self) -> &>::Array { + &self.0 + } + + pub fn right_arr(&self) -> &>::Array { + &self.1 } } @@ -86,15 +122,20 @@ where } } -impl<'a, 'b, V: SharedValue> Add<&'b AdditiveShare> for &'a AdditiveShare { - type Output = AdditiveShare; +impl<'a, 'b, V: SharedValue + Vectorizable, const N: usize> Add<&'b AdditiveShare> + for &'a AdditiveShare +{ + type Output = AdditiveShare; - fn add(self, rhs: &'b AdditiveShare) -> Self::Output { - AdditiveShare(self.0 + rhs.0, self.1 + rhs.1) + fn add(self, rhs: &'b AdditiveShare) -> Self::Output { + AdditiveShare( + Add::add(self.0.clone(), &rhs.0), + Add::add(self.1.clone(), &rhs.1), + ) } } -impl Add for AdditiveShare { +impl, const N: usize> Add for AdditiveShare { type Output = Self; fn add(self, rhs: Self) -> Self::Output { @@ -102,15 +143,19 @@ impl Add for AdditiveShare { } } -impl Add> for &AdditiveShare { - type Output = AdditiveShare; +impl, const N: usize> Add> + for &AdditiveShare +{ + type Output = AdditiveShare; - fn add(self, rhs: AdditiveShare) -> Self::Output { + fn add(self, rhs: AdditiveShare) -> Self::Output { Add::add(self, &rhs) } } -impl Add<&AdditiveShare> for AdditiveShare { +impl, const N: usize> Add<&AdditiveShare> + for AdditiveShare +{ type Output = Self; fn add(self, rhs: &Self) -> Self::Output { @@ -118,28 +163,28 @@ impl Add<&AdditiveShare> for AdditiveShare { } } -impl AddAssign<&Self> for AdditiveShare { +impl, const N: usize> AddAssign<&Self> for AdditiveShare { fn add_assign(&mut self, rhs: &Self) { - self.0 += rhs.0; - self.1 += rhs.1; + self.0 += &rhs.0; + self.1 += &rhs.1; } } -impl AddAssign for AdditiveShare { +impl, const N: usize> AddAssign for AdditiveShare { fn add_assign(&mut self, rhs: Self) { AddAssign::add_assign(self, &rhs); } } -impl Neg for &AdditiveShare { - type Output = AdditiveShare; +impl, const N: usize> Neg for &AdditiveShare { + type Output = AdditiveShare; fn neg(self) -> Self::Output { - AdditiveShare(-self.0, -self.1) + AdditiveShare(-self.0.clone(), -self.1.clone()) } } -impl Neg for AdditiveShare { +impl, const N: usize> Neg for AdditiveShare { type Output = Self; fn neg(self) -> Self::Output { @@ -147,15 +192,18 @@ impl Neg for AdditiveShare { } } -impl Sub for &AdditiveShare { - type Output = AdditiveShare; +impl, const N: usize> Sub for &AdditiveShare { + type Output = AdditiveShare; fn sub(self, rhs: Self) -> Self::Output { - AdditiveShare(self.0 - rhs.0, self.1 - rhs.1) + AdditiveShare( + Sub::sub(self.0.clone(), &rhs.0), + Sub::sub(self.1.clone(), &rhs.1), + ) } } -impl Sub for AdditiveShare { +impl, const N: usize> Sub for AdditiveShare { type Output = Self; fn sub(self, rhs: Self) -> Self::Output { @@ -163,7 +211,7 @@ impl Sub for AdditiveShare { } } -impl Sub<&Self> for AdditiveShare { +impl, const N: usize> Sub<&Self> for AdditiveShare { type Output = Self; fn sub(self, rhs: &Self) -> Self::Output { @@ -171,53 +219,64 @@ impl Sub<&Self> for AdditiveShare { } } -impl Sub> for &AdditiveShare { - type Output = AdditiveShare; +impl, const N: usize> Sub> + for &AdditiveShare +{ + type Output = AdditiveShare; - fn sub(self, rhs: AdditiveShare) -> Self::Output { + fn sub(self, rhs: AdditiveShare) -> Self::Output { Sub::sub(self, &rhs) } } -impl SubAssign<&Self> for AdditiveShare { +impl, const N: usize> SubAssign<&Self> for AdditiveShare { fn sub_assign(&mut self, rhs: &Self) { - self.0 -= rhs.0; - self.1 -= rhs.1; + self.0 -= &rhs.0; + self.1 -= &rhs.1; } } -impl SubAssign for AdditiveShare { +impl, const N: usize> SubAssign for AdditiveShare { fn sub_assign(&mut self, rhs: Self) { SubAssign::sub_assign(self, &rhs); } } -impl<'a, 'b, F: Field> Mul<&'b F> for &'a AdditiveShare { - type Output = AdditiveShare; +impl<'a, 'b, F, const N: usize> Mul<&'b F> for &'a AdditiveShare +where + F: Field + FieldSimd, +{ + type Output = AdditiveShare; fn mul(self, rhs: &'b F) -> Self::Output { - AdditiveShare(self.0 * *rhs, self.1 * *rhs) + AdditiveShare(self.0.clone() * rhs, self.1.clone() * rhs) } } -impl Mul for AdditiveShare { +impl Mul for AdditiveShare +where + F: Field + FieldSimd, +{ type Output = Self; fn mul(self, rhs: F) -> Self::Output { - Mul::mul(&self, &rhs) + Mul::mul(&self, rhs) } } -impl Mul<&F> for AdditiveShare { +impl<'a, F: Field + FieldSimd, const N: usize> Mul<&'a F> for AdditiveShare { type Output = Self; fn mul(self, rhs: &F) -> Self::Output { - Mul::mul(&self, rhs) + Mul::mul(&self, *rhs) } } -impl Mul for &AdditiveShare { - type Output = AdditiveShare; +impl Mul for &AdditiveShare +where + F: Field + FieldSimd, +{ + type Output = AdditiveShare; fn mul(self, rhs: F) -> Self::Output { Mul::mul(self, &rhs) @@ -230,11 +289,15 @@ impl From<(V, V)> for AdditiveShare { } } -impl + SharedValue> std::ops::Not for AdditiveShare { +impl std::ops::Not for AdditiveShare +where + V: SharedValue + Vectorizable, + >::Array: std::ops::Not>::Array>, +{ type Output = Self; fn not(self) -> Self::Output { - AdditiveShare(!(self.0), !(self.1)) + AdditiveShare(!self.0, !self.1) } } @@ -261,55 +324,79 @@ where } /// Implement `ArrayAccess` for `AdditiveShare` over `SharedValue` that implements `ArrayAccess` -impl ArrayAccess for AdditiveShare +// You can think of S as a Boolean array type and V as Boolean. +impl ArrayAccess for AdditiveShare where - S: ArrayAccess + SharedValue, - ::Output: SharedValue, + S: SharedValue + ArrayAccess, + V: SharedValue + Vectorizable<1, Array = A>, + A: SharedValueArray, { - type Output = AdditiveShare<::Output>; - type Iter<'a> = ASIterator>; + type Output = AdditiveShare; + type Iter<'a> = ASIterator<'a, S>; fn get(&self, index: usize) -> Option { - self.0 + S::from_array(&self.0) .get(index) - .zip(self.1.get(index)) - .map(|v| AdditiveShare(v.0, v.1)) + .zip(S::from_array(&self.1).get(index)) + .map(|v| AdditiveShare(v.0.into_array(), v.1.into_array())) } fn set(&mut self, index: usize, e: Self::Output) { - self.0.set(index, e.0); - self.1.set(index, e.1); + S::from_array_mut(&mut self.0).set(index, V::from_array(&e.0)); + S::from_array_mut(&mut self.1).set(index, V::from_array(&e.1)); } fn iter(&self) -> Self::Iter<'_> { - ASIterator(self.0.iter(), self.1.iter()) + ASIterator { + range: Range { + start: 0, + end: S::from_array(&self.0).iter().len(), + }, + share: self, + } } } -impl Expand for AdditiveShare +impl Expand for AdditiveShare where - S: Expand + SharedValue, - ::Input: SharedValue, + S: Expand + SharedValue + Vectorizable<1, Array = A>, + A: SharedValueArray, + T: SharedValue, { type Input = AdditiveShare<::Input>; fn expand(v: &Self::Input) -> Self { - AdditiveShare(S::expand(&v.0), S::expand(&v.1)) + AdditiveShare( + S::expand(&T::from_array(&v.0)).into_array(), + S::expand(&T::from_array(&v.1)).into_array(), + ) } } -impl Iterator for ASIterator +impl<'a, S, T> Iterator for ASIterator<'a, S> where - T: Iterator, - T::Item: SharedValue, + S: SharedValue + ArrayAccess, + T: SharedValue, { - type Item = AdditiveShare; + type Item = AdditiveShare; fn next(&mut self) -> Option { - match (self.0.next(), self.1.next()) { - (Some(left), Some(right)) => Some(AdditiveShare(left, right)), - _ => None, - } + self.range.next().map(|i| { + AdditiveShare( + S::from_array(&self.share.0).get(i).unwrap().into_array(), + S::from_array(&self.share.1).get(i).unwrap().into_array(), + ) + }) + } +} + +impl<'a, S> ExactSizeIterator for ASIterator<'a, S> +where + S: SharedValue + ArrayAccess, + ::Output: SharedValue, +{ + fn len(&self) -> usize { + self.range.len() } } @@ -370,8 +457,14 @@ mod tests { a3: &AdditiveShare, expected_value: u128, ) { - assert_eq!(a1.0 + a2.0 + a3.0, Fp31::truncate_from(expected_value)); - assert_eq!(a1.1 + a2.1 + a3.1, Fp31::truncate_from(expected_value)); + assert_eq!( + a1.left() + a2.left() + a3.left(), + Fp31::truncate_from(expected_value) + ); + assert_eq!( + a1.right() + a2.right() + a3.right(), + Fp31::truncate_from(expected_value) + ); } fn addition_test_case(a: (u8, u8, u8), b: (u8, u8, u8), expected_output: u128) { diff --git a/ipa-core/src/test_fixture/circuit.rs b/ipa-core/src/test_fixture/circuit.rs index 8e8e4ac99..1fce74ead 100644 --- a/ipa-core/src/test_fixture/circuit.rs +++ b/ipa-core/src/test_fixture/circuit.rs @@ -1,4 +1,5 @@ use futures_util::future::join_all; +use rand::distributions::{Distribution, Standard}; use super::join3v; use crate::{ @@ -10,18 +11,20 @@ use crate::{ RecordId, }, rand::thread_rng, - secret_sharing::{replicated::semi_honest::AdditiveShare as Replicated, IntoShares}, - test_fixture::{narrow_contexts, Reconstruct, TestWorld}, + secret_sharing::{replicated::semi_honest::AdditiveShare as Replicated, FieldSimd, IntoShares}, + test_fixture::{narrow_contexts, ReconstructArr, TestWorld}, }; /// Creates an arithmetic circuit with the given width and depth. /// /// # Panics /// panics when circuits did not produce the expected value. -pub async fn arithmetic(width: u32, depth: u8) +pub async fn arithmetic(width: u32, depth: u16) where - F: Field + IntoShares>, - for<'a> Replicated: SecureMul>, + F: Field + FieldSimd, + for<'a> Replicated: SecureMul>, + [F; N]: IntoShares>, + Standard: Distribution, { let world = TestWorld::default(); // Re-use contexts for the entire execution because record identifiers are contiguous. @@ -35,31 +38,40 @@ where #[allow(clippy::disallowed_methods)] // Just for testing purposes. let results = join_all(multiplications).await; - let mut sum = 0; + let mut sum = [0u128; N]; for line in results { - sum += line.reconstruct().as_u128(); + for (this_sum, this_value) in sum.iter_mut().zip(line.reconstruct_arr()) { + *this_sum += this_value.as_u128(); + } } - assert_eq!(sum, u128::from(width)); + assert_eq!(sum, [u128::from(width); N]); } -async fn circuit<'a, F>( +async fn circuit<'a, F, const N: usize>( top_ctx: &[SemiHonestContext<'a>; 3], record_id: RecordId, - depth: u8, -) -> [Replicated; 3] + depth: u16, +) -> [Replicated; 3] where - F: Field + IntoShares>, - Replicated: SecureMul>, + F: Field + FieldSimd, + Replicated: SecureMul>, + [F; N]: IntoShares>, { - let mut a = F::ONE.share_with(&mut thread_rng()); + assert_eq!( + depth % u16::try_from(N).unwrap(), + 0, + "depth must be a multiple of vectorization factor" + ); + + let mut a = [F::ONE; N].share_with(&mut thread_rng()); - for bit in 0..depth { - let b = F::ONE.share_with(&mut thread_rng()); - let bit_ctx = narrow_contexts(top_ctx, &format!("b{bit}")); + for stripe in 0..(depth / u16::try_from(N).unwrap()) { + let b = [F::ONE; N].share_with(&mut thread_rng()); + let stripe_ctx = narrow_contexts(top_ctx, &format!("s{stripe}")); a = async move { let mut coll = Vec::new(); - for (i, ctx) in bit_ctx.iter().enumerate() { + for (i, ctx) in stripe_ctx.iter().enumerate() { let mul = a[i].multiply( &b[i], ctx.narrow("mult") diff --git a/ipa-core/src/test_fixture/mod.rs b/ipa-core/src/test_fixture/mod.rs index acfb8f853..e383d4db0 100644 --- a/ipa-core/src/test_fixture/mod.rs +++ b/ipa-core/src/test_fixture/mod.rs @@ -23,7 +23,7 @@ pub use event_gen::{Config as EventGeneratorConfig, EventGenerator}; use futures::TryFuture; use rand::{distributions::Standard, prelude::Distribution, rngs::mock::StepRng}; use rand_core::{CryptoRng, RngCore}; -pub use sharing::{get_bits, into_bits, Reconstruct}; +pub use sharing::{get_bits, into_bits, Reconstruct, ReconstructArr}; #[cfg(feature = "in-memory-infra")] pub use world::{Runner, TestWorld, TestWorldConfig}; diff --git a/ipa-core/src/test_fixture/sharing.rs b/ipa-core/src/test_fixture/sharing.rs index a9ac85cf3..95eba9902 100644 --- a/ipa-core/src/test_fixture/sharing.rs +++ b/ipa-core/src/test_fixture/sharing.rs @@ -9,7 +9,7 @@ use crate::{ semi_honest::AdditiveShare as Replicated, ReplicatedSecretSharing, }, - BitDecomposed, SecretSharing, + BitDecomposed, FieldSimd, SecretSharing, Vectorizable, }, }; @@ -20,7 +20,7 @@ pub fn into_bits(v: F) -> BitDecomposed { }) } -/// Deconstructs a value into N values, one for each bi3t. +/// Deconstructs a value into N values, one for each bit. /// # Panics /// It won't #[must_use] @@ -37,6 +37,19 @@ pub trait Reconstruct { fn reconstruct(&self) -> T; } +/// Alternate version of `Reconstruct` for vectors. +/// +/// There is no difference in the traits, but this avoids having to add +/// type annotations everywhere to disambiguate whether a single-bit +/// result should be reconstructed as `F` or `[F; 1]`. +pub trait ReconstructArr { + /// Validates correctness of the secret sharing scheme. + /// + /// # Panics + /// Panics if the given input is not a valid replicated secret share. + fn reconstruct_arr(&self) -> T; +} + impl Reconstruct for [&Replicated; 3] { fn reconstruct(&self) -> F { let s0 = &self[0]; @@ -62,6 +75,27 @@ impl Reconstruct for [Replicated; 3] { } } +impl, const N: usize> ReconstructArr<>::Array> + for [Replicated; 3] +{ + fn reconstruct_arr(&self) -> >::Array { + let s0l = self[0].left_arr(); + let s0r = self[0].right_arr(); + let s1l = self[1].left_arr(); + let s1r = self[1].right_arr(); + let s2l = self[2].left_arr(); + let s2r = self[2].right_arr(); + + assert_eq!(s0l.clone() + s1l + s2l, s0r.clone() + s1r + s2r); + + assert_eq!(s0r, s1l); + assert_eq!(s1r, s2l); + assert_eq!(s2r, s0l); + + s0l.clone() + s1l + s2l + } +} + impl Reconstruct<(V, W)> for [(T, U); 3] where for<'t> [&'t T; 3]: Reconstruct,