Skip to content

Commit

Permalink
Merge pull request #973 from andyleiserson/transpose
Browse files Browse the repository at this point in the history
Bit matrix transposes for converting to/from vectorized shares
  • Loading branch information
andyleiserson authored Mar 19, 2024
2 parents d4b18bb + a4a2aa2 commit f707f06
Show file tree
Hide file tree
Showing 11 changed files with 1,142 additions and 36 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ members = ["ipa-core", "ipa-macros"]
incremental = true
lto = "thin"

[profile.release-max]
inherits = "release"
codegen-units = 1

[profile.bench-dhat]
inherits = "bench"
incremental = true
Expand Down
5 changes: 5 additions & 0 deletions ipa-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@ path = "benches/oneshot/ipa.rs"
harness = false
required-features = ["enable-benches", "descriptive-gate"]

[[bench]]
name = "transpose"
harness = false
required-features = ["enable-benches"]

[[test]]
name = "helper_networks"
required-features = [
Expand Down
114 changes: 114 additions & 0 deletions ipa-core/benches/transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//! Benchmarks for bit matrix transpose operations.
//!
//! Some of these routines run very fast, which doesn't work well with the default Criterion settings.
//! The warm up time and measurement time are reduced, because the defaults will produce a very large
//! number of samples (which in turn will take Criterion a long time to analyze).
//!
//! Some of the benchmark routines are looped so that the running time is long enough for Criterion
//! to measure reliably. When too short, Criterion complains that some measurements take zero time.
//! Presumably, the behavior of the underlying system clock is a contributing factor here.
//!
//! There is also a panic in the `plotters` crate used by Criterion to produce HTML reports that can
//! occur with very fast-running routines. This can be worked around by passing the `-n` option to
//! Criterion to disable HTML reports.
use std::{array, iter::repeat_with, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
use ipa_core::{
error::UnwrapInfallible,
ff::boolean_array::BA64,
secret_sharing::{
vector::{transpose_16x16, transpose_8x8},
SharedValue, TransposeFrom,
},
};
use rand::{
distributions::{Distribution, Standard},
thread_rng, Rng,
};

/// Builds an `N`-element array whose entries are each drawn independently from the
/// `Standard` distribution using the thread-local random number generator.
fn random_array<T, const N: usize>() -> [T; N]
where
    Standard: Distribution<T>,
{
    let mut source = thread_rng();
    // The slot index is irrelevant: every position simply gets the next random value.
    let draw = |_slot: usize| source.gen::<T>();
    array::from_fn(draw)
}

/// Settings for a single benchmark run, consumed by `do_benchmark`.
struct Params {
/// Number of rows; appears in the benchmark group name (`{rows}x{cols}`) and in the
/// reported throughput (`rows * cols * iters`).
rows: usize,
/// Number of columns; appears in the benchmark group name and in the reported throughput.
cols: usize,
/// How many random inputs are generated per batch, i.e. how many times the routine is
/// invoked within one measured iteration.
iters: usize,
}

/// Registers and runs one Criterion benchmark for a transpose routine.
///
/// * `c` - the Criterion instance to register the benchmark with.
/// * `Params { rows, cols, iters }` - `rows` and `cols` name the benchmark group
///   (`{rows}x{cols}`); `iters` is the number of random inputs generated per batch and
///   therefore the number of `routine` calls per measured iteration. Throughput is
///   reported as `rows * cols * iters` elements.
/// * `routine` - the function under test; it receives a reference to one random input.
///
/// Warm-up and measurement times are both set to 200 ms. Input generation happens in
/// the batch setup closure, so it is not part of the measured time.
fn do_benchmark<O, T, const N: usize>(
    c: &mut Criterion,
    Params { rows, cols, iters }: Params,
    routine: fn(&[T; N]) -> O,
) where
    Standard: Distribution<T>,
{
    let mut group = c.benchmark_group(format!("{rows}x{cols}"));
    group.warm_up_time(Duration::from_millis(200));
    group.measurement_time(Duration::from_millis(200));
    group.throughput(Throughput::Elements((rows * cols * iters) as u64));

    group.bench_with_input(
        BenchmarkId::new("transpose", format!("{iters}x")),
        &(),
        move |b, _| {
            b.iter_batched_ref(
                || repeat_with(random_array).take(iters).collect::<Vec<_>>(),
                |input| {
                    // Feed every result through `black_box`. Merely counting the mapped
                    // iterator discards the outputs, which lets the optimizer remove the
                    // very work this benchmark is supposed to measure.
                    input.iter().for_each(|matrix| {
                        std::hint::black_box(routine(matrix));
                    });
                },
                BatchSize::SmallInput,
            )
        },
    );
    group.finish();
}

/// Benchmarks `transpose_8x8`, invoking it on 100 random inputs per measured iteration.
fn bench_8x8(c: &mut Criterion) {
    let params = Params {
        rows: 8,
        cols: 8,
        iters: 100,
    };
    do_benchmark(c, params, |matrix| transpose_8x8(matrix));
}

/// Benchmarks `transpose_16x16`, invoking it on 50 random inputs per measured iteration.
fn bench_16x16(c: &mut Criterion) {
    let params = Params {
        rows: 16,
        cols: 16,
        iters: 50,
    };
    do_benchmark(c, params, transpose_16x16);
}

/// Benchmarks the `TransposeFrom` implementation that fills an array of `BA64` values,
/// using a single random input per measured iteration.
fn bench_64x64(c: &mut Criterion) {
do_benchmark(
c,
Params {
rows: 64,
cols: 64,
iters: 1,
},
|src| {
// Start from an all-zero destination and fill it by transposing `src` into it.
let mut dst = array::from_fn(|_| BA64::ZERO);
dst.transpose_from(src).unwrap_infallible();
// Hand the destination back so the transposed data is the routine's output.
dst
},
);
}

// Each benchmark function is registered in its own Criterion group; `criterion_main!`
// then runs all three groups (this bench target is declared with `harness = false`).
criterion_group!(benches_8x8, bench_8x8);
criterion_group!(benches_16x16, bench_16x16);
criterion_group!(benches_64x64, bench_64x64);
criterion_main!(benches_8x8, benches_16x16, benches_64x64);
21 changes: 21 additions & 0 deletions ipa-core/src/ff/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,18 @@ macro_rules! boolean_array_impl {
impl $name {
// Number of `u8` storage elements computed by `bitvec::mem::elts` for `$bits` bits;
// only compiled for unit tests.
#[cfg(all(test, unit_test))]
const STORE_LEN: usize = bitvec::mem::elts::<u8>($bits);

/// Borrows the storage of the wrapped value (`self.0`) as a byte slice, exactly as
/// returned by its own `as_raw_slice`.
#[inline]
#[must_use]
pub fn as_raw_slice(&self) -> &[u8] {
self.0.as_raw_slice()
}

/// Mutably borrows the storage of the wrapped value (`self.0`) as a byte slice,
/// exactly as returned by its own `as_raw_mut_slice`.
#[inline]
#[must_use]
pub fn as_raw_mut_slice(&mut self) -> &mut [u8] {
self.0.as_raw_mut_slice()
}
}

impl ArrayAccess for $name {
Expand Down Expand Up @@ -736,12 +748,21 @@ boolean_array_impl_small!(boolean_array_5, BA5, 5, fallible);
boolean_array_impl_small!(boolean_array_6, BA6, 6, fallible);
boolean_array_impl_small!(boolean_array_7, BA7, 7, fallible);
boolean_array_impl_small!(boolean_array_8, BA8, 8, infallible);
boolean_array_impl_small!(boolean_array_16, BA16, 16, infallible);
boolean_array_impl_small!(boolean_array_20, BA20, 20, fallible);
boolean_array_impl_small!(boolean_array_32, BA32, 32, infallible);
boolean_array_impl_small!(boolean_array_64, BA64, 64, infallible);
boolean_array_impl_small!(boolean_array_112, BA112, 112, infallible);
boolean_array_impl!(boolean_array_256, BA256, 256, infallible);

// Vectors of 256 `BA64` values are stored as a `StdArray<BA64, 256>`.
impl Vectorizable<256> for BA64 {
type Array = StdArray<BA64, 256>;
}

// Vectors of 256 `BA256` values are stored as a `StdArray<BA256, 256>`.
impl Vectorizable<256> for BA256 {
type Array = StdArray<BA256, 256>;
}

// used to convert into Fp25519
impl From<(u128, u128)> for BA256 {
fn from(value: (u128, u128)) -> Self {
Expand Down
20 changes: 18 additions & 2 deletions ipa-core/src/secret_sharing/decomposed.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
use std::{fmt::Debug, ops::Deref, slice};
use std::{
fmt::Debug,
ops::{Deref, DerefMut},
slice,
};

use crate::{
error::Error,
Expand All @@ -12,7 +16,7 @@ pub struct BitDecomposed<S> {
}

impl<S> BitDecomposed<S> {
const MAX: usize = 64;
const MAX: usize = 256;

/// Create a new value from an iterator.
/// # Panics
Expand Down Expand Up @@ -99,6 +103,12 @@ impl<S> BitDecomposed<S> {
}
}

impl<S: Clone> BitDecomposed<S> {
/// Resizes the inner `bits` storage to `new_len` by forwarding to its `resize`
/// method, passing `value` along.
///
/// NOTE(review): unlike construction, this does not compare `new_len` against
/// `Self::MAX` — confirm that callers stay within that limit.
pub fn resize(&mut self, new_len: usize, value: S) {
self.bits.resize(new_len, value);
}
}

impl<S> TryFrom<Vec<S>> for BitDecomposed<S> {
type Error = Error;
fn try_from(bits: Vec<S>) -> Result<Self, Self::Error> {
Expand Down Expand Up @@ -148,6 +158,12 @@ impl<S> Deref for BitDecomposed<S> {
}
}

// Mutable counterpart of the `Deref` impl: exposes the inner `bits` storage for
// in-place modification.
impl<S> DerefMut for BitDecomposed<S> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.bits
}
}

impl<S> IntoIterator for BitDecomposed<S> {
type Item = S;
type IntoIter = <Vec<S> as IntoIterator>::IntoIter;
Expand Down
6 changes: 5 additions & 1 deletion ipa-core/src/secret_sharing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ pub mod replicated;
mod decomposed;
mod into_shares;
mod scheme;
#[cfg(not(feature = "enable-benches"))]
mod vector;
#[cfg(feature = "enable-benches")]
pub mod vector;

use std::{
fmt::Debug,
Expand All @@ -20,7 +23,8 @@ use rand::{
};
pub use scheme::{Bitwise, Linear, LinearRefOps, SecretSharing};
pub use vector::{
FieldArray, FieldSimd, FieldVectorizable, SharedValueArray, StdArray, Vectorizable,
FieldArray, FieldSimd, FieldVectorizable, SharedValueArray, StdArray, TransposeFrom,
Vectorizable,
};

#[cfg(any(test, feature = "test-fixture", feature = "cli"))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,28 @@ impl<V: SharedValue + Vectorizable<N>, const N: usize> AdditiveShare<V, N> {
pub fn right_arr(&self) -> &<V as Vectorizable<N>>::Array {
&self.1
}

pub(in crate::secret_sharing) fn left_arr_mut(&mut self) -> &mut <V as Vectorizable<N>>::Array {
&mut self.0
}

pub(in crate::secret_sharing) fn right_arr_mut(
&mut self,
) -> &mut <V as Vectorizable<N>>::Array {
&mut self.1
}

/// Consumes the share and returns its two arrays as a `(left, right)` tuple.
pub fn into_arr_tuple(self) -> (<V as Vectorizable<N>>::Array, <V as Vectorizable<N>>::Array) {
    (self.0, self.1)
}

/// Builds a share from two generator functions: `lf` produces the left array via
/// `from_fn` and `rf` produces the right array. The left array is built first.
pub fn from_fns<LF: FnMut(usize) -> V, RF: FnMut(usize) -> V>(lf: LF, rf: RF) -> Self {
    let left = <V as Vectorizable<N>>::Array::from_fn(lf);
    let right = <V as Vectorizable<N>>::Array::from_fn(rf);
    Self(left, right)
}
}

impl<V: SharedValue> AdditiveShare<V>
Expand Down
85 changes: 53 additions & 32 deletions ipa-core/src/secret_sharing/vector/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use std::{
};

use generic_array::{ArrayLength, GenericArray};
use typenum::U32;
use typenum::{U16, U256, U32, U64};

use crate::{
error::LengthError,
ff::{Field, Fp32BitPrime, Serializable},
protocol::prss::{FromRandom, FromRandomU128},
protocol::prss::FromRandom,
secret_sharing::{FieldArray, Sendable, SharedValue, SharedValueArray},
};

Expand Down Expand Up @@ -277,14 +277,24 @@ impl<F: SharedValue + FromRandom> FromRandom for StdArray<F, 1> {
}
}

impl FromRandom for StdArray<Fp32BitPrime, 32> {
type SourceLength = U32;
macro_rules! impl_from_random {
($value_ty:ty, $width:expr, $source_len:ty, $item_len:expr) => {
impl FromRandom for StdArray<$value_ty, $width> {
type SourceLength = $source_len;

fn from_random(src: GenericArray<u128, U32>) -> Self {
Self(array::from_fn(|i| Fp32BitPrime::from_random_u128(src[i])))
}
fn from_random(src: GenericArray<u128, Self::SourceLength>) -> Self {
Self(array::from_fn(|i| {
<$value_ty>::from_random(
GenericArray::from_slice(&src[$item_len * i..$item_len * (i + 1)]).clone(),
)
}))
}
}
};
}

impl_from_random!(Fp32BitPrime, 32, U32, 1);

impl<V: SharedValue> Serializable for StdArray<V, 1> {
type Size = <V as Serializable>::Size;
type DeserializationError = <V as Serializable>::DeserializationError;
Expand All @@ -298,34 +308,45 @@ impl<V: SharedValue> Serializable for StdArray<V, 1> {
}
}

impl<V: SharedValue> Serializable for StdArray<V, 32>
where
V: SharedValue,
<V as Serializable>::Size: Mul<U32>,
<<V as Serializable>::Size as Mul<U32>>::Output: ArrayLength,
{
type Size = <<V as Serializable>::Size as Mul<U32>>::Output;
type DeserializationError = <V as Serializable>::DeserializationError;

fn serialize(&self, buf: &mut GenericArray<u8, Self::Size>) {
let sz: usize = (<V as SharedValue>::BITS / 8).try_into().unwrap();
for i in 0..32 {
self.0[i].serialize(
GenericArray::try_from_mut_slice(&mut buf[sz * i..sz * (i + 1)]).unwrap(),
);
}
}

fn deserialize(buf: &GenericArray<u8, Self::Size>) -> Result<Self, Self::DeserializationError> {
let sz: usize = (<V as SharedValue>::BITS / 8).try_into().unwrap();
let mut res = [V::ZERO; 32];
for i in 0..32 {
res[i] = V::deserialize(GenericArray::from_slice(&buf[sz * i..sz * (i + 1)]))?;
macro_rules! impl_serializable {
($width:expr, $width_ty:ty) => {
impl<V: SharedValue> Serializable for StdArray<V, $width>
where
V: SharedValue,
<V as Serializable>::Size: Mul<$width_ty>,
<<V as Serializable>::Size as Mul<$width_ty>>::Output: ArrayLength,
{
type Size = <<V as Serializable>::Size as Mul<$width_ty>>::Output;
type DeserializationError = <V as Serializable>::DeserializationError;

fn serialize(&self, buf: &mut GenericArray<u8, Self::Size>) {
let sz: usize = (<V as SharedValue>::BITS / 8).try_into().unwrap();
for i in 0..$width {
self.0[i].serialize(
GenericArray::try_from_mut_slice(&mut buf[sz * i..sz * (i + 1)]).unwrap(),
);
}
}

fn deserialize(
buf: &GenericArray<u8, Self::Size>,
) -> Result<Self, Self::DeserializationError> {
let sz: usize = (<V as SharedValue>::BITS / 8).try_into().unwrap();
let mut res = [V::ZERO; $width];
for i in 0..$width {
res[i] = V::deserialize(GenericArray::from_slice(&buf[sz * i..sz * (i + 1)]))?;
}
Ok(StdArray(res))
}
}
Ok(StdArray(res))
}
};
}

impl_serializable!(16, U16);
impl_serializable!(32, U32);
impl_serializable!(64, U64);
impl_serializable!(256, U256);

#[cfg(all(test, unit_test))]
mod test {
use std::iter;
Expand Down
Loading

0 comments on commit f707f06

Please sign in to comment.