Skip to content

Commit

Permalink
chacha: Move x86 CPU capability checks to Rust (Merge BoringSSL 6d0caa1)
Browse files: browse the repository at this point in the history
  • Loading branch information
briansmith committed Jan 21, 2025
2 parents 95eac8e + 6d0caa1 commit ea7502a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 28 deletions.
30 changes: 8 additions & 22 deletions crypto/chacha/asm/chacha-x86.pl
Original file line number Diff line number Diff line change
Expand Up @@ -114,26 +114,10 @@ sub QUARTERROUND {
($d,$d_)=($d_,$d);
}

&static_label("ssse3_shortcut");
&static_label("ssse3_data");
&static_label("pic_point");

&function_begin("ChaCha20_ctr32");
&xor ("eax","eax");
&cmp ("eax",&wparam(2)); # len==0?
&je (&label("no_data"));
if ($xmm) {
&call (&label("pic_point"));
&set_label("pic_point");
&blindpop("eax");
&picmeup("ebp","OPENSSL_ia32cap_P","eax",&label("pic_point"));
&test (&DWP(0,"ebp"),1<<24); # test FXSR bit
&jz (&label("x86"));
&test (&DWP(4,"ebp"),1<<9); # test SSSE3 bit
&jz (&label("x86"));
&jmp (&label("ssse3_shortcut"));
&set_label("x86");
}
&function_begin("ChaCha20_ctr32_nohw");
&mov ("esi",&wparam(3)); # key
&mov ("edi",&wparam(4)); # counter and nonce

Expand Down Expand Up @@ -355,8 +339,7 @@ sub QUARTERROUND {

&set_label("done");
&stack_pop(33);
&set_label("no_data");
&function_end("ChaCha20_ctr32");
&function_end("ChaCha20_ctr32_nohw");

if ($xmm) {
my ($xa,$xa_,$xb,$xb_,$xc,$xc_,$xd,$xd_)=map("xmm$_",(0..7));
Expand Down Expand Up @@ -428,8 +411,11 @@ sub QUARTERROUND_SSSE3 {
($xd,$xd_)=($xd_,$xd);
}

&function_begin("_ChaCha20_ssse3");
&set_label("ssse3_shortcut");
&function_begin("ChaCha20_ctr32_ssse3");
&call (&label("pic_point"));
&set_label("pic_point");
&blindpop("eax");

&mov ($out,&wparam(0));
&mov ($inp,&wparam(1));
&mov ($len,&wparam(2));
Expand Down Expand Up @@ -751,7 +737,7 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
}
&set_label("done");
&mov ("esp",&DWP(512,"esp"));
&function_end("_ChaCha20_ssse3");
&function_end("ChaCha20_ctr32_ssse3");

&align (64);
&set_label("ssse3_data");
Expand Down
18 changes: 12 additions & 6 deletions src/aead/chacha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ impl Key {

#[inline(always)]
pub(super) fn encrypt(&self, counter: Counter, in_out: Overlapping<'_>, cpu: cpu::Features) {
// XXX: The x86 and at least one branch of the ARM assembly language
// code doesn't allow overlapping input and output unless they are
// "in place". See https://rt.openssl.org/Ticket/Display.html?id=4362.
cfg_if! {
if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
use cpu::{GetFeature as _, arm::Neon};
Expand Down Expand Up @@ -116,9 +113,18 @@ impl Key {
self, counter, in_out.copy_within(), ())
}
} else if #[cfg(target_arch = "x86")] {
chacha20_ctr32_ffi!(
unsafe { (0, cpu::Features, &mut [u8]) => ChaCha20_ctr32 },
self, counter, in_out.copy_within(), cpu)
use cpu::{GetFeature as _, intel::{Fxsr, Ssse3}};
if in_out.len() >= 1 {
if let Some(cpu) = cpu.get_feature() {
chacha20_ctr32_ffi!(
unsafe { (1, (Fxsr, Ssse3), &mut [u8]) => ChaCha20_ctr32_ssse3 },
self, counter, in_out.copy_within(), cpu)
} else {
chacha20_ctr32_ffi!(
unsafe { (1, (), &mut [u8]) => ChaCha20_ctr32_nohw },
self, counter, in_out.copy_within(), ())
}
}
} else if #[cfg(target_arch = "x86_64")] {
use cpu::{GetFeature, intel::{Avx2, Ssse3}};
const SSE_MIN_LEN: usize = 128 + 1; // Also AVX2, SSSE3_4X, SSSE3
Expand Down

0 comments on commit ea7502a

Please sign in to comment.