From 9cf52b7e2cfe0d84d228c347e91944b4011a0e26 Mon Sep 17 00:00:00 2001
From: Brian Smith <brian@briansmith.org>
Date: Sun, 19 Jan 2025 17:08:51 -0800
Subject: [PATCH] arithmetic internals: Restore old signature of bn_mul_mont.

In BoringSSL, it returns `int`. Have it do that again, to make the
next bit of refactoring easier, where we replace `bn_mul_mont` with a
Rust function that dispatches to a bunch of other functions, all of
which also are declared as having the same semantics.

The good thing is that this restores the assembly code to match
BoringSSL.

The bad thing is that this some bits of dead code to handle zero
return values that we'll never see.
---
 crypto/fipsmodule/bn/asm/x86-mont.pl    |  3 +++
 crypto/fipsmodule/bn/asm/x86_64-mont.pl |  2 +-
 crypto/fipsmodule/bn/internal.h         | 21 +++++++------------
 crypto/fipsmodule/ec/gfp_p256.c         |  5 ++++-
 crypto/fipsmodule/ec/gfp_p384.c         | 10 +++++++--
 src/arithmetic/ffi.rs                   | 18 +++++++++++------
 src/arithmetic/montgomery.rs            |  3 ++-
 src/bssl.rs                             | 27 ++++++++++++++++++-------
 src/prefixed.rs                         |  5 +++--
 9 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/crypto/fipsmodule/bn/asm/x86-mont.pl b/crypto/fipsmodule/bn/asm/x86-mont.pl
index eccefa051c..6f7e25c7dc 100755
--- a/crypto/fipsmodule/bn/asm/x86-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86-mont.pl
@@ -68,6 +68,8 @@
 
 	&xor	("eax","eax");
 	&mov	("edi",&wparam(5));	# int num
+	&cmp	("edi",4);
+	&jl	(&label("just_leave"));
 
 	&lea	("esi",&wparam(0));	# put aside pointer to argument block
 	&lea	("edx",&wparam(1));	# load ap
@@ -327,6 +329,7 @@
 
 	&mov	("esp",$_sp);		# pull saved stack pointer
 	&mov	("eax",1);
+&set_label("just_leave");
 &function_end("bn_mul_mont");
 
 &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index ce89b17679..be4c69b55a 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -65,7 +65,7 @@
 # output, so this isn't useful anyway.
 $addx = 1;
 
-# void bn_mul_mont(
+# int bn_mul_mont(
 $rp="%rdi";	# BN_ULONG *rp,
 $ap="%rsi";	# const BN_ULONG *ap,
 $bp="%rdx";	# const BN_ULONG *bp,
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 3fbb7d7521..309f8f744c 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -165,23 +165,16 @@ typedef crypto_word_t BN_ULONG;
 #error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
 #endif
 
-
-// |num| must be at least 4, at least on x86.
-//
-// In other forks, |bn_mul_mont| returns an |int| indicating whether it
-// actually did the multiplication. All our implementations always do the
-// multiplication, and forcing callers to deal with the possibility of it
-// failing just leads to further problems.
-//
-// In other forks, |bn_mod_mul|'s `num` argument has type |int| but it is
-// implicitly treated as a |size_t|; when |int| is smaller than |size_t|
-// then the |movq 48(%rsp),%r9| done by x86_64-xlate.pl implicitly does the
-// conversion.
+// See `bn_mul_mont_ffi` and `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
 OPENSSL_STATIC_ASSERT(sizeof(int) == sizeof(size_t) ||
                       (sizeof(int) == 4 && sizeof(size_t) == 8),
                       "int and size_t ABI mismatch");
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                 const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+// |num| must be at least 4, at least on x86.
+// BoringSSL documents the return value as being 1 or 0, but some
+// implementations (e.g. 32-bit ARM) return different values on
+// success.
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 
 static inline void bn_umult_lohi(BN_ULONG *low_out, BN_ULONG *high_out,
                                  BN_ULONG a, BN_ULONG b) {
diff --git a/crypto/fipsmodule/ec/gfp_p256.c b/crypto/fipsmodule/ec/gfp_p256.c
index d82938ed52..837366e548 100644
--- a/crypto/fipsmodule/ec/gfp_p256.c
+++ b/crypto/fipsmodule/ec/gfp_p256.c
@@ -39,7 +39,10 @@ static const BN_ULONG N_N0[] = {
 void p256_scalar_mul_mont(ScalarMont r, const ScalarMont a,
                               const ScalarMont b) {
   /* XXX: Inefficient. TODO: optimize with dedicated multiplication routine. */
-  bn_mul_mont(r, a, b, N, N_N0, P256_LIMBS);
+  debug_assert_nonsecret(P256_LIMBS >= 4);
+  int res = bn_mul_mont(r, a, b, N, N_N0, P256_LIMBS);
+  debug_assert_nonsecret(res != 0);
+  (void)res;
 }
 
 /* XXX: Inefficient. TODO: optimize with dedicated squaring routine. */
diff --git a/crypto/fipsmodule/ec/gfp_p384.c b/crypto/fipsmodule/ec/gfp_p384.c
index 90065eaeb0..39a6bf136e 100644
--- a/crypto/fipsmodule/ec/gfp_p384.c
+++ b/crypto/fipsmodule/ec/gfp_p384.c
@@ -170,7 +170,10 @@ static void elem_div_by_2(Elem r, const Elem a) {
 
 static inline void elem_mul_mont(Elem r, const Elem a, const Elem b) {
   /* XXX: Not (clearly) constant-time; inefficient.*/
-  bn_mul_mont(r, a, b, Q, Q_N0, P384_LIMBS);
+  debug_assert_nonsecret(P384_LIMBS >= 4);
+  int res = bn_mul_mont(r, a, b, Q, Q_N0, P384_LIMBS);
+  debug_assert_nonsecret(res != 0);
+  (void)res;
 }
 
 static inline void elem_mul_by_2(Elem r, const Elem a) {
@@ -215,7 +218,10 @@ void p384_elem_neg(Elem r, const Elem a) {
 void p384_scalar_mul_mont(ScalarMont r, const ScalarMont a,
                               const ScalarMont b) {
   /* XXX: Inefficient. TODO: Add dedicated multiplication routine. */
-  bn_mul_mont(r, a, b, N, N_N0, P384_LIMBS);
+  debug_assert_nonsecret(P384_LIMBS >= 4);
+  int res = bn_mul_mont(r, a, b, N, N_N0, P384_LIMBS);
+  debug_assert_nonsecret(res != 0);
+  (void)res;
 }
 
 
diff --git a/src/arithmetic/ffi.rs b/src/arithmetic/ffi.rs
index b3566546d1..bf9549ab79 100644
--- a/src/arithmetic/ffi.rs
+++ b/src/arithmetic/ffi.rs
@@ -13,7 +13,7 @@
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 use super::{inout::AliasingSlices, n0::N0, LimbSliceError, MAX_LIMBS, MIN_LIMBS};
-use crate::{c, limb::Limb, polyfill::usize_from_u32};
+use crate::{c, error, limb::Limb, polyfill::usize_from_u32};
 use core::mem::size_of;
 
 const _MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES: () = {
@@ -41,7 +41,7 @@ macro_rules! bn_mul_mont_ffi {
                 n: *const Limb,
                 n0: &N0,
                 len: c::size_t,
-            );
+            ) -> crate::bssl::Result;
         }
         unsafe {
             crate::arithmetic::ffi::bn_mul_mont_ffi::<$Cpu, { $MIN_LEN }>(
@@ -64,7 +64,7 @@ pub(super) unsafe fn bn_mul_mont_ffi<Cpu, const MIN_LEN: usize>(
         n: *const Limb,
         n0: &N0,
         len: c::size_t,
-    ),
+    ) -> crate::bssl::Result,
 ) -> Result<(), LimbSliceError> {
     /// The x86 implementation of `bn_mul_mont`, at least, requires at least 4
     /// limbs. For a long time we have required 4 limbs for all targets, though
@@ -81,12 +81,18 @@ pub(super) unsafe fn bn_mul_mont_ffi<Cpu, const MIN_LEN: usize>(
         return Err(LimbSliceError::too_long(n.len()));
     }
 
+    let len = n.len();
     in_out
         .with_pointers(n.len(), |r, a, b| {
-            let len = n.len();
             let n = n.as_ptr();
             let _: Cpu = cpu;
-            unsafe { f(r, a, b, n, n0, len) };
+            let result = unsafe { f(r, a, b, n, n0, len) };
+            Result::from(result)
         })
-        .map_err(LimbSliceError::len_mismatch)
+        .map_err(LimbSliceError::len_mismatch)?
+        .map_err(
+            #[cold]
+            #[inline(never)]
+            |_: error::Unspecified| LimbSliceError::too_short(len),
+        )
 }
diff --git a/src/arithmetic/montgomery.rs b/src/arithmetic/montgomery.rs
index 6d9a834835..5f07689fe5 100644
--- a/src/arithmetic/montgomery.rs
+++ b/src/arithmetic/montgomery.rs
@@ -140,7 +140,7 @@ prefixed_export! {
         n: *const Limb,
         n0: &N0,
         num_limbs: c::size_t,
-    ) {
+    ) -> crate::bssl::Result {
         use super::MAX_LIMBS;
 
         // The mutable pointer `r` may alias `a` and/or `b`, so the lifetimes of
@@ -159,6 +159,7 @@ prefixed_export! {
         }
         let r: &mut [Limb] = unsafe { core::slice::from_raw_parts_mut(r, num_limbs) };
         limbs_from_mont_in_place(r, tmp, n, n0);
+        bssl::Result::ok()
     }
 }
 
diff --git a/src/bssl.rs b/src/bssl.rs
index 9d958b3b6c..9f20f8ee01 100644
--- a/src/bssl.rs
+++ b/src/bssl.rs
@@ -21,14 +21,27 @@ use crate::{c, error};
 #[repr(transparent)]
 pub struct Result(c::int);
 
+impl Result {
+    #[cfg(not(any(
+        all(target_arch = "aarch64", target_endian = "little"),
+        all(target_arch = "arm", target_endian = "little"),
+        target_arch = "x86",
+        target_arch = "x86_64"
+    )))]
+    pub fn ok() -> Self {
+        Self(1)
+    }
+}
+
 impl From<Result> for core::result::Result<(), error::Unspecified> {
-    fn from(ret: Result) -> Self {
-        match ret.0 {
-            1 => Ok(()),
-            c => {
-                debug_assert_eq!(c, 0, "`bssl::Result` value must be 0 or 1");
-                Err(error::Unspecified)
-            }
+    fn from(Result(ret): Result) -> Self {
+        // BoringSSL functions are supposed to return 1 on success but some,
+        // such as bn_mul_mont* on 32-bit ARM at least, return other non-zero
+        // values on success instead.
+        if ret == 0 {
+            Err(error::Unspecified)
+        } else {
+            Ok(())
         }
     }
 }
diff --git a/src/prefixed.rs b/src/prefixed.rs
index bd89439f83..a551b11812 100644
--- a/src/prefixed.rs
+++ b/src/prefixed.rs
@@ -75,14 +75,15 @@ macro_rules! prefixed_export {
     // A function.
     {
         $( #[$meta:meta] )*
-        $vis:vis unsafe fn $name:ident ( $( $arg_pat:ident : $arg_ty:ty ),* $(,)? ) $body:block
+        $vis:vis unsafe fn $name:ident ( $( $arg_pat:ident : $arg_ty:ty ),* $(,)? )
+        -> $ret:ty $body:block
     } => {
         prefixed_item! {
             export_name
             $name
             {
                 $( #[$meta] )*
-                $vis unsafe fn $name ( $( $arg_pat : $arg_ty ),* ) $body
+                $vis unsafe fn $name ( $( $arg_pat : $arg_ty ),* ) -> $ret $body
             }
         }
     };