diff --git a/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs
index f1e6fcec3..bda3dca76 100644
--- a/libm-test/src/generate/case_list.rs
+++ b/libm-test/src/generate/case_list.rs
@@ -18,7 +18,6 @@ pub struct TestCase<Op: MathOp> {
 }
 
 impl<Op: MathOp> TestCase<Op> {
-    #[expect(dead_code)]
     fn append_inputs(v: &mut Vec<Self>, l: &[Op::RustArgs]) {
         v.extend(l.iter().copied().map(|input| Self {
             input,
@@ -458,7 +457,18 @@ fn frexpf_cases() -> Vec<TestCase<op::frexpf::Routine>> {
 }
 
 fn hypot_cases() -> Vec<TestCase<op::hypot::Routine>> {
-    vec![]
+    let mut v = vec![];
+    TestCase::append_inputs(
+        &mut v,
+        &[
+            // Cases that can overflow the exponent if wrapping arithmetic is not used.
+            (hf64!("-0x1.800f800f80100p+1023"), hf64!("0x1.8354835473720p+996")),
+            (hf64!("0x1.201b201b201c0p+0"), hf64!("0x1.b028b028b02a0p-1")),
+            (hf64!("-0x1.e538e538e564p+980"), hf64!("-0x1.c4dfc4dfc508p+983")),
+            (hf64!("-0x1.2f22e4f77aa58p+983"), hf64!("-0x1.44c9f5524c8ccp+980")),
+        ],
+    );
+    v
 }
 
 fn hypotf_cases() -> Vec<TestCase<op::hypotf::Routine>> {
diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index f5fb5f670..6a543ceca 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -48,6 +48,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
         // Operations that aren't required to be exact, but our implementations are.
         Bn::Cbrt => 0,
+        Bn::Hypot if ctx.fn_ident == Id::Hypot => 0,
 
         // Bessel functions have large inaccuracies.
         Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
@@ -100,6 +101,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             Id::Cbrt => ulp = 2,
             // FIXME(#401): musl has an incorrect result here.
             Id::Fdim => ulp = 2,
+            Id::Hypot => ulp = 1,
             Id::Sincosf => ulp = 500,
             Id::Tgamma => ulp = 20,
             _ => (),
diff --git a/libm/src/lib.rs b/libm/src/lib.rs
index 7df84fe18..c7eabeeda 100644
--- a/libm/src/lib.rs
+++ b/libm/src/lib.rs
@@ -8,6 +8,7 @@
 )]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![cfg_attr(f16_enabled, feature(f16))]
+#![allow(internal_features)]
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs
index da458ea1d..dfe7a09b5 100644
--- a/libm/src/math/hypot.rs
+++ b/libm/src/math/hypot.rs
@@ -1,74 +1,310 @@
-use core::f64;
+/* SPDX-License-Identifier: MIT */
+/* origin: core-math/src/binary64/hypot/hypot.c
+ * Copyright (c) 2022 Alexei Sibidanov.
+ * Ported to Rust in 2025 by Trevor Gross.
+ */
+
+//! Euclidean distance via the Pythagorean theorem (`√(x² + y²)`).
+//!
+//! Per IEEE 754-2019:
+//!
+//! - Domain: `[−∞, +∞] × [−∞, +∞]`
+//! - `hypot(±0, ±0)` is +0
+//! - `hypot(±∞, qNaN)` is +∞
+//! - `hypot(qNaN, ±∞)` is +∞
+//! - May raise overflow or underflow
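+//!
+//! Illustrative doctest (an editorial addition, not from the upstream C
+//! source): since the result is correctly rounded, exactly representable
+//! cases such as the 3-4-5 triangle must round-trip with no error.
+//!
+//! ```
+//! assert_eq!(libm::hypot(3.0, 4.0), 5.0);
+//! ```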
 
 use super::sqrt;
+#[allow(unused_imports)] // msrv compat
+use super::support::Float;
+use super::support::cold_path;
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn hypot(x: f64, y: f64) -> f64 {
+    return cr_hypot(x, y);
+}
+
+fn cr_hypot(mut x: f64, mut y: f64) -> f64 {
+    let flag = get_flags();
+
+    let xi = x.to_bits();
+    let yi = y.to_bits();
+
+    let emsk: u64 = 0x7ffu64 << 52;
+    let mut ex: u64 = xi & emsk;
+    let mut ey: u64 = yi & emsk;
+    /* emsk corresponds to the upper bits of NaN and Inf (apart from the sign bit) */
+    x = x.abs();
+    y = y.abs();
+    if ex == emsk || ey == emsk {
+        cold_path();
+
+        /* Either x or y is NaN or Inf */
+        let wx: u64 = xi << 1;
+        let wy: u64 = yi << 1;
+        let wm: u64 = emsk << 1;
+        let one_inf = (wx == wm) ^ (wy == wm);
+        let one_nan = x.is_nan() ^ y.is_nan();
+
+        /* one_inf is true when exactly one of x and y is +/-Inf, and one_nan
+         * when exactly one is NaN. IEEE 754 says that
+         * hypot(+/-Inf, qNaN) = hypot(qNaN, +/-Inf) = +Inf. */
+        if one_inf && one_nan {
+            return f64::INFINITY;
+        }
+        return x + y; /* inf, sNaN */
+    }
+
+    let u: f64 = x.max(y);
+    let v: f64 = x.min(y);
+    let mut xd: u64 = u.to_bits();
+    let mut yd: u64 = v.to_bits();
+    ey = yd;
+
+    if ey >> 52 == 0 {
+        cold_path();
+
+        if yd == 0 {
+            return f64::from_bits(xd);
+        }
-
-const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
+
+        ex = xd;
-
-fn sq(x: f64) -> (f64, f64) {
-    let xh: f64;
-    let xl: f64;
-    let xc: f64;
+
+        if ex >> 52 == 0 {
+            cold_path();
-
-    xc = x * SPLIT;
-    xh = x - xc + xc;
-    xl = x - xh;
-    let hi = x * x;
-    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
-    (hi, lo)
-}
+
+            if ex == 0 {
+                return 0.0;
+            }
+
+            return as_hypot_denorm(ex, ey);
+        }
+
+        let nz: u32 = ey.leading_zeros();
+        ey <<= nz - 11;
+        ey &= u64::MAX >> 12;
+        ey = ey.wrapping_sub(((nz as i64 - 12i64) << 52) as u64);
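+        // Editorial note: the three lines above renormalize the subnormal
+        // `y`: the significand is shifted up so its leading bit lands in the
+        // implicit-one position, then masked to 52 bits, and the shift is
+        // repaid by a wrapping subtraction from the biased exponent (which
+        // dips "below zero" until `off` rescales everything further down).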
+        yd = ey;
+    }
+
+    let de: u64 = xd.wrapping_sub(yd);
+    if de > (27_u64 << 52) {
+        cold_path();
+        return fmaf64(hf64!("0x1p-27"), v, u);
+    }
+
+    let off: i64 = (0x3ff_i64 << 52) - (xd & emsk) as i64;
+    xd = xd.wrapping_add(off as u64);
+    yd = yd.wrapping_add(off as u64);
+    x = f64::from_bits(xd);
+    y = f64::from_bits(yd);
+    let x2: f64 = x * x;
+    let dx2: f64 = fmaf64(x, x, -x2);
+    let y2: f64 = y * y;
+    let dy2: f64 = fmaf64(y, y, -y2);
+    let r2: f64 = x2 + y2;
+    let ir2: f64 = 0.5 / r2;
+    let dr2: f64 = ((x2 - r2) + y2) + (dx2 + dy2);
+    let mut th: f64 = sqrt(r2);
+    let rsqrt: f64 = th * ir2;
+    let dz: f64 = dr2 - fmaf64(th, th, -r2);
+    let mut tl: f64 = rsqrt * dz;
+    th = fasttwosum(th, tl, &mut tl);
+    let mut thd: u64 = th.to_bits();
+    let tld = tl.abs().to_bits();
+    ex = thd;
+    ey = tld;
+    ex &= 0x7ff_u64 << 52;
+    let aidr: u64 = ey.wrapping_add(0x3fe_u64 << 52).wrapping_sub(ex);
+    let mid: u64 = (aidr.wrapping_sub(0x3c90000000000000).wrapping_add(16)) >> 5;
+    if mid == 0 || !(0x39b0000000000000_u64..=0x3c9fffffffffff80_u64).contains(&aidr) {
+        cold_path();
+        thd = as_hypot_hard(x, y, flag).to_bits();
+    }
+    thd = thd.wrapping_sub(off as u64);
+    if thd >= (0x7ff_u64 << 52) {
+        cold_path();
+        return as_hypot_overflow();
+    }
+
+    f64::from_bits(thd)
+}
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn hypot(mut x: f64, mut y: f64) -> f64 {
-    let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
-    let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
-
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let uti;
-    let ex: i64;
-    let ey: i64;
-    let mut z: f64;
-
-    /* arrange |x| >= |y| */
-    uxi &= -1i64 as u64 >> 1;
-    uyi &= -1i64 as u64 >> 1;
-    if uxi < uyi {
-        uti = uxi;
-        uxi = uyi;
-        uyi = uti;
+
+fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
+    let s: f64 = x + y;
+    let z: f64 = s - x;
+    *e = y - z;
+    s
+}
+
+fn as_hypot_overflow() -> f64 {
+    let z: f64 = hf64!("0x1.fffffffffffffp1023");
+    let f = z + z;
+    if f > z {
+        // errno = ERANGE
+    }
+    f
+}
-    }
-
-    /* special cases */
-    ex = (uxi >> 52) as i64;
-    ey = (uyi >> 52) as i64;
-    x = f64::from_bits(uxi);
-    y = f64::from_bits(uyi);
-    /* note: hypot(inf,nan) == inf */
-    if ey == 0x7ff {
-        return y;
+
+/* Here the square root is refined by Newton iterations: x^2 + y^2 is exact
+ * and fits in a 128-bit integer, so the approximation is squared (which also
+ * fits in a 128-bit integer), compared, and adjusted if necessary using the
+ * exact value of x^2 + y^2. */
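+/* Editorial gloss (not from the C source): concretely, the search loop below
+ * bumps the candidate significand rm one step at a time until (rm << k)^2
+ * reaches the exact integer m2 = x^2 + y^2, then picks the correctly rounded
+ * result from the sign of the final difference. op/om detect the rounding
+ * mode via the perturbed constants 1 +/- 2^-54: they compare equal only in
+ * round-to-nearest. */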
+fn as_hypot_hard(x: f64, y: f64, flag: FExcept) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let mut xi: u64 = x.to_bits();
+    let yi: u64 = y.to_bits();
+    let mut bm: u64 = (xi & (u64::MAX >> 12)) | 1u64 << 52;
+    let mut lm: u64 = (yi & (u64::MAX >> 12)) | 1u64 << 52;
+    let be: i32 = (xi >> 52) as i32;
+    let le: i32 = (yi >> 52) as i32;
+    let ri: u64 = sqrt(x * x + y * y).to_bits();
+    let bs: i32 = 2;
+    let mut rm: u64 = ri & (u64::MAX >> 12);
+    let mut re: i32 = (ri >> 52) as i32 - 0x3ff;
+    rm |= 1u64 << 52;
+
+    for _ in 0..3 {
+        // The minimal-significand wraparound is the rare case here.
+        if rm == 1u64 << 52 {
+            cold_path();
+            rm = u64::MAX >> 11;
+            re -= 1;
+        } else {
+            rm -= 1;
+        }
+    }
+
+    bm <<= bs;
+    let mut m2: u64 = bm.wrapping_mul(bm);
+    let de: i32 = be - le;
+    let mut ls: i32 = bs - de;
+
+    if ls >= 0 {
+        lm <<= ls;
+        m2 = m2.wrapping_add(lm.wrapping_mul(lm));
+    } else {
+        cold_path();
+        let lm2: u128 = (lm as u128) * (lm as u128);
+        ls *= 2;
+        m2 = m2.wrapping_add((lm2 >> -ls) as u64);
+        m2 |= ((lm2 << (128 + ls)) != 0) as u64;
+    }
+
+    let k: i32 = bs + re;
+    let mut d: i64;
+
+    loop {
+        rm += 1 + (rm >= (1u64 << 53)) as u64;
+        let tm: u64 = rm << k;
+        let rm2: u64 = tm.wrapping_mul(tm);
+        d = m2 as i64 - rm2 as i64;
+
+        if d <= 0 {
+            break;
+        }
+    }
+
+    if d == 0 {
+        set_flags(flag);
+    } else if op == om {
+        let tm: u64 = (rm << k) - (1 << (k - (rm <= (1u64 << 53)) as i32));
+        d = m2 as i64 - (tm.wrapping_mul(tm)) as i64;
+
+        if d == 0 {
+            cold_path();
+            rm -= rm & 1;
+        } else {
+            rm = rm.wrapping_add((d >> 63) as u64);
+        }
+    } else {
+        cold_path();
+        rm -= ((op == 1.0) as u64) << (rm > (1u64 << 53)) as u32;
+    }
+
+    if rm >= (1u64 << 53) {
+        rm >>= 1;
+        re += 1;
+    }
-    }
-    if ex == 0x7ff || uyi == 0 {
-        return x;
-    }
+
+    let e: u64 = (be - 1 + re) as u64;
+    xi = (e << 52) + rm;
+
+    f64::from_bits(xi)
+}
+
+fn as_hypot_denorm(mut a: u64, mut b: u64) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let af: f64 = a as i64 as f64;
+    let bf: f64 = b as i64 as f64;
+    a <<= 1;
+    b <<= 1;
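+    /* Editorial note: for a fully subnormal result no exponent scaling is
+     * possible, so the doubled significands are squared exactly in integer
+     * arithmetic and the floating sqrt estimate below is corrected by an
+     * integer search, mirroring as_hypot_hard. */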
+    // The `as` cast truncates toward zero, matching the C original's
+    // implicit double-to-uint64 conversion.
+    let mut rm: u64 = sqrt(af * af + bf * bf) as u64;
+    let tm: u64 = rm << 1;
+    let mut d: i64 = (a.wrapping_mul(a) as i64)
+        .wrapping_sub(tm.wrapping_mul(tm) as i64)
+        .wrapping_add(b.wrapping_mul(b) as i64);
+    let sd: i64 = d >> 63;
+    let um: i64 = ((rm as i64) ^ sd) - sd;
+    let mut drm: i64 = sd + 1;
+    let mut dd: i64 = (um << 3) + 4;
+    let mut p_d: i64;
+    rm -= drm as u64;
+    drm += sd;
+
+    loop {
+        p_d = d;
+        rm = rm.wrapping_add(drm as u64);
+        d = d.wrapping_sub(dd);
+        dd = dd.wrapping_add(8);
+        if (d ^ p_d) <= 0 {
+            cold_path();
+            break;
+        }
+    }
-
-    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
-    /* 64 difference is enough for ld80 double_t */
-    if ex - ey > 64 {
-        return x + y;
+
+    p_d = (sd & d) + (!sd & p_d);
+    if p_d != 0 {
+        if op == om {
+            let sum: i64 = p_d.wrapping_sub(4u64.wrapping_mul(rm) as i64).wrapping_sub(1);
+
+            if sum != 0 {
+                rm = rm.wrapping_add((sum >> 63).wrapping_add(1) as u64);
+            } else {
+                cold_path();
+                rm += rm & 1;
+            }
+        } else {
+            cold_path();
+            rm += (op > 1.0) as u64;
+        }
+    } else {
+        cold_path();
+    }
-    }
-
-    /* precise sqrt argument in nearest rounding mode without overflow */
-    /* xh*xh must not overflow and xl*xl must not underflow in sq */
-    z = 1.;
-    if ex > 0x3ff + 510 {
-        z = x1p700;
-        x *= x1p_700;
-        y *= x1p_700;
-    } else if ey < 0x3ff - 450 {
-        z = x1p_700;
-        x *= x1p700;
-        y *= x1p700;
+
+    let xi: u64 = rm;
+    f64::from_bits(xi)
+}
+
+fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
+    #[cfg(intrinsics_enabled)]
+    {
+        return unsafe { core::intrinsics::fmaf64(x, y, z) };
+    }
-    }
-    let (hx, lx) = sq(x);
-    let (hy, ly) = sq(y);
-    z * sqrt(ly + lx + hy + hx)
-}
+
+    #[cfg(not(intrinsics_enabled))]
+    {
+        return super::fma(x, y, z);
+    }
+}
+
+/* Floating point exception flags are not modeled by this port, so the fenv
+ * interface is stubbed out as no-ops. */
+type FExcept = u32;
+
+fn set_flags(_flag: FExcept) {}
+
+fn get_flags() -> FExcept {
+    0
+}
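+
+// Editorial sketch (not part of the upstream patch): quick spot checks of the
+// special cases documented above; exact inputs stay exact under correct
+// rounding.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn spot_checks() {
+        assert_eq!(hypot(3.0, 4.0), 5.0);
+        assert_eq!(hypot(0.0, -0.0), 0.0);
+        assert!(hypot(f64::INFINITY, f64::NAN).is_infinite());
+    }
+}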