Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use the chaining methods on PartialOrd for slices too #138881

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions library/core/src/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2053,6 +2053,22 @@ mod impls {
fn ge(&self, other: &&B) -> bool {
PartialOrd::ge(*self, *other)
}
#[inline]
fn __chaining_lt(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_lt(*self, *other)
}
#[inline]
fn __chaining_le(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_le(*self, *other)
}
#[inline]
fn __chaining_gt(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_gt(*self, *other)
}
#[inline]
fn __chaining_ge(&self, other: &&B) -> ControlFlow<bool> {
PartialOrd::__chaining_ge(*self, *other)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: ?Sized> Ord for &A
Expand Down Expand Up @@ -2108,6 +2124,22 @@ mod impls {
fn ge(&self, other: &&mut B) -> bool {
PartialOrd::ge(*self, *other)
}
#[inline]
fn __chaining_lt(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_lt(*self, *other)
}
#[inline]
fn __chaining_le(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_le(*self, *other)
}
#[inline]
fn __chaining_gt(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_gt(*self, *other)
}
#[inline]
fn __chaining_ge(&self, other: &&mut B) -> ControlFlow<bool> {
PartialOrd::__chaining_ge(*self, *other)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: ?Sized> Ord for &mut A
Expand Down
174 changes: 145 additions & 29 deletions library/core/src/slice/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::ascii;
use crate::cmp::{self, BytewiseEq, Ordering};
use crate::intrinsics::compare_bytes;
use crate::num::NonZero;
use crate::ops::ControlFlow;

#[stable(feature = "rust1", since = "1.0.0")]
impl<T, U> PartialEq<[U]> for [T]
Expand All @@ -31,12 +32,64 @@ impl<T: Ord> Ord for [T] {
}
}

#[inline]
fn as_underlying(x: ControlFlow<bool>) -> u8 {
// SAFETY: This will only compile if `bool` and `ControlFlow<bool>` have the same
// size (which isn't guaranteed but this is libcore). Because they have the same
// size, it's a niched implementation, which in one byte means there can't be
// any uninitialized memory. The callers then only check for `0` or `1` from this,
// which must necessarily match the `Break` variant, and we're fine no matter
// what ends up getting picked as the value representing `Continue(())`.
unsafe { crate::mem::transmute(x) }
}

/// Implements comparison of slices [lexicographically](Ord#lexicographical-comparison).
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: PartialOrd> PartialOrd for [T] {
#[inline]
fn partial_cmp(&self, other: &[T]) -> Option<Ordering> {
SlicePartialOrd::partial_compare(self, other)
}
#[inline]
fn lt(&self, other: &Self) -> bool {
// This is certainly not the obvious way to implement these methods.
// Unfortunately, using anything that looks at the discriminant means that
// LLVM sees a check for `2` (aka `ControlFlow<bool>::Continue(())`) and
// gets very distracted by that, ending up generating extraneous code.
// This should be changed to something simpler once either LLVM is smarter,
// see <https://github.com/llvm/llvm-project/issues/132678>, or we generate
// niche discriminant checks in a way that doesn't trigger it.
Comment on lines +55 to +61
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So there's a backlink: llvm/llvm-project#132678


as_underlying(self.__chaining_lt(other)) == 1
}
#[inline]
fn le(&self, other: &Self) -> bool {
as_underlying(self.__chaining_le(other)) != 0
}
#[inline]
fn gt(&self, other: &Self) -> bool {
as_underlying(self.__chaining_gt(other)) == 1
}
#[inline]
fn ge(&self, other: &Self) -> bool {
as_underlying(self.__chaining_ge(other)) != 0
}
#[inline]
fn __chaining_lt(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_lt(self, other)
}
#[inline]
fn __chaining_le(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_le(self, other)
}
#[inline]
fn __chaining_gt(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_gt(self, other)
}
#[inline]
fn __chaining_ge(&self, other: &Self) -> ControlFlow<bool> {
SliceChain::chaining_ge(self, other)
}
}

#[doc(hidden)]
Expand Down Expand Up @@ -99,24 +152,63 @@ trait SlicePartialOrd: Sized {
fn partial_compare(left: &[Self], right: &[Self]) -> Option<Ordering>;
}

#[doc(hidden)]
// intermediate trait for specialization of slice's PartialOrd chaining methods
trait SliceChain: Sized {
fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
}

type AlwaysBreak<B> = ControlFlow<B, crate::convert::Infallible>;

impl<A: PartialOrd> SlicePartialOrd for A {
default fn partial_compare(left: &[A], right: &[A]) -> Option<Ordering> {
let l = cmp::min(left.len(), right.len());

// Slice to the loop iteration range to enable bound check
// elimination in the compiler
let lhs = &left[..l];
let rhs = &right[..l];
let elem_chain = |a, b| match PartialOrd::partial_cmp(a, b) {
Some(Ordering::Equal) => ControlFlow::Continue(()),
non_eq => ControlFlow::Break(non_eq),
};
let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::partial_cmp(a, b));
let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
b
}
}

for i in 0..l {
match lhs[i].partial_cmp(&rhs[i]) {
Some(Ordering::Equal) => (),
non_eq => return non_eq,
}
}
impl<A: PartialOrd> SliceChain for A {
default fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_lt, usize::__chaining_lt)
}
default fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_le, usize::__chaining_le)
}
default fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_gt, usize::__chaining_gt)
}
default fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
chaining_impl(left, right, PartialOrd::__chaining_ge, usize::__chaining_ge)
}
}

left.len().partial_cmp(&right.len())
#[inline]
fn chaining_impl<'l, 'r, A: PartialOrd, B, C>(
left: &'l [A],
right: &'r [A],
elem_chain: impl Fn(&'l A, &'r A) -> ControlFlow<B>,
len_chain: impl for<'a> FnOnce(&'a usize, &'a usize) -> ControlFlow<B, C>,
) -> ControlFlow<B, C> {
let l = cmp::min(left.len(), right.len());

// Slice to the loop iteration range to enable bound check
// elimination in the compiler
let lhs = &left[..l];
let rhs = &right[..l];

for i in 0..l {
elem_chain(&lhs[i], &rhs[i])?;
}

len_chain(&left.len(), &right.len())
}

// This is the impl that we would like to have. Unfortunately it's not sound.
Expand Down Expand Up @@ -165,21 +257,13 @@ trait SliceOrd: Sized {

impl<A: Ord> SliceOrd for A {
default fn compare(left: &[Self], right: &[Self]) -> Ordering {
let l = cmp::min(left.len(), right.len());

// Slice to the loop iteration range to enable bound check
// elimination in the compiler
let lhs = &left[..l];
let rhs = &right[..l];

for i in 0..l {
match lhs[i].cmp(&rhs[i]) {
Ordering::Equal => (),
non_eq => return non_eq,
}
}

left.len().cmp(&right.len())
let elem_chain = |a, b| match Ord::cmp(a, b) {
Ordering::Equal => ControlFlow::Continue(()),
non_eq => ControlFlow::Break(non_eq),
};
let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::cmp(a, b));
let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
b
}
}

Expand All @@ -191,7 +275,7 @@ impl<A: Ord> SliceOrd for A {
/// * For every `x` and `y` of this type, `Ord(x, y)` must return the same
/// value as `Ord::cmp(transmute::<_, u8>(x), transmute::<_, u8>(y))`.
#[rustc_specialization_trait]
unsafe trait UnsignedBytewiseOrd {}
unsafe trait UnsignedBytewiseOrd: Ord {}

unsafe impl UnsignedBytewiseOrd for bool {}
unsafe impl UnsignedBytewiseOrd for u8 {}
Expand Down Expand Up @@ -225,6 +309,38 @@ impl<A: Ord + UnsignedBytewiseOrd> SliceOrd for A {
}
}

// Don't generate our own chaining loops for `memcmp`-able things either.
impl<A: PartialOrd + UnsignedBytewiseOrd> SliceChain for A {
#[inline]
fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
match SliceOrd::compare(left, right) {
Ordering::Equal => ControlFlow::Continue(()),
ne => ControlFlow::Break(ne.is_lt()),
}
}
#[inline]
fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
match SliceOrd::compare(left, right) {
Ordering::Equal => ControlFlow::Continue(()),
ne => ControlFlow::Break(ne.is_le()),
}
}
#[inline]
fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
match SliceOrd::compare(left, right) {
Ordering::Equal => ControlFlow::Continue(()),
ne => ControlFlow::Break(ne.is_gt()),
}
}
#[inline]
fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
match SliceOrd::compare(left, right) {
Ordering::Equal => ControlFlow::Continue(()),
ne => ControlFlow::Break(ne.is_ge()),
}
}
}

pub(super) trait SliceContains: Sized {
fn slice_contains(&self, x: &[Self]) -> bool;
}
Expand Down
29 changes: 28 additions & 1 deletion library/core/src/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use crate::cmp::Ordering::{self, *};
use crate::marker::{ConstParamTy_, StructuralPartialEq, UnsizedConstParamTy};
use crate::ops::ControlFlow::{Break, Continue};
use crate::ops::ControlFlow::{self, Break, Continue};

// Recursive macro for implementing n-ary tuple functions and operations
//
Expand Down Expand Up @@ -95,6 +95,22 @@ macro_rules! tuple_impls {
fn gt(&self, other: &($($T,)+)) -> bool {
lexical_ord!(gt, __chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_lt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_lt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_le(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_le, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_gt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
#[inline]
fn __chaining_ge(&self, other: &($($T,)+)) -> ControlFlow<bool> {
lexical_chain!(__chaining_ge, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
}
}
}

Expand Down Expand Up @@ -187,6 +203,17 @@ macro_rules! lexical_ord {
};
}

// Same parameter interleaving as `lexical_ord` above
macro_rules! lexical_chain {
($chain_rel: ident, $a:expr, $b:expr $(,$rest_a:expr, $rest_b:expr)*) => {{
PartialOrd::$chain_rel(&$a, &$b)?;
lexical_chain!($chain_rel $(,$rest_a, $rest_b)*)
}};
($chain_rel: ident) => {
Continue(())
};
}

macro_rules! lexical_partial_cmp {
($a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {
match ($a).partial_cmp(&$b) {
Expand Down
55 changes: 55 additions & 0 deletions tests/codegen/array-cmp.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Ensure the asm for array comparisons is properly optimized.

//@ compile-flags: -C opt-level=2
//@ needs-deterministic-layouts (checks depend on tuple layout)

#![crate_type = "lib"]

Expand All @@ -17,3 +18,57 @@ pub fn compare() -> bool {
[0x00, 0x00, 0x48, 0x41]
}
}

// CHECK-LABEL: @array_of_tuple_le
#[no_mangle]
pub fn array_of_tuple_le(a: &[(i16, u16); 2], b: &[(i16, u16); 2]) -> bool {
Copy link
Member Author

@scottmcm scottmcm Mar 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nightly, note the setl+setg from the cmp methods:

array_of_tuple_le:
        movzwl  (%rdi), %eax
        movzwl  (%rsi), %ecx
        cmpw    %cx, %ax
        setl    %cl
        setg    %al
        jne     .LBB0_1
        movzwl  2(%rdi), %eax
        cmpw    2(%rsi), %ax
        seta    %al
        sbbb    $0, %al
        testb   %al, %al
        jne     .LBB0_7
        jmp     .LBB0_4
.LBB0_1:
        subb    %cl, %al
        testb   %al, %al
        jne     .LBB0_7
.LBB0_4:
        movzwl  4(%rdi), %eax
        movzwl  4(%rsi), %ecx
        cmpw    %cx, %ax
        setl    %cl
        setg    %al
        jne     .LBB0_5
        movzwl  6(%rdi), %eax
        cmpw    6(%rsi), %ax
        seta    %al
        sbbb    $0, %al
        jmp     .LBB0_7
.LBB0_5:
        subb    %cl, %al
.LBB0_7:
        testb   %al, %al
        setle   %al
        retq

With this PR:

array_of_tuple_le:
	movzwl	(%rcx), %eax
	movzwl	(%rdx), %r8d
	cmpw	%r8w, %ax
	jne	.LBB1_1
	movzwl	2(%rcx), %r8d
	movzwl	2(%rdx), %r9d
	cmpw	%r9w, %r8w
	jne	.LBB1_2
	movzwl	4(%rcx), %eax
	movzwl	4(%rdx), %r8d
	cmpw	%r8w, %ax
	jne	.LBB1_1
	movzwl	6(%rcx), %r8d
	movzwl	6(%rdx), %r9d
	movb	$1, %al
	cmpw	%r9w, %r8w
	jne	.LBB1_2
	retq
.LBB1_1:
	cmpw	%r8w, %ax
	setle	%al
	retq
.LBB1_2:
	cmpw	%r9w, %r8w
	setb	%al
	retq

That's so nice -- straight line with no jumps taken if they're equal, otherwise jumps to the check it needs (dedup'd by consistent register choices), saves that, and returns.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that even with #133984 landed, we still don't optimize away the [su]cmps in nightly today: https://rust.godbolt.org/z/ss5E7q6PT.

// Ensure that, after all the optimizations have run, the happy path just checks
// `eq` on each corresponding pair and moves onto the next one if it is.
// Then there's a dedup'd comparison for the place that's different.
// (As opposed to, say, running a full `[su]cmp` as part of checking equality.)

// This is written quite specifically because different library code was triggering
// <https://github.com/llvm/llvm-project/issues/132678> along the way, so this
// has enough checks to make sure that's not happening. It doesn't need to be
// *exactly* this IR, but be careful if you ever need to update these checks.

// CHECK: start:
// CHECK: %[[A00:.+]] = load i16, ptr %a
// CHECK: %[[B00:.+]] = load i16, ptr %b
// CHECK-NOT: cmp
// CHECK: %[[EQ00:.+]] = icmp eq i16 %[[A00]], %[[B00]]
// CHECK-NEXT: br i1 %[[EQ00]], label %[[L01:.+]], label %[[EXIT_S:.+]]

// CHECK: [[L01]]:
// CHECK: %[[PA01:.+]] = getelementptr{{.+}}i8, ptr %a, i64 2
// CHECK: %[[PB01:.+]] = getelementptr{{.+}}i8, ptr %b, i64 2
// CHECK: %[[A01:.+]] = load i16, ptr %[[PA01]]
// CHECK: %[[B01:.+]] = load i16, ptr %[[PB01]]
// CHECK-NOT: cmp
// CHECK: %[[EQ01:.+]] = icmp eq i16 %[[A01]], %[[B01]]
// CHECK-NEXT: br i1 %[[EQ01]], label %[[L10:.+]], label %[[EXIT_U:.+]]

// CHECK: [[L10]]:
// CHECK: %[[PA10:.+]] = getelementptr{{.+}}i8, ptr %a, i64 4
// CHECK: %[[PB10:.+]] = getelementptr{{.+}}i8, ptr %b, i64 4
// CHECK: %[[A10:.+]] = load i16, ptr %[[PA10]]
// CHECK: %[[B10:.+]] = load i16, ptr %[[PB10]]
// CHECK-NOT: cmp
// CHECK: %[[EQ10:.+]] = icmp eq i16 %[[A10]], %[[B10]]
// CHECK-NEXT: br i1 %[[EQ10]], label %[[L11:.+]], label %[[EXIT_S]]

// CHECK: [[L11]]:
// CHECK: %[[PA11:.+]] = getelementptr{{.+}}i8, ptr %a, i64 6
// CHECK: %[[PB11:.+]] = getelementptr{{.+}}i8, ptr %b, i64 6
// CHECK: %[[A11:.+]] = load i16, ptr %[[PA11]]
// CHECK: %[[B11:.+]] = load i16, ptr %[[PB11]]
// CHECK-NOT: cmp
// CHECK: %[[EQ11:.+]] = icmp eq i16 %[[A11]], %[[B11]]
// CHECK-NEXT: br i1 %[[EQ11]], label %[[DONE:.+]], label %[[EXIT_U]]

// CHECK: [[DONE]]:
// CHECK: %[[RET:.+]] = phi i1 [ %{{.+}}, %[[EXIT_S]] ], [ %{{.+}}, %[[EXIT_U]] ], [ true, %[[L11]] ]
// CHECK: ret i1 %[[RET]]

a <= b
}
Loading