Skip to content

Commit cf991d9

Browse files
committed
Extend the chaining logic to slices too
1 parent 19648ce commit cf991d9

File tree

4 files changed

+254
-36
lines changed

4 files changed

+254
-36
lines changed

library/core/src/cmp.rs

+32
Original file line numberDiff line numberDiff line change
@@ -2053,6 +2053,22 @@ mod impls {
20532053
fn ge(&self, other: &&B) -> bool {
20542054
PartialOrd::ge(*self, *other)
20552055
}
2056+
#[inline]
2057+
fn __chaining_lt(&self, other: &&B) -> ControlFlow<bool> {
2058+
PartialOrd::__chaining_lt(*self, *other)
2059+
}
2060+
#[inline]
2061+
fn __chaining_le(&self, other: &&B) -> ControlFlow<bool> {
2062+
PartialOrd::__chaining_le(*self, *other)
2063+
}
2064+
#[inline]
2065+
fn __chaining_gt(&self, other: &&B) -> ControlFlow<bool> {
2066+
PartialOrd::__chaining_gt(*self, *other)
2067+
}
2068+
#[inline]
2069+
fn __chaining_ge(&self, other: &&B) -> ControlFlow<bool> {
2070+
PartialOrd::__chaining_ge(*self, *other)
2071+
}
20562072
}
20572073
#[stable(feature = "rust1", since = "1.0.0")]
20582074
impl<A: ?Sized> Ord for &A
@@ -2108,6 +2124,22 @@ mod impls {
21082124
fn ge(&self, other: &&mut B) -> bool {
21092125
PartialOrd::ge(*self, *other)
21102126
}
2127+
#[inline]
2128+
fn __chaining_lt(&self, other: &&mut B) -> ControlFlow<bool> {
2129+
PartialOrd::__chaining_lt(*self, *other)
2130+
}
2131+
#[inline]
2132+
fn __chaining_le(&self, other: &&mut B) -> ControlFlow<bool> {
2133+
PartialOrd::__chaining_le(*self, *other)
2134+
}
2135+
#[inline]
2136+
fn __chaining_gt(&self, other: &&mut B) -> ControlFlow<bool> {
2137+
PartialOrd::__chaining_gt(*self, *other)
2138+
}
2139+
#[inline]
2140+
fn __chaining_ge(&self, other: &&mut B) -> ControlFlow<bool> {
2141+
PartialOrd::__chaining_ge(*self, *other)
2142+
}
21112143
}
21122144
#[stable(feature = "rust1", since = "1.0.0")]
21132145
impl<A: ?Sized> Ord for &mut A

library/core/src/slice/cmp.rs

+145-29
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use crate::ascii;
55
use crate::cmp::{self, BytewiseEq, Ordering};
66
use crate::intrinsics::compare_bytes;
77
use crate::num::NonZero;
8+
use crate::ops::ControlFlow;
89

910
#[stable(feature = "rust1", since = "1.0.0")]
1011
impl<T, U> PartialEq<[U]> for [T]
@@ -31,12 +32,64 @@ impl<T: Ord> Ord for [T] {
3132
}
3233
}
3334

35+
#[inline]
36+
fn as_underlying(x: ControlFlow<bool>) -> u8 {
37+
// SAFETY: This will only compile if `bool` and `ControlFlow<bool>` have the same
38+
// size (which isn't guaranteed but this is libcore). Because they have the same
39+
// size, it's a niched implementation, which in one byte means there can't be
40+
// any uninitialized memory. The callers then only check for `0` or `1` from this,
41+
// which must necessarily match the `Break` variant, and we're fine no matter
42+
// what ends up getting picked as the value representing `Continue(())`.
43+
unsafe { crate::mem::transmute(x) }
44+
}
45+
3446
/// Implements comparison of slices [lexicographically](Ord#lexicographical-comparison).
3547
#[stable(feature = "rust1", since = "1.0.0")]
3648
impl<T: PartialOrd> PartialOrd for [T] {
49+
#[inline]
3750
fn partial_cmp(&self, other: &[T]) -> Option<Ordering> {
3851
SlicePartialOrd::partial_compare(self, other)
3952
}
53+
#[inline]
54+
fn lt(&self, other: &Self) -> bool {
55+
// This is certainly not the obvious way to implement these methods.
56+
// Unfortunately, using anything that looks at the discriminant means that
57+
// LLVM sees a check for `2` (aka `ControlFlow<bool>::Continue(())`) and
58+
// gets very distracted by that, ending up generating extraneous code.
59+
// This should be changed to something simpler once either LLVM is smarter,
60+
// see <https://github.com/llvm/llvm-project/issues/132678>, or we generate
61+
// niche discriminant checks in a way that doesn't trigger it.
62+
63+
as_underlying(self.__chaining_lt(other)) == 1
64+
}
65+
#[inline]
66+
fn le(&self, other: &Self) -> bool {
67+
as_underlying(self.__chaining_le(other)) != 0
68+
}
69+
#[inline]
70+
fn gt(&self, other: &Self) -> bool {
71+
as_underlying(self.__chaining_gt(other)) == 1
72+
}
73+
#[inline]
74+
fn ge(&self, other: &Self) -> bool {
75+
as_underlying(self.__chaining_ge(other)) != 0
76+
}
77+
#[inline]
78+
fn __chaining_lt(&self, other: &Self) -> ControlFlow<bool> {
79+
SliceChain::chaining_lt(self, other)
80+
}
81+
#[inline]
82+
fn __chaining_le(&self, other: &Self) -> ControlFlow<bool> {
83+
SliceChain::chaining_le(self, other)
84+
}
85+
#[inline]
86+
fn __chaining_gt(&self, other: &Self) -> ControlFlow<bool> {
87+
SliceChain::chaining_gt(self, other)
88+
}
89+
#[inline]
90+
fn __chaining_ge(&self, other: &Self) -> ControlFlow<bool> {
91+
SliceChain::chaining_ge(self, other)
92+
}
4093
}
4194

4295
#[doc(hidden)]
@@ -99,24 +152,63 @@ trait SlicePartialOrd: Sized {
99152
fn partial_compare(left: &[Self], right: &[Self]) -> Option<Ordering>;
100153
}
101154

155+
#[doc(hidden)]
156+
// intermediate trait for specialization of slice's PartialOrd chaining methods
157+
trait SliceChain: Sized {
158+
fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
159+
fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
160+
fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
161+
fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool>;
162+
}
163+
164+
type AlwaysBreak<B> = ControlFlow<B, crate::convert::Infallible>;
165+
102166
impl<A: PartialOrd> SlicePartialOrd for A {
103167
default fn partial_compare(left: &[A], right: &[A]) -> Option<Ordering> {
104-
let l = cmp::min(left.len(), right.len());
105-
106-
// Slice to the loop iteration range to enable bound check
107-
// elimination in the compiler
108-
let lhs = &left[..l];
109-
let rhs = &right[..l];
168+
let elem_chain = |a, b| match PartialOrd::partial_cmp(a, b) {
169+
Some(Ordering::Equal) => ControlFlow::Continue(()),
170+
non_eq => ControlFlow::Break(non_eq),
171+
};
172+
let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::partial_cmp(a, b));
173+
let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
174+
b
175+
}
176+
}
110177

111-
for i in 0..l {
112-
match lhs[i].partial_cmp(&rhs[i]) {
113-
Some(Ordering::Equal) => (),
114-
non_eq => return non_eq,
115-
}
116-
}
178+
impl<A: PartialOrd> SliceChain for A {
179+
default fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
180+
chaining_impl(left, right, PartialOrd::__chaining_lt, usize::__chaining_lt)
181+
}
182+
default fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
183+
chaining_impl(left, right, PartialOrd::__chaining_le, usize::__chaining_le)
184+
}
185+
default fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
186+
chaining_impl(left, right, PartialOrd::__chaining_gt, usize::__chaining_gt)
187+
}
188+
default fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
189+
chaining_impl(left, right, PartialOrd::__chaining_ge, usize::__chaining_ge)
190+
}
191+
}
117192

118-
left.len().partial_cmp(&right.len())
193+
#[inline]
194+
fn chaining_impl<'l, 'r, A: PartialOrd, B, C>(
195+
left: &'l [A],
196+
right: &'r [A],
197+
elem_chain: impl Fn(&'l A, &'r A) -> ControlFlow<B>,
198+
len_chain: impl for<'a> FnOnce(&'a usize, &'a usize) -> ControlFlow<B, C>,
199+
) -> ControlFlow<B, C> {
200+
let l = cmp::min(left.len(), right.len());
201+
202+
// Slice to the loop iteration range to enable bound check
203+
// elimination in the compiler
204+
let lhs = &left[..l];
205+
let rhs = &right[..l];
206+
207+
for i in 0..l {
208+
elem_chain(&lhs[i], &rhs[i])?;
119209
}
210+
211+
len_chain(&left.len(), &right.len())
120212
}
121213

122214
// This is the impl that we would like to have. Unfortunately it's not sound.
@@ -165,21 +257,13 @@ trait SliceOrd: Sized {
165257

166258
impl<A: Ord> SliceOrd for A {
167259
default fn compare(left: &[Self], right: &[Self]) -> Ordering {
168-
let l = cmp::min(left.len(), right.len());
169-
170-
// Slice to the loop iteration range to enable bound check
171-
// elimination in the compiler
172-
let lhs = &left[..l];
173-
let rhs = &right[..l];
174-
175-
for i in 0..l {
176-
match lhs[i].cmp(&rhs[i]) {
177-
Ordering::Equal => (),
178-
non_eq => return non_eq,
179-
}
180-
}
181-
182-
left.len().cmp(&right.len())
260+
let elem_chain = |a, b| match Ord::cmp(a, b) {
261+
Ordering::Equal => ControlFlow::Continue(()),
262+
non_eq => ControlFlow::Break(non_eq),
263+
};
264+
let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::cmp(a, b));
265+
let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain);
266+
b
183267
}
184268
}
185269

@@ -191,7 +275,7 @@ impl<A: Ord> SliceOrd for A {
191275
/// * For every `x` and `y` of this type, `Ord(x, y)` must return the same
192276
/// value as `Ord::cmp(transmute::<_, u8>(x), transmute::<_, u8>(y))`.
193277
#[rustc_specialization_trait]
194-
unsafe trait UnsignedBytewiseOrd {}
278+
unsafe trait UnsignedBytewiseOrd: Ord {}
195279

196280
unsafe impl UnsignedBytewiseOrd for bool {}
197281
unsafe impl UnsignedBytewiseOrd for u8 {}
@@ -225,6 +309,38 @@ impl<A: Ord + UnsignedBytewiseOrd> SliceOrd for A {
225309
}
226310
}
227311

312+
// Don't generate our own chaining loops for `memcmp`-able things either.
313+
impl<A: PartialOrd + UnsignedBytewiseOrd> SliceChain for A {
314+
#[inline]
315+
fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
316+
match SliceOrd::compare(left, right) {
317+
Ordering::Equal => ControlFlow::Continue(()),
318+
ne => ControlFlow::Break(ne.is_lt()),
319+
}
320+
}
321+
#[inline]
322+
fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
323+
match SliceOrd::compare(left, right) {
324+
Ordering::Equal => ControlFlow::Continue(()),
325+
ne => ControlFlow::Break(ne.is_le()),
326+
}
327+
}
328+
#[inline]
329+
fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
330+
match SliceOrd::compare(left, right) {
331+
Ordering::Equal => ControlFlow::Continue(()),
332+
ne => ControlFlow::Break(ne.is_gt()),
333+
}
334+
}
335+
#[inline]
336+
fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow<bool> {
337+
match SliceOrd::compare(left, right) {
338+
Ordering::Equal => ControlFlow::Continue(()),
339+
ne => ControlFlow::Break(ne.is_ge()),
340+
}
341+
}
342+
}
343+
228344
pub(super) trait SliceContains: Sized {
229345
fn slice_contains(&self, x: &[Self]) -> bool;
230346
}

library/core/src/tuple.rs

+28-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use crate::cmp::Ordering::{self, *};
44
use crate::marker::{ConstParamTy_, StructuralPartialEq, UnsizedConstParamTy};
5-
use crate::ops::ControlFlow::{Break, Continue};
5+
use crate::ops::ControlFlow::{self, Break, Continue};
66

77
// Recursive macro for implementing n-ary tuple functions and operations
88
//
@@ -95,6 +95,22 @@ macro_rules! tuple_impls {
9595
fn gt(&self, other: &($($T,)+)) -> bool {
9696
lexical_ord!(gt, __chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
9797
}
98+
#[inline]
99+
fn __chaining_lt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
100+
lexical_chain!(__chaining_lt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
101+
}
102+
#[inline]
103+
fn __chaining_le(&self, other: &($($T,)+)) -> ControlFlow<bool> {
104+
lexical_chain!(__chaining_le, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
105+
}
106+
#[inline]
107+
fn __chaining_gt(&self, other: &($($T,)+)) -> ControlFlow<bool> {
108+
lexical_chain!(__chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
109+
}
110+
#[inline]
111+
fn __chaining_ge(&self, other: &($($T,)+)) -> ControlFlow<bool> {
112+
lexical_chain!(__chaining_ge, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
113+
}
98114
}
99115
}
100116

@@ -187,6 +203,17 @@ macro_rules! lexical_ord {
187203
};
188204
}
189205

206+
// Same parameter interleaving as `lexical_ord` above
207+
macro_rules! lexical_chain {
208+
($chain_rel: ident, $a:expr, $b:expr $(,$rest_a:expr, $rest_b:expr)*) => {{
209+
PartialOrd::$chain_rel(&$a, &$b)?;
210+
lexical_chain!($chain_rel $(,$rest_a, $rest_b)*)
211+
}};
212+
($chain_rel: ident) => {
213+
Continue(())
214+
};
215+
}
216+
190217
macro_rules! lexical_partial_cmp {
191218
($a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {
192219
match ($a).partial_cmp(&$b) {

tests/codegen/array-cmp.rs

+49-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Ensure the asm for array comparisons is properly optimized.
22

33
//@ compile-flags: -C opt-level=2
4+
//@ needs-deterministic-layouts (checks depend on tuple layout)
45

56
#![crate_type = "lib"]
67

@@ -19,13 +20,55 @@ pub fn compare() -> bool {
1920
}
2021

2122
// CHECK-LABEL: @array_of_tuple_le
22-
// CHECK: call{{.+}}i8 @llvm.scmp.i8.i16
23-
// CHECK: call{{.+}}i8 @llvm.ucmp.i8.i16
24-
// CHECK: call{{.+}}i8 @llvm.scmp.i8.i16
25-
// CHECK: call{{.+}}i8 @llvm.ucmp.i8.i16
26-
// CHECK: %[[RET:.+]] = icmp slt i8 {{.+}}, 1
27-
// CHECK: ret i8 %[[RET]]
2823
#[no_mangle]
2924
pub fn array_of_tuple_le(a: &[(i16, u16); 2], b: &[(i16, u16); 2]) -> bool {
25+
// Ensure that, after all the optimizations have run, the happy path just checks
26+
// `eq` on each corresponding pair and moves onto the next one if it is.
27+
// Then there's a dedup'd comparison for the place that's different.
28+
// (As opposed to, say, running a full `[su]cmp` as part of checking equality.)
29+
30+
// This is written quite specifically because different library code was triggering
31+
// <https://github.com/llvm/llvm-project/issues/132678> along the way, so this
32+
// has enough checks to make sure that's not happening. It doesn't need to be
33+
// *exactly* this IR, but be careful if you ever need to update these checks.
34+
35+
// CHECK: start:
36+
// CHECK: %[[A00:.+]] = load i16, ptr %a
37+
// CHECK: %[[B00:.+]] = load i16, ptr %b
38+
// CHECK-NOT: cmp
39+
// CHECK: %[[EQ00:.+]] = icmp eq i16 %[[A00]], %[[B00]]
40+
// CHECK-NEXT: br i1 %[[EQ00]], label %[[L01:.+]], label %[[EXIT_S:.+]]
41+
42+
// CHECK: [[L01]]:
43+
// CHECK: %[[PA01:.+]] = getelementptr{{.+}}i8, ptr %a, i64 2
44+
// CHECK: %[[PB01:.+]] = getelementptr{{.+}}i8, ptr %b, i64 2
45+
// CHECK: %[[A01:.+]] = load i16, ptr %[[PA01]]
46+
// CHECK: %[[B01:.+]] = load i16, ptr %[[PB01]]
47+
// CHECK-NOT: cmp
48+
// CHECK: %[[EQ01:.+]] = icmp eq i16 %[[A01]], %[[B01]]
49+
// CHECK-NEXT: br i1 %[[EQ01]], label %[[L10:.+]], label %[[EXIT_U:.+]]
50+
51+
// CHECK: [[L10]]:
52+
// CHECK: %[[PA10:.+]] = getelementptr{{.+}}i8, ptr %a, i64 4
53+
// CHECK: %[[PB10:.+]] = getelementptr{{.+}}i8, ptr %b, i64 4
54+
// CHECK: %[[A10:.+]] = load i16, ptr %[[PA10]]
55+
// CHECK: %[[B10:.+]] = load i16, ptr %[[PB10]]
56+
// CHECK-NOT: cmp
57+
// CHECK: %[[EQ10:.+]] = icmp eq i16 %[[A10]], %[[B10]]
58+
// CHECK-NEXT: br i1 %[[EQ10]], label %[[L11:.+]], label %[[EXIT_S]]
59+
60+
// CHECK: [[L11]]:
61+
// CHECK: %[[PA11:.+]] = getelementptr{{.+}}i8, ptr %a, i64 6
62+
// CHECK: %[[PB11:.+]] = getelementptr{{.+}}i8, ptr %b, i64 6
63+
// CHECK: %[[A11:.+]] = load i16, ptr %[[PA11]]
64+
// CHECK: %[[B11:.+]] = load i16, ptr %[[PB11]]
65+
// CHECK-NOT: cmp
66+
// CHECK: %[[EQ11:.+]] = icmp eq i16 %[[A11]], %[[B11]]
67+
// CHECK-NEXT: br i1 %[[EQ11]], label %[[DONE:.+]], label %[[EXIT_U]]
68+
69+
// CHECK: [[DONE]]:
70+
// CHECK: %[[RET:.+]] = phi i1 [ %{{.+}}, %[[EXIT_S]] ], [ %{{.+}}, %[[EXIT_U]] ], [ true, %[[L11]] ]
71+
// CHECK: ret i1 %[[RET]]
72+
3073
a <= b
3174
}

0 commit comments

Comments
 (0)