Skip to content

Commit 2f9f763

Browse files
committed
Make slice iterators carry only a single provenance
Today they carry two, which makes certain optimizations illegal at the LLVM-IR level. In particular, it makes it matter whether an operation is done from the start pointer or the end pointer, since as far as LLVM knows those might have different provenance. For example, this code ```rust pub unsafe fn first_via_nth_back(mut it: std::slice::Iter<'_, i8>) -> &i8 { // CHECK: ret ptr %0 let len = it.len(); it.nth_back(len - 1).unwrap_unchecked() } ``` is <https://rust.godbolt.org/z/8e61vqzhP> ```llvm %2 = ptrtoint ptr %1 to i64 %3 = ptrtoint ptr %0 to i64 %.neg = add i64 %3, 1 %_6.neg = sub i64 %.neg, %2 %_15.i6.i = getelementptr inbounds i8, ptr %1, i64 %_6.neg %_15.i.i = getelementptr inbounds i8, ptr %_15.i6.i, i64 -1 ret ptr %_15.i.i ``` whereas after this PR it's just ```llvm ret ptr %0 ``` (some `assume`s removed in both cases)
1 parent 3014e79 commit 2f9f763

15 files changed

+641
-1145
lines changed

library/core/src/slice/iter.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ use crate::iter::{
1111
use crate::marker::PhantomData;
1212
use crate::mem::{self, SizedTypeProperties};
1313
use crate::num::NonZero;
14-
use crate::ptr::{NonNull, without_provenance, without_provenance_mut};
15-
use crate::{cmp, fmt};
14+
use crate::ptr::{self, NonNull};
15+
use crate::{cmp, fmt, intrinsics};
1616

1717
#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")]
1818
impl<T> !Iterator for [T] {}
@@ -72,10 +72,14 @@ pub struct Iter<'a, T: 'a> {
7272
///
7373
/// This address will be used for all ZST elements, never changed.
7474
ptr: NonNull<T>,
75-
/// For non-ZSTs, the non-null pointer to the past-the-end element.
75+
/// For non-ZSTs, the address of the past-the-end element. This is
76+
/// intentionally *not* a pointer, so that it doesn't carry provenance.
77+
/// If you're turning this into a pointer, you need to use the provenance from
78+
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
79+
/// that reads from the start and the end are actually the same provenance.)
7680
///
77-
/// For ZSTs, this is `ptr::without_provenance_mut(len)`.
78-
end_or_len: *const T,
81+
/// For ZSTs, this is the length.
82+
end_addr_or_len: usize,
7983
_marker: PhantomData<&'a T>,
8084
}
8185

@@ -98,10 +102,9 @@ impl<'a, T> Iter<'a, T> {
98102
let ptr: NonNull<T> = NonNull::from_ref(slice).cast();
99103
// SAFETY: Similar to `IterMut::new`.
100104
unsafe {
101-
let end_or_len =
102-
if T::IS_ZST { without_provenance(len) } else { ptr.as_ptr().add(len) };
105+
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
103106

104-
Self { ptr, end_or_len, _marker: PhantomData }
107+
Self { ptr, end_addr_or_len, _marker: PhantomData }
105108
}
106109
}
107110

@@ -153,7 +156,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
153156
impl<T> Clone for Iter<'_, T> {
154157
#[inline]
155158
fn clone(&self) -> Self {
156-
Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
159+
Iter { ptr: self.ptr, end_addr_or_len: self.end_addr_or_len, _marker: self._marker }
157160
}
158161
}
159162

@@ -197,10 +200,14 @@ pub struct IterMut<'a, T: 'a> {
197200
///
198201
/// This address will be used for all ZST elements, never changed.
199202
ptr: NonNull<T>,
200-
/// For non-ZSTs, the non-null pointer to the past-the-end element.
203+
/// For non-ZSTs, the address of the past-the-end element. This is
204+
/// intentionally *not* a pointer, so that it doesn't carry provenance.
205+
/// If you're turning this into a pointer, you need to use the provenance from
206+
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
207+
/// that reads from the start and the end are actually the same provenance.)
201208
///
202-
/// For ZSTs, this is `ptr::without_provenance_mut(len)`.
203-
end_or_len: *mut T,
209+
/// For ZSTs, this is the length.
210+
end_addr_or_len: usize,
204211
_marker: PhantomData<&'a mut T>,
205212
}
206213

@@ -238,10 +245,9 @@ impl<'a, T> IterMut<'a, T> {
238245
// See the `next_unchecked!` and `is_empty!` macros as well as the
239246
// `post_inc_start` method for more information.
240247
unsafe {
241-
let end_or_len =
242-
if T::IS_ZST { without_provenance_mut(len) } else { ptr.as_ptr().add(len) };
248+
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
243249

244-
Self { ptr, end_or_len, _marker: PhantomData }
250+
Self { ptr, end_addr_or_len, _marker: PhantomData }
245251
}
246252
}
247253

@@ -3478,3 +3484,12 @@ impl<'a, T: 'a + fmt::Debug, P> fmt::Debug for ChunkByMut<'a, T, P> {
34783484
f.debug_struct("ChunkByMut").field("slice", &self.slice).finish()
34793485
}
34803486
}
3487+
3488+
/// Same as `p.addr().get()`, but faster to compile by avoiding a bunch of
3489+
/// intermediate steps and unneeded UB checks, which also inlines better.
3490+
#[inline]
3491+
const fn addr_usize<T>(p: NonNull<T>) -> usize {
3492+
// SAFETY: `NonNull` for a sized type has the same layout as `usize`,
3493+
// and we intentionally don't want to expose here.
3494+
unsafe { intrinsics::transmute(p) }
3495+
}

library/core/src/slice/iter/macros.rs

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,64 @@
11
//! Macros used by iterators of slice.
22
3-
/// Convenience & performance macro for consuming the `end_or_len` field, by
3+
/// Convenience macro for updating the `end_addr_or_len` field for non-ZSTs.
4+
macro_rules! set_end {
5+
($this:ident . end = $new_end:expr) => {{
6+
$this.end_addr_or_len = addr_usize($new_end);
7+
}};
8+
}
9+
10+
/// Convenience & performance macro for consuming the `end_addr_or_len` field, by
411
/// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
512
/// or is not a ZST respectively.
613
///
7-
/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
8-
/// it'll get the appropriate non-null metadata in the backend without needing
9-
/// to call `assume` manually.
14+
/// When giving a `NonNull<T>` for the end, it creates it by offsetting from the
15+
/// `ptr` so that the backend knows that both pointers have the same provenance.
1016
macro_rules! if_zst {
1117
(mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
1218
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
1319

20+
let ptr = $this.ptr;
1421
if T::IS_ZST {
15-
// SAFETY: for ZSTs, the pointer is storing a provenance-free length,
16-
// so consuming and updating it as a `usize` is fine.
17-
let $len = unsafe { &mut *(&raw mut $this.end_or_len).cast::<usize>() };
22+
let $len = &mut $this.end_addr_or_len;
1823
$zst_body
1924
} else {
20-
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
21-
let $end = unsafe { &mut *(&raw mut $this.end_or_len).cast::<NonNull<T>>() };
25+
let $len;
26+
#[allow(unused_unsafe)] // we're sometimes used within an unsafe block
27+
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
28+
// cannot overflow, and the iter represents a single allocated
29+
// object so the `add` will also be in-range.
30+
let $end = unsafe {
31+
// Need to load as `NonNull` to get `!nonnull` metadata
32+
// (Transmuting gets an assume instead)
33+
let end: NonNull<T> = *ptr::addr_of!($this.end_addr_or_len).cast();
34+
// Not using `with_addr` because we have ordering information that
35+
// we can take advantage of here that `with_addr` cannot.
36+
$len = intrinsics::ptr_offset_from_unsigned(end.as_ptr(), ptr.as_ptr());
37+
ptr.add($len)
38+
};
2239
$other_body
2340
}
2441
}};
2542
($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
26-
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
27-
2843
if T::IS_ZST {
29-
let $len = $this.end_or_len.addr();
44+
let $len = $this.end_addr_or_len;
3045
$zst_body
3146
} else {
32-
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
33-
let $end = unsafe { mem::transmute::<*const T, NonNull<T>>($this.end_or_len) };
47+
let $len;
48+
#[allow(unused_unsafe)] // we're sometimes used within an unsafe block
49+
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
50+
// cannot overflow, and the iter represents a single allocated
51+
// object so the `add` will also be in-range.
52+
let $end = unsafe {
53+
let ptr = $this.ptr;
54+
// Need to load as `NonNull` to get `!nonnull` metadata
55+
// (Transmuting gets an assume instead)
56+
let end: NonNull<T> = *ptr::addr_of!($this.end_addr_or_len).cast();
57+
// Not using `with_addr` because we have ordering information that
58+
// we can take advantage of here that `with_addr` cannot.
59+
$len = intrinsics::ptr_offset_from_unsigned(end.as_ptr(), ptr.as_ptr());
60+
ptr.add($len)
61+
};
3462
$other_body
3563
}
3664
}};
@@ -50,12 +78,7 @@ macro_rules! len {
5078
($self: ident) => {{
5179
if_zst!($self,
5280
len => len,
53-
end => {
54-
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
55-
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
56-
// SAFETY: by the type invariant pointers are aligned and `start <= end`
57-
unsafe { end.offset_from_unsigned($self.ptr) }
58-
},
81+
_end => len,
5982
)
6083
}};
6184
}
@@ -128,8 +151,9 @@ macro_rules! iterator {
128151
// which is guaranteed to not overflow an `isize`. Also, the resulting pointer
129152
// is in bounds of `slice`, which fulfills the other requirements for `offset`.
130153
end => unsafe {
131-
*end = end.sub(offset);
132-
*end
154+
let new_end = end.sub(offset);
155+
set_end!(self.end = new_end);
156+
new_end
133157
},
134158
)
135159
}
@@ -158,25 +182,24 @@ macro_rules! iterator {
158182
// one of the most mono'd things in the library.
159183

160184
let ptr = self.ptr;
161-
let end_or_len = self.end_or_len;
162185
// SAFETY: See inner comments. (For some reason having multiple
163186
// block breaks inlining this -- if you can fix that please do!)
164187
unsafe {
165188
if T::IS_ZST {
166-
let len = end_or_len.addr();
189+
let len = self.end_addr_or_len;
167190
if len == 0 {
168191
return None;
169192
}
170193
// SAFETY: just checked that it's not zero, so subtracting one
171194
// cannot wrap. (Ideally this would be `checked_sub`, which
172195
// does the same thing internally, but as of 2025-02 that
173196
// doesn't optimize quite as small in MIR.)
174-
self.end_or_len = without_provenance_mut(len.unchecked_sub(1));
197+
self.end_addr_or_len = intrinsics::unchecked_sub(len, 1);
175198
} else {
176-
// SAFETY: by type invariant, the `end_or_len` field is always
199+
// SAFETY: by type invariant, the `end_addr_or_len` field is always
177200
// non-null for a non-ZST pointee. (This transmute ensures we
178201
// get `!nonnull` metadata on the load of the field.)
179-
if ptr == crate::intrinsics::transmute::<$ptr, NonNull<T>>(end_or_len) {
202+
if ptr == *(&raw const self.end_addr_or_len).cast::<NonNull<T>>() {
180203
return None;
181204
}
182205
// SAFETY: since it's not empty, per the check above, moving
@@ -207,7 +230,7 @@ macro_rules! iterator {
207230
// This iterator is now empty.
208231
if_zst!(mut self,
209232
len => *len = 0,
210-
end => self.ptr = *end,
233+
end => self.ptr = end,
211234
);
212235
return None;
213236
}
@@ -432,7 +455,7 @@ macro_rules! iterator {
432455
// This iterator is now empty.
433456
if_zst!(mut self,
434457
len => *len = 0,
435-
end => *end = self.ptr,
458+
_end => set_end!(self.end = self.ptr),
436459
);
437460
return None;
438461
}

tests/codegen/issues/issue-37945.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,33 @@
33

44
// Check that LLVM understands that `Iter` pointer is not null. Issue #37945.
55

6+
// There used to be a comparison against `null`, so we check that it's not there
7+
// and that the appropriate parameter metadata is.
8+
69
#![crate_type = "lib"]
710

811
use std::slice::Iter;
912

1013
#[no_mangle]
1114
pub fn is_empty_1(xs: Iter<f32>) -> bool {
12-
// CHECK-LABEL: @is_empty_1(
13-
// CHECK-NEXT: start:
14-
// CHECK-NEXT: [[A:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
15-
// CHECK-NEXT: tail call void @llvm.assume(i1 [[A]])
16-
// The order between %xs.0 and %xs.1 on the next line doesn't matter
17-
// and different LLVM versions produce different order.
18-
// CHECK-NEXT: [[B:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
19-
// CHECK-NEXT: ret i1 [[B:%.*]]
15+
// CHECK-LABEL: @is_empty_1
16+
// CHECK-SAME: (ptr noundef nonnull{{.*}}%xs.0, {{i32|i64}}{{.+}}%xs.1)
17+
18+
// CHECK-NOT: null
19+
// CHECK-NOT: i32 0
20+
// CHECK-NOT: i64 0
21+
2022
{ xs }.next().is_none()
2123
}
2224

2325
#[no_mangle]
2426
pub fn is_empty_2(xs: Iter<f32>) -> bool {
2527
// CHECK-LABEL: @is_empty_2
26-
// CHECK-NEXT: start:
27-
// CHECK-NEXT: [[C:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
28-
// CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
29-
// The order between %xs.0 and %xs.1 on the next line doesn't matter
30-
// and different LLVM versions produce different order.
31-
// CHECK-NEXT: [[D:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
32-
// CHECK-NEXT: ret i1 [[D:%.*]]
28+
// CHECK-SAME: (ptr noundef nonnull{{.*}}%xs.0, {{i32|i64}}{{.+}}%xs.1)
29+
30+
// CHECK-NOT: null
31+
// CHECK-NOT: i32 0
32+
// CHECK-NOT: i64 0
33+
3334
xs.map(|&x| x).next().is_none()
3435
}

tests/codegen/slice-iter-len-eq-zero.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//@ compile-flags: -Copt-level=3
2+
//@ only-64bit
23
//@ needs-deterministic-layouts (opposite scalar pair orders breaks it)
34
#![crate_type = "lib"]
45

@@ -7,8 +8,11 @@ type Demo = [u8; 3];
78
// CHECK-LABEL: @slice_iter_len_eq_zero
89
#[no_mangle]
910
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
10-
// CHECK-NOT: sub
11-
// CHECK: %[[RET:.+]] = icmp eq ptr {{%y.0, %y.1|%y.1, %y.0}}
11+
// CHECK: start:
12+
// CHECK: %[[NOT_ZERO:.+]] = icmp ne i64 %1, 0
13+
// CHECK: call void @llvm.assume(i1 %[[NOT_ZERO]])
14+
// CHECK: %[[PTR_ADDR:.+]] = ptrtoint ptr %0 to i64
15+
// CHECK: %[[RET:.+]] = icmp eq i64 %1, %[[PTR_ADDR]]
1216
// CHECK: ret i1 %[[RET]]
1317
y.len() == 0
1418
}
@@ -21,7 +25,7 @@ pub fn slice_iter_len_eq_zero_ref(y: &mut std::slice::Iter<'_, Demo>) -> bool {
2125
// CHECK-SAME: !nonnull
2226
// CHECK: %[[B:.+]] = load ptr
2327
// CHECK-SAME: !nonnull
24-
// CHECK: %[[RET:.+]] = icmp eq ptr %[[A]], %[[B]]
28+
// CHECK: %[[RET:.+]] = icmp eq ptr %[[B]], %[[A]]
2529
// CHECK: ret i1 %[[RET]]
2630
y.len() == 0
2731
}
@@ -31,17 +35,17 @@ struct MyZST;
3135
// CHECK-LABEL: @slice_zst_iter_len_eq_zero
3236
#[no_mangle]
3337
pub fn slice_zst_iter_len_eq_zero(y: std::slice::Iter<'_, MyZST>) -> bool {
34-
// CHECK: %[[RET:.+]] = icmp eq ptr %y.1, null
38+
// CHECK: %[[RET:.+]] = icmp eq i64 %y.1, 0
3539
// CHECK: ret i1 %[[RET]]
3640
y.len() == 0
3741
}
3842

3943
// CHECK-LABEL: @slice_zst_iter_len_eq_zero_ref
4044
#[no_mangle]
4145
pub fn slice_zst_iter_len_eq_zero_ref(y: &mut std::slice::Iter<'_, MyZST>) -> bool {
42-
// CHECK: %[[LEN:.+]] = load ptr
43-
// CHECK-NOT: !nonnull
44-
// CHECK: %[[RET:.+]] = icmp eq ptr %[[LEN]], null
46+
// CHECK: %[[LEN:.+]] = load i64
47+
// CHECK-NOT: !range
48+
// CHECK: %[[RET:.+]] = icmp eq i64 %[[LEN]], 0
4549
// CHECK: ret i1 %[[RET]]
4650
y.len() == 0
4751
}

0 commit comments

Comments
 (0)