Commit 62efba8
committed
Auto merge of #90755 - scottmcm:spec-array-clone, r=jackh726
Specialize array cloning for Copy types
Because after PR 86041, the optimizer no longer load-merges at the LLVM IR level, which might be part of the perf loss. (I'll run perf and see if this makes a difference.)
Also I added a codegen test so this hopefully won't regress in future -- it passes on stable and with my change here, but not on the 2021-11-09 nightly.
Example on current nightly: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=1f52d46fb8fc3ca3ac9f097390085ffa>
```rust
type T = u8;
const N: usize = 3;
pub fn demo_clone(x: &[T; N]) -> [T; N] {
x.clone()
}
pub fn demo_copy(x: &[T; N]) -> [T; N] {
*x
}
```
```llvm-ir
; playground::demo_clone
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind uwtable willreturn
define i24 `@_ZN10playground10demo_clone17h98a4f11453d1a753E([3` x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* `@rust_eh_personality` {
start:
%0 = getelementptr [3 x i8], [3 x i8]* %x, i64 0, i64 0
%1 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 1
%.val.i.i.i.i.i.i.i.i.i = load i8, i8* %0, align 1, !alias.scope !2, !noalias !9
%2 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 2
%.val.i.i.i.i.i.1.i.i.i.i = load i8, i8* %1, align 1, !alias.scope !2, !noalias !20
%.val.i.i.i.i.i.2.i.i.i.i = load i8, i8* %2, align 1, !alias.scope !2, !noalias !23
%array.sroa.6.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.2.i.i.i.i to i32
%array.sroa.6.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.6.0.insert.ext.i.i.i.i, 16
%array.sroa.5.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.1.i.i.i.i to i32
%array.sroa.5.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.5.0.insert.ext.i.i.i.i, 8
%array.sroa.0.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.i.i.i.i to i32
%array.sroa.5.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.shift.i.i.i.i, %array.sroa.0.0.insert.ext.i.i.i.i
%array.sroa.0.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.insert.i.i.i.i, %array.sroa.6.0.insert.shift.i.i.i.i
%.sroa.4.0.extract.trunc.i.i.i.i = trunc i32 %array.sroa.0.0.insert.insert.i.i.i.i to i24
ret i24 %.sroa.4.0.extract.trunc.i.i.i.i
}
; playground::demo_copy
; Function Attrs: mustprogress nofree norecurse nosync nounwind nonlazybind readonly uwtable willreturn
define i24 `@_ZN10playground9demo_copy17h7817453f9291d746E([3` x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #1 {
start:
%.sroa.0.0..sroa_cast = bitcast [3 x i8]* %x to i24*
%.sroa.0.0.copyload = load i24, i24* %.sroa.0.0..sroa_cast, align 1
ret i24 %.sroa.0.0.copyload
}
```File tree
2 files changed
+39
-3
lines changed- library/core/src/array
- src/test/codegen
2 files changed
+39
-3
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
339 | 339 | | |
340 | 340 | | |
341 | 341 | | |
342 | | - | |
343 | | - | |
344 | | - | |
| 342 | + | |
345 | 343 | | |
346 | 344 | | |
347 | 345 | | |
| |||
350 | 348 | | |
351 | 349 | | |
352 | 350 | | |
| 351 | + | |
| 352 | + | |
| 353 | + | |
| 354 | + | |
| 355 | + | |
| 356 | + | |
| 357 | + | |
| 358 | + | |
| 359 | + | |
| 360 | + | |
| 361 | + | |
| 362 | + | |
| 363 | + | |
| 364 | + | |
| 365 | + | |
| 366 | + | |
| 367 | + | |
| 368 | + | |
| 369 | + | |
| 370 | + | |
| 371 | + | |
| 372 | + | |
| 373 | + | |
353 | 374 | | |
354 | 375 | | |
355 | 376 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
0 commit comments