Commit 4f184eb
authored
Rollup merge of rust-lang#48012 - scottmcm:faster-rangeinclusive-fold, r=alexcrichton
Override try_[r]fold for RangeInclusive
Because the last item needs special handling, LLVM seems to have trouble canonicalizing the loops produced by external iteration. With the override, it becomes obvious that the start==end case exits the loop (whereas with external iteration it is the iteration *after* that one which exits the loop).
Demo adapted from rust-lang#45222
```rust
#[no_mangle]
pub fn foo3r(n: u64) -> u64 {
let mut count = 0;
(0..n).for_each(|_| {
(0 ..= n).rev().for_each(|j| {
count += j;
})
});
count
}
```
<details>
<summary>Current nightly ASM, 100 lines (https://play.rust-lang.org/?gist=f5674c702c6e2045c3aab5d03763e5f6&version=nightly&mode=release)</summary>
```asm
foo3r:
pushq %rbx
.Lcfi0:
.Lcfi1:
testq %rdi, %rdi
je .LBB0_1
testb $1, %dil
jne .LBB0_4
xorl %eax, %eax
xorl %r8d, %r8d
cmpq $1, %rdi
jne .LBB0_11
jmp .LBB0_23
.LBB0_1:
xorl %eax, %eax
popq %rbx
retq
.LBB0_4:
xorl %r8d, %r8d
movq $-1, %r9
xorl %eax, %eax
movq %rdi, %r11
xorl %r10d, %r10d
jmp .LBB0_5
.LBB0_8:
addq %r11, %rax
movq %rsi, %r11
movq %rdx, %r10
.LBB0_5:
cmpq %r11, %r10
movl $1, %ecx
cmovbq %r9, %rcx
cmoveq %r8, %rcx
testq %rcx, %rcx
movl $0, %esi
movl $1, %edx
je .LBB0_8
cmpq $-1, %rcx
jne .LBB0_9
leaq -1(%r11), %rsi
movq %r10, %rdx
jmp .LBB0_8
.LBB0_9:
movl $1, %r8d
cmpq $1, %rdi
je .LBB0_23
.LBB0_11:
xorl %r9d, %r9d
movq $-1, %r10
.LBB0_12:
movq %rdi, %rsi
xorl %r11d, %r11d
jmp .LBB0_13
.LBB0_16:
addq %rsi, %rax
movq %rcx, %rsi
movq %rbx, %r11
.LBB0_13:
cmpq %rsi, %r11
movl $1, %edx
cmovbq %r10, %rdx
cmoveq %r9, %rdx
testq %rdx, %rdx
movl $0, %ecx
movl $1, %ebx
je .LBB0_16
cmpq $-1, %rdx
jne .LBB0_17
leaq -1(%rsi), %rcx
movq %r11, %rbx
jmp .LBB0_16
.LBB0_17:
movq %rdi, %rcx
xorl %r11d, %r11d
jmp .LBB0_18
.LBB0_21:
addq %rcx, %rax
movq %rsi, %rcx
movq %rbx, %r11
.LBB0_18:
cmpq %rcx, %r11
movl $1, %edx
cmovbq %r10, %rdx
cmoveq %r9, %rdx
testq %rdx, %rdx
movl $0, %esi
movl $1, %ebx
je .LBB0_21
cmpq $-1, %rdx
jne .LBB0_22
leaq -1(%rcx), %rsi
movq %r11, %rbx
jmp .LBB0_21
.LBB0_22:
addq $2, %r8
cmpq %rdi, %r8
jne .LBB0_12
.LBB0_23:
popq %rbx
retq
.Lfunc_end0:
```
</details><br>
With this PR:
```asm
foo3r:
test rcx, rcx
je .LBB3_1
lea r8, [rcx - 1]
lea rdx, [rcx - 2]
mov rax, r8
mul rdx
shld rdx, rax, 63
imul r8, r8
add r8, rcx
sub r8, rdx
imul r8, rcx
mov rax, r8
ret
.LBB3_1:
xor r8d, r8d
mov rax, r8
ret
```

2 files changed
+65
-1
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
10 | 10 | | |
11 | 11 | | |
12 | 12 | | |
13 | | - | |
| 13 | + | |
14 | 14 | | |
15 | 15 | | |
16 | 16 | | |
| |||
397 | 397 | | |
398 | 398 | | |
399 | 399 | | |
| 400 | + | |
| 401 | + | |
| 402 | + | |
| 403 | + | |
| 404 | + | |
| 405 | + | |
| 406 | + | |
| 407 | + | |
| 408 | + | |
| 409 | + | |
| 410 | + | |
| 411 | + | |
| 412 | + | |
| 413 | + | |
| 414 | + | |
| 415 | + | |
| 416 | + | |
| 417 | + | |
| 418 | + | |
| 419 | + | |
| 420 | + | |
| 421 | + | |
400 | 422 | | |
401 | 423 | | |
402 | 424 | | |
| |||
418 | 440 | | |
419 | 441 | | |
420 | 442 | | |
| 443 | + | |
| 444 | + | |
| 445 | + | |
| 446 | + | |
| 447 | + | |
| 448 | + | |
| 449 | + | |
| 450 | + | |
| 451 | + | |
| 452 | + | |
| 453 | + | |
| 454 | + | |
| 455 | + | |
| 456 | + | |
| 457 | + | |
| 458 | + | |
| 459 | + | |
| 460 | + | |
| 461 | + | |
| 462 | + | |
| 463 | + | |
| 464 | + | |
421 | 465 | | |
422 | 466 | | |
423 | 467 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1459 | 1459 | | |
1460 | 1460 | | |
1461 | 1461 | | |
| 1462 | + | |
| 1463 | + | |
| 1464 | + | |
| 1465 | + | |
| 1466 | + | |
| 1467 | + | |
| 1468 | + | |
| 1469 | + | |
| 1470 | + | |
| 1471 | + | |
| 1472 | + | |
| 1473 | + | |
| 1474 | + | |
| 1475 | + | |
| 1476 | + | |
| 1477 | + | |
| 1478 | + | |
| 1479 | + | |
| 1480 | + | |
| 1481 | + | |
1462 | 1482 | | |
1463 | 1483 | | |
1464 | 1484 | | |
| |||
0 commit comments