Skip to content

Commit 10e35be

Browse files
author
kvp
committed
[RISCV] Allow large div peephole optimization for minsize
When the `minsize` function attribute is set, division of large integers by a power of 2 is not optimized in the way the ExpandLargeDivRem pass expects, which results in a compiler crash.
1 parent f7f6f6d commit 10e35be

File tree

2 files changed

+153
-1
lines changed

2 files changed

+153
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24828,7 +24828,7 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
2482824828
// instruction, as it is usually smaller than the alternative sequence.
2482924829
// TODO: Add vector division?
2483024830
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24831-
return OptSize && !VT.isVector();
24831+
return OptSize && !VT.isVector() && VT.getSizeInBits() <= 128;
2483224832
}
2483324833

2483424834
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {

llvm/test/CodeGen/RISCV/div_minsize.ll

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,155 @@ define i32 @testsize4(i32 %x) minsize nounwind {
6868
%div = udiv i32 %x, 33
6969
ret i32 %div
7070
}
71+
72+
define i128 @i128_sdiv(i128 %arg0) minsize nounwind {
73+
; RV32IM-LABEL: i128_sdiv:
74+
; RV32IM: # %bb.0:
75+
; RV32IM-NEXT: addi sp, sp, -64
76+
; RV32IM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
77+
; RV32IM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
78+
; RV32IM-NEXT: lw a3, 0(a1)
79+
; RV32IM-NEXT: lw a4, 4(a1)
80+
; RV32IM-NEXT: lw a5, 8(a1)
81+
; RV32IM-NEXT: lw a6, 12(a1)
82+
; RV32IM-NEXT: mv s0, a0
83+
; RV32IM-NEXT: li a7, 4
84+
; RV32IM-NEXT: addi a0, sp, 40
85+
; RV32IM-NEXT: addi a1, sp, 24
86+
; RV32IM-NEXT: addi a2, sp, 8
87+
; RV32IM-NEXT: sw a7, 8(sp)
88+
; RV32IM-NEXT: sw zero, 12(sp)
89+
; RV32IM-NEXT: sw zero, 16(sp)
90+
; RV32IM-NEXT: sw zero, 20(sp)
91+
; RV32IM-NEXT: sw a3, 24(sp)
92+
; RV32IM-NEXT: sw a4, 28(sp)
93+
; RV32IM-NEXT: sw a5, 32(sp)
94+
; RV32IM-NEXT: sw a6, 36(sp)
95+
; RV32IM-NEXT: call __divti3
96+
; RV32IM-NEXT: lw a0, 40(sp)
97+
; RV32IM-NEXT: lw a1, 44(sp)
98+
; RV32IM-NEXT: lw a2, 48(sp)
99+
; RV32IM-NEXT: lw a3, 52(sp)
100+
; RV32IM-NEXT: sw a0, 0(s0)
101+
; RV32IM-NEXT: sw a1, 4(s0)
102+
; RV32IM-NEXT: sw a2, 8(s0)
103+
; RV32IM-NEXT: sw a3, 12(s0)
104+
; RV32IM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
105+
; RV32IM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
106+
; RV32IM-NEXT: addi sp, sp, 64
107+
; RV32IM-NEXT: ret
108+
;
109+
; RV64IM-LABEL: i128_sdiv:
110+
; RV64IM: # %bb.0:
111+
; RV64IM-NEXT: addi sp, sp, -16
112+
; RV64IM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
113+
; RV64IM-NEXT: li a2, 4
114+
; RV64IM-NEXT: li a3, 0
115+
; RV64IM-NEXT: call __divti3
116+
; RV64IM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
117+
; RV64IM-NEXT: addi sp, sp, 16
118+
; RV64IM-NEXT: ret
119+
%div = sdiv i128 %arg0, 4
120+
ret i128 %div
121+
}
122+
123+
define i256 @i256_sdiv(i256 %arg0) minsize nounwind {
124+
; RV32IM-LABEL: i256_sdiv:
125+
; RV32IM: # %bb.0:
126+
; RV32IM-NEXT: lw a5, 16(a1)
127+
; RV32IM-NEXT: lw a4, 20(a1)
128+
; RV32IM-NEXT: lw a2, 24(a1)
129+
; RV32IM-NEXT: lw a3, 28(a1)
130+
; RV32IM-NEXT: lw a6, 0(a1)
131+
; RV32IM-NEXT: lw a7, 4(a1)
132+
; RV32IM-NEXT: lw t0, 8(a1)
133+
; RV32IM-NEXT: lw t1, 12(a1)
134+
; RV32IM-NEXT: srai a1, a3, 31
135+
; RV32IM-NEXT: srli a1, a1, 30
136+
; RV32IM-NEXT: add a1, a6, a1
137+
; RV32IM-NEXT: sltu t2, a1, a6
138+
; RV32IM-NEXT: add a6, a7, t2
139+
; RV32IM-NEXT: sltu a7, a6, a7
140+
; RV32IM-NEXT: and t2, t2, a7
141+
; RV32IM-NEXT: add a7, t0, t2
142+
; RV32IM-NEXT: sltu t3, a7, t0
143+
; RV32IM-NEXT: add t0, t1, t3
144+
; RV32IM-NEXT: beqz t2, .LBB5_2
145+
; RV32IM-NEXT: # %bb.1:
146+
; RV32IM-NEXT: sltu t1, t0, t1
147+
; RV32IM-NEXT: and t2, t3, t1
148+
; RV32IM-NEXT: .LBB5_2:
149+
; RV32IM-NEXT: add t2, a5, t2
150+
; RV32IM-NEXT: srli t1, t0, 2
151+
; RV32IM-NEXT: srli t3, a7, 2
152+
; RV32IM-NEXT: slli t0, t0, 30
153+
; RV32IM-NEXT: slli a7, a7, 30
154+
; RV32IM-NEXT: or t0, t3, t0
155+
; RV32IM-NEXT: srli t3, a6, 2
156+
; RV32IM-NEXT: srli a1, a1, 2
157+
; RV32IM-NEXT: slli a6, a6, 30
158+
; RV32IM-NEXT: sltu a5, t2, a5
159+
; RV32IM-NEXT: or a7, t3, a7
160+
; RV32IM-NEXT: srli t3, t2, 2
161+
; RV32IM-NEXT: slli t2, t2, 30
162+
; RV32IM-NEXT: or a1, a1, a6
163+
; RV32IM-NEXT: add a6, a4, a5
164+
; RV32IM-NEXT: or t1, t1, t2
165+
; RV32IM-NEXT: sltu a4, a6, a4
166+
; RV32IM-NEXT: srli t2, a6, 2
167+
; RV32IM-NEXT: slli a6, a6, 30
168+
; RV32IM-NEXT: sw a1, 0(a0)
169+
; RV32IM-NEXT: sw a7, 4(a0)
170+
; RV32IM-NEXT: sw t0, 8(a0)
171+
; RV32IM-NEXT: sw t1, 12(a0)
172+
; RV32IM-NEXT: and a4, a5, a4
173+
; RV32IM-NEXT: or a1, t3, a6
174+
; RV32IM-NEXT: add a4, a2, a4
175+
; RV32IM-NEXT: srli a5, a4, 2
176+
; RV32IM-NEXT: sltu a2, a4, a2
177+
; RV32IM-NEXT: slli a4, a4, 30
178+
; RV32IM-NEXT: add a2, a3, a2
179+
; RV32IM-NEXT: or a3, t2, a4
180+
; RV32IM-NEXT: slli a4, a2, 30
181+
; RV32IM-NEXT: srai a2, a2, 2
182+
; RV32IM-NEXT: or a4, a5, a4
183+
; RV32IM-NEXT: sw a1, 16(a0)
184+
; RV32IM-NEXT: sw a3, 20(a0)
185+
; RV32IM-NEXT: sw a4, 24(a0)
186+
; RV32IM-NEXT: sw a2, 28(a0)
187+
; RV32IM-NEXT: ret
188+
;
189+
; RV64IM-LABEL: i256_sdiv:
190+
; RV64IM: # %bb.0:
191+
; RV64IM-NEXT: ld a2, 24(a1)
192+
; RV64IM-NEXT: ld a3, 16(a1)
193+
; RV64IM-NEXT: ld a4, 0(a1)
194+
; RV64IM-NEXT: ld a1, 8(a1)
195+
; RV64IM-NEXT: srai a5, a2, 63
196+
; RV64IM-NEXT: srli a5, a5, 62
197+
; RV64IM-NEXT: add a5, a4, a5
198+
; RV64IM-NEXT: sltu a4, a5, a4
199+
; RV64IM-NEXT: srli a5, a5, 2
200+
; RV64IM-NEXT: add a6, a1, a4
201+
; RV64IM-NEXT: sltu a1, a6, a1
202+
; RV64IM-NEXT: and a1, a4, a1
203+
; RV64IM-NEXT: srli a4, a6, 2
204+
; RV64IM-NEXT: slli a6, a6, 62
205+
; RV64IM-NEXT: or a5, a5, a6
206+
; RV64IM-NEXT: add a1, a3, a1
207+
; RV64IM-NEXT: srli a6, a1, 2
208+
; RV64IM-NEXT: sltu a3, a1, a3
209+
; RV64IM-NEXT: slli a1, a1, 62
210+
; RV64IM-NEXT: add a2, a2, a3
211+
; RV64IM-NEXT: or a1, a4, a1
212+
; RV64IM-NEXT: slli a3, a2, 62
213+
; RV64IM-NEXT: srai a2, a2, 2
214+
; RV64IM-NEXT: or a3, a6, a3
215+
; RV64IM-NEXT: sd a5, 0(a0)
216+
; RV64IM-NEXT: sd a1, 8(a0)
217+
; RV64IM-NEXT: sd a3, 16(a0)
218+
; RV64IM-NEXT: sd a2, 24(a0)
219+
; RV64IM-NEXT: ret
220+
%div = sdiv i256 %arg0, 4
221+
ret i256 %div
222+
}

0 commit comments

Comments
 (0)