@@ -99,12 +99,36 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
9999; ---------------------------------------------------------------------------- ;
100100
101101define i32 @bzhi32_c0 (i32 %val , i32 %numlowbits ) nounwind {
102- ; GCN-LABEL: bzhi32_c0:
102+ ; SI-LABEL: bzhi32_c0:
103+ ; SI: ; %bb.0:
104+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105+ ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
106+ ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
107+ ; SI-NEXT: v_and_b32_e32 v0, v1, v0
108+ ; SI-NEXT: s_setpc_b64 s[30:31]
109+ ;
110+ ; VI-LABEL: bzhi32_c0:
111+ ; VI: ; %bb.0:
112+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113+ ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
114+ ; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
115+ ; VI-NEXT: v_and_b32_e32 v0, v1, v0
116+ ; VI-NEXT: s_setpc_b64 s[30:31]
; Pattern c0: the result is %val AND-ed with a mask built by logically
; shifting an all-ones i32 right by (32 - %numlowbits).
; The SI and VI check blocks above each expect three separate instructions
; for this body (a subtract from 32, a right shift of -1, and an AND); they
; differ only in the subtract mnemonic and in the shift form
; (v_lshr_b32_e32 on SI, v_lshrrev_b32_e64 with swapped operands on VI).
; NOTE(review): %numlowbits is used unmasked here, whereas bzhi32_c0_clamp
; ANDs it with 31 first -- presumably the unclamped shift amount is why this
; function has no v_bfe_u32 check line; confirm against the commit message.
117+ %numhighbits = sub i32 32 , %numlowbits
118+ %mask = lshr i32 -1 , %numhighbits
119+ %masked = and i32 %mask , %val
120+ ret i32 %masked
121+ }
122+
123+ define i32 @bzhi32_c0_clamp (i32 %val , i32 %numlowbits ) nounwind {
124+ ; GCN-LABEL: bzhi32_c0_clamp:
103125; GCN: ; %bb.0:
104126; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+ ; GCN-NEXT: v_and_b32_e32 v1, 31, v1
105128; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
106129; GCN-NEXT: s_setpc_b64 s[30:31]
107- %numhighbits = sub i32 32 , %numlowbits
130+ %low5bits = and i32 %numlowbits , 31
131+ %numhighbits = sub i32 32 , %low5bits
108132 %mask = lshr i32 -1 , %numhighbits
109133 %masked = and i32 %mask , %val
110134 ret i32 %masked
@@ -134,11 +158,21 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
134158}
135159
136160define i32 @bzhi32_c4_commutative (i32 %val , i32 %numlowbits ) nounwind {
137- ; GCN-LABEL: bzhi32_c4_commutative:
138- ; GCN: ; %bb.0:
139- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140- ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
141- ; GCN-NEXT: s_setpc_b64 s[30:31]
161+ ; SI-LABEL: bzhi32_c4_commutative:
162+ ; SI: ; %bb.0:
163+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164+ ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
165+ ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
166+ ; SI-NEXT: v_and_b32_e32 v0, v0, v1
167+ ; SI-NEXT: s_setpc_b64 s[30:31]
168+ ;
169+ ; VI-LABEL: bzhi32_c4_commutative:
170+ ; VI: ; %bb.0:
171+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172+ ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
173+ ; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
174+ ; VI-NEXT: v_and_b32_e32 v0, v0, v1
175+ ; VI-NEXT: s_setpc_b64 s[30:31]
142176 %numhighbits = sub i32 32 , %numlowbits
143177 %mask = lshr i32 -1 , %numhighbits
144178 %masked = and i32 %val , %mask ; swapped order
0 commit comments