@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
113113 zero_div_fn ( )
114114}
115115
116+ // `inline(never)` is placed on unsigned division functions so that there are just three division
117+ // functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
118+ // division functions. The signed functions like `i32_div_rem` will get inlined into the
119+ // `compiler-builtins` signed division functions, so that they directly call the three division
120+ // functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
121+ // signed division functions, which results in an explosion in code size.
122+
116123// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
117124// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
118125// faster if the target pointer width is at least 64.
@@ -129,7 +136,9 @@ impl_trifecta!(
129136 u32 ,
130137 u64 ,
131138 u128 ,
132- i128 , ;
139+ i128 ,
140+ inline( never) ;
141+ inline
133142) ;
134143
135144// If the pointer width is less than 64, then the target architecture almost certainly does not have
@@ -148,7 +157,9 @@ impl_delegate!(
148157 u32 ,
149158 u64 ,
150159 u128 ,
151- i128 , ;
160+ i128 ,
161+ inline( never) ;
162+ inline
152163) ;
153164
154165/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -190,7 +201,9 @@ impl_asymmetric!(
190201 u32 ,
191202 u64 ,
192203 u128 ,
193- i128 , ;
204+ i128 ,
205+ inline( never) ;
206+ inline
194207) ;
195208
196209/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
223236 u16 ,
224237 u32 ,
225238 u64 ,
226- i64 , ;
239+ i64 ,
240+ inline( never) ;
241+ inline
227242) ;
228243
229244// When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
238253 u64_normalization_shift,
239254 64 ,
240255 u64 ,
241- i64 , ;
256+ i64 ,
257+ inline( never) ;
258+ inline
242259) ;
243260
244261/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
280297 u16 ,
281298 u32 ,
282299 u64 ,
283- i64 , ;
300+ i64 ,
301+ inline( never) ;
302+ inline
284303) ;
285304
286305// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
291310 u32_normalization_shift,
292311 32 ,
293312 u32 ,
294- i32 , ;
313+ i32 ,
314+ inline( never) ;
315+ inline
295316) ;
0 commit comments