Change inlining to favor three underlying division functions

AaronKutch · AaronKutch · commit 16fe7ae2ce57 · 2020-07-28T13:47:33.000-05:00
diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs
@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
     zero_div_fn()
 }
 
+// `inline(never)` is placed on unsigned division functions so that there are just three division
+// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
+// division functions. The signed functions like `i32_div_rem` will get inlined into the
+// `compiler-builtins` signed division functions, so that they directly call the three division
+// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
+// signed division functions, which results in an explosion in code size.
+
 // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
 // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
 // faster if the target pointer width is at least 64.
@@ -129,7 +136,9 @@ impl_trifecta!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 // If the pointer width less than 64, then the target architecture almost certainly does not have
@@ -148,7 +157,9 @@ impl_delegate!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -190,7 +201,9 @@ impl_asymmetric!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
     u64_normalization_shift,
     64,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
     u32_normalization_shift,
     32,
     u32,
-    i32,;
+    i32,
+    inline(never);
+    inline
 );