@@ -291,6 +291,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
291291void PassManagerBuilder::addFunctionSimplificationPasses (
292292 legacy::PassManagerBase &MPM) {
293293 // Start of function pass.
294+
295+ // **** Initial canonicalization sequence, clean up the code after inlining.
296+
294297 // Break up aggregate allocas, using SSAUpdater.
295298 MPM.add (createSROAPass ());
296299 MPM.add (createEarlyCSEPass ()); // Catch trivial redundancies
@@ -299,21 +302,62 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
299302 MPM.add (createJumpThreadingPass ()); // Thread jumps.
300303 MPM.add (createCorrelatedValuePropagationPass ()); // Propagate conditionals
301304 MPM.add (createCFGSimplificationPass ()); // Merge & remove BBs
302- // Combine silly seq's
303- addInstructionCombiningPass (MPM);
305+
306+ // **** After we have taken out the trash, we can do more expensive and
307+ // aggressive optimizations.
308+ //
309+ // I have no idea what the best order of these passes is.
310+ //
311+ // ($) there might be some `br i1 false` here that InstCombine discovered
312+ // that we might want to kill somehow. However, every SimplifyCfg or
313+ // JumpThreading I add takes 1% of the compiler's performance even if
314+ // it does nothing.
315+ //
316+ // I believe we could have some sort of "br i1 false"-removal pass
317+ // in strategic places, that should not be too slow. Ideally, in
318+ // 90% of the inter-pass transitions the pass would have
319+ // nothing to do and therefore be fast (there's an O(N*M) problem,
320+ // where for a large function we might get hit with the full
321+ // cost). That needs to be further investigated.
322+
323+ addInstructionCombiningPass (MPM); // Combine silly seq's
304324 if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
305325 MPM.add (createLibCallsShrinkWrapPass ());
306326 addExtensionsToPM (EP_Peephole, MPM);
307-
308327 MPM.add (createTailCallEliminationPass ()); // Eliminate tail calls
328+ if (OptLevel > 1 ) {
329+ // Merge duplicate loads and do cross-BB load/store forwarding. This should
330+ // happen before the loop passes. It is done earlier than in C++ because
331+ // noalias makes these optimizations much more useful in Rust.
332+ MPM.add (NewGVN ? createNewGVNPass ()
333+ : createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
334+ }
309335 MPM.add (createCFGSimplificationPass ()); // Merge & remove BBs
336+
337+ // **** Loop optimizations. There are 2 loop optimization "sequences",
338+ // with an InstCombine+SimplifyCfg in the middle.
339+
340+ // Seq #1
341+
310342 MPM.add (createReassociatePass ()); // Reassociate expressions
311343 // Rotate Loop - disable header duplication at -Oz
312344 MPM.add (createLoopRotatePass (SizeLevel == 2 ? 0 : -1 ));
313345 MPM.add (createLICMPass ()); // Hoist loop invariants
346+ MPM.add (createIndVarSimplifyPass ()); // Simplify Indvars
314347 MPM.add (createLoopUnswitchPass (SizeLevel || OptLevel < 3 ));
348+
349+ // Cleanup between seqs.
350+
315351 MPM.add (createCFGSimplificationPass ());
316352 addInstructionCombiningPass (MPM);
353+
354+ // Seq #2
355+
356+ // I am intentionally duplicating IndVarSimplify. The SimplifyCfg pass after
357+ // the first IndVarSimplify gets rid of a bunch of junk that interferes
358+ // with loop idiom recognition, and the second IndVarSimplify was present
359+ // in C++, so I would rather not remove it.
360+
317361 MPM.add (createIndVarSimplifyPass ()); // Canonicalize indvars
318362 MPM.add (createLoopIdiomPass ()); // Recognize idioms like memset.
319363 MPM.add (createLoopDeletionPass ()); // Delete dead loops
@@ -325,25 +369,31 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
325369 MPM.add (createSimpleLoopUnrollPass ()); // Unroll small loops
326370 addExtensionsToPM (EP_LoopOptimizerEnd, MPM);
327371
372+ // End of loop optimization sequence.
373+
374+ // Optimization sequences I know we need:
375+ // UNROLL -> SIMPLIFY -> MEMCPYOPT -> INSTCOMBINE -> GVN (TODO: record why)
376+
377+ // Exit out of LCSSA, and do some cleanup after loop unrolling.
378+ MPM.add (createCFGSimplificationPass ());
379+
380+ MPM.add (createMemCpyOptPass ()); // Remove memcpy / form memset
328381 if (OptLevel > 1 ) {
329382 if (EnableMLSM)
330383 MPM.add (createMergedLoadStoreMotionPass ()); // Merge ld/st in diamonds
331384 MPM.add (NewGVN ? createNewGVNPass ()
332385 : createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
333386 }
334- MPM.add (createMemCpyOptPass ()); // Remove memcpy / form memset
335387 MPM.add (createSCCPPass ()); // Constant prop with SCCP
336388
337389 // Delete dead bit computations (instcombine runs after to fold away the dead
338390 // computations, and then ADCE will run later to exploit any new DCE
339391 // opportunities that creates).
340392 MPM.add (createBitTrackingDCEPass ()); // Delete dead bit computations
341-
342393 // Run instcombine after redundancy elimination to exploit opportunities
343394 // opened up by them.
344395 addInstructionCombiningPass (MPM);
345- if (OptLevel > 1 )
346- MPM.add (createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
396+
347397 addExtensionsToPM (EP_Peephole, MPM);
348398 MPM.add (createJumpThreadingPass ()); // Thread jumps
349399 MPM.add (createCorrelatedValuePropagationPass ());
0 commit comments