@@ -329,27 +329,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
329329; CHECK-NEXT: .cfi_offset w29, -32
330330; CHECK-NEXT: addvl sp, sp, #-18
331331; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
332- ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333- ; CHECK-NEXT: ptrue pn8.b
334332; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335- ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
336- ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
337333; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338- ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
339- ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
340334; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341- ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
342- ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
343335; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344- ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
345336; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346337; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347338; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
339+ ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
348340; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349341; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350342; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351343; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352- ; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
344+ ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
345+ ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
346+ ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
347+ ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
348+ ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
349+ ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
350+ ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
351+ ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
352+ ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
353+ ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
354+ ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
355+ ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
356+ ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
357+ ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
358+ ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
359+ ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353360; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354361; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355362; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -371,16 +378,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
371378; CHECK-NEXT: .cfi_restore vg
372379; CHECK-NEXT: addvl sp, sp, #1
373380; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374- ; CHECK-NEXT: ptrue pn8.b
381+ ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
382+ ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
383+ ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
384+ ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
385+ ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
386+ ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
387+ ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
388+ ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
389+ ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
390+ ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
391+ ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
392+ ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
393+ ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
394+ ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
395+ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
396+ ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
375397; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376- ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377- ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378- ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379- ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380- ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381- ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382- ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383- ; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384398; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385399; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386400; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -424,27 +438,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
424438; FP-CHECK-NEXT: .cfi_offset w30, -40
425439; FP-CHECK-NEXT: .cfi_offset w29, -48
426440; FP-CHECK-NEXT: addvl sp, sp, #-18
427- ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428- ; FP-CHECK-NEXT: ptrue pn8.b
429441; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430- ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
431- ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
432442; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433- ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
434- ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
435443; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436- ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
437- ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
438444; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439- ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
440445; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441446; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442447; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
448+ ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
443449; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444450; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445451; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446452; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447- ; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
453+ ; FP-CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
454+ ; FP-CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
455+ ; FP-CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
456+ ; FP-CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
457+ ; FP-CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
458+ ; FP-CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
459+ ; FP-CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
460+ ; FP-CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
461+ ; FP-CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
462+ ; FP-CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
463+ ; FP-CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
464+ ; FP-CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
465+ ; FP-CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
466+ ; FP-CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
467+ ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
468+ ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448469; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449470; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450471; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -464,16 +485,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
464485; FP-CHECK-NEXT: smstart sm
465486; FP-CHECK-NEXT: .cfi_restore vg
466487; FP-CHECK-NEXT: addvl sp, sp, #1
467- ; FP-CHECK-NEXT: ptrue pn8.b
488+ ; FP-CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
489+ ; FP-CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
490+ ; FP-CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
491+ ; FP-CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
492+ ; FP-CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
493+ ; FP-CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
494+ ; FP-CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
495+ ; FP-CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
496+ ; FP-CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
497+ ; FP-CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
498+ ; FP-CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
499+ ; FP-CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
500+ ; FP-CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
501+ ; FP-CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
502+ ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
503+ ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
468504; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469- ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470- ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471- ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472- ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473- ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474- ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475- ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476- ; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477505; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478506; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479507; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
0 commit comments