; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128
3- ; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | not grep -e ldr -e str
3+ ; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-BE-128
44
; Plain (unpredicated in IR) load of a whole <vscale x 16 x i8> vector.
; The little-endian checks expect a single Q-register load; the big-endian
; checks expect an all-true predicate followed by a predicated SVE byte load.
define <vscale x 16 x i8> @ld_nxv16i8(ptr %0) {
; CHECK-128-LABEL: ld_nxv16i8:
; CHECK-128:       // %bb.0:
; CHECK-128-NEXT:    ldr q0, [x0]
; CHECK-128-NEXT:    ret
;
; CHECK-BE-128-LABEL: ld_nxv16i8:
; CHECK-BE-128:       // %bb.0:
; CHECK-BE-128-NEXT:    ptrue p0.b
; CHECK-BE-128-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-BE-128-NEXT:    ret
  %2 = load <vscale x 16 x i8>, ptr %0, align 1
  ret <vscale x 16 x i8> %2
}
@@ -16,6 +22,12 @@ define void @st_nxv16i8(ptr %0, <vscale x 16 x i8> %1) {
1622; CHECK-128: // %bb.0:
1723; CHECK-128-NEXT: str q0, [x0]
1824; CHECK-128-NEXT: ret
25+ ;
26+ ; CHECK-BE-128-LABEL: st_nxv16i8:
27+ ; CHECK-BE-128: // %bb.0:
28+ ; CHECK-BE-128-NEXT: ptrue p0.b
29+ ; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x0]
30+ ; CHECK-BE-128-NEXT: ret
1931 store <vscale x 16 x i8 > %1 , ptr %0 , align 1
2032 ret void
2133}
@@ -25,6 +37,12 @@ define <vscale x 8 x i16> @ld_nxv8i16(ptr %0) {
2537; CHECK-128: // %bb.0:
2638; CHECK-128-NEXT: ldr q0, [x0]
2739; CHECK-128-NEXT: ret
40+ ;
41+ ; CHECK-BE-128-LABEL: ld_nxv8i16:
42+ ; CHECK-BE-128: // %bb.0:
43+ ; CHECK-BE-128-NEXT: ptrue p0.h
44+ ; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
45+ ; CHECK-BE-128-NEXT: ret
2846 %2 = load <vscale x 8 x i16 >, ptr %0 , align 2
2947 ret <vscale x 8 x i16 > %2
3048}
@@ -34,6 +52,12 @@ define void @st_nxv8i16(ptr %0, <vscale x 8 x i16> %1) {
3452; CHECK-128: // %bb.0:
3553; CHECK-128-NEXT: str q0, [x0]
3654; CHECK-128-NEXT: ret
55+ ;
56+ ; CHECK-BE-128-LABEL: st_nxv8i16:
57+ ; CHECK-BE-128: // %bb.0:
58+ ; CHECK-BE-128-NEXT: ptrue p0.h
59+ ; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
60+ ; CHECK-BE-128-NEXT: ret
3761 store <vscale x 8 x i16 > %1 , ptr %0 , align 2
3862 ret void
3963}
@@ -43,6 +67,12 @@ define <vscale x 4 x i32> @ld_nxv4i32(ptr %0) {
4367; CHECK-128: // %bb.0:
4468; CHECK-128-NEXT: ldr q0, [x0]
4569; CHECK-128-NEXT: ret
70+ ;
71+ ; CHECK-BE-128-LABEL: ld_nxv4i32:
72+ ; CHECK-BE-128: // %bb.0:
73+ ; CHECK-BE-128-NEXT: ptrue p0.s
74+ ; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
75+ ; CHECK-BE-128-NEXT: ret
4676 %2 = load <vscale x 4 x i32 >, ptr %0 , align 4
4777 ret <vscale x 4 x i32 > %2
4878}
@@ -52,6 +82,12 @@ define void @st_nxv4i32(ptr %0, <vscale x 4 x i32> %1) {
5282; CHECK-128: // %bb.0:
5383; CHECK-128-NEXT: str q0, [x0]
5484; CHECK-128-NEXT: ret
85+ ;
86+ ; CHECK-BE-128-LABEL: st_nxv4i32:
87+ ; CHECK-BE-128: // %bb.0:
88+ ; CHECK-BE-128-NEXT: ptrue p0.s
89+ ; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
90+ ; CHECK-BE-128-NEXT: ret
5591 store <vscale x 4 x i32 > %1 , ptr %0 , align 4
5692 ret void
5793}
@@ -61,6 +97,12 @@ define <vscale x 2 x i64> @ld_nxv2i64(ptr %0) {
6197; CHECK-128: // %bb.0:
6298; CHECK-128-NEXT: ldr q0, [x0]
6399; CHECK-128-NEXT: ret
100+ ;
101+ ; CHECK-BE-128-LABEL: ld_nxv2i64:
102+ ; CHECK-BE-128: // %bb.0:
103+ ; CHECK-BE-128-NEXT: ptrue p0.d
104+ ; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
105+ ; CHECK-BE-128-NEXT: ret
64106 %2 = load <vscale x 2 x i64 >, ptr %0 , align 8
65107 ret <vscale x 2 x i64 > %2
66108}
@@ -70,6 +112,12 @@ define void @st_nxv2i64(ptr %0, <vscale x 2 x i64> %1) {
70112; CHECK-128: // %bb.0:
71113; CHECK-128-NEXT: str q0, [x0]
72114; CHECK-128-NEXT: ret
115+ ;
116+ ; CHECK-BE-128-LABEL: st_nxv2i64:
117+ ; CHECK-BE-128: // %bb.0:
118+ ; CHECK-BE-128-NEXT: ptrue p0.d
119+ ; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
120+ ; CHECK-BE-128-NEXT: ret
73121 store <vscale x 2 x i64 > %1 , ptr %0 , align 8
74122 ret void
75123}
@@ -79,6 +127,12 @@ define <vscale x 8 x half> @ld_nxv8f16(ptr %0) {
79127; CHECK-128: // %bb.0:
80128; CHECK-128-NEXT: ldr q0, [x0]
81129; CHECK-128-NEXT: ret
130+ ;
131+ ; CHECK-BE-128-LABEL: ld_nxv8f16:
132+ ; CHECK-BE-128: // %bb.0:
133+ ; CHECK-BE-128-NEXT: ptrue p0.h
134+ ; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0]
135+ ; CHECK-BE-128-NEXT: ret
82136 %2 = load <vscale x 8 x half >, ptr %0 , align 2
83137 ret <vscale x 8 x half > %2
84138}
@@ -88,6 +142,12 @@ define void @st_nxv8f16(ptr %0, <vscale x 8 x half> %1) {
88142; CHECK-128: // %bb.0:
89143; CHECK-128-NEXT: str q0, [x0]
90144; CHECK-128-NEXT: ret
145+ ;
146+ ; CHECK-BE-128-LABEL: st_nxv8f16:
147+ ; CHECK-BE-128: // %bb.0:
148+ ; CHECK-BE-128-NEXT: ptrue p0.h
149+ ; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0]
150+ ; CHECK-BE-128-NEXT: ret
91151 store <vscale x 8 x half > %1 , ptr %0 , align 2
92152 ret void
93153}
@@ -97,6 +157,12 @@ define <vscale x 4 x float> @ld_nxv4f32(ptr %0) {
97157; CHECK-128: // %bb.0:
98158; CHECK-128-NEXT: ldr q0, [x0]
99159; CHECK-128-NEXT: ret
160+ ;
161+ ; CHECK-BE-128-LABEL: ld_nxv4f32:
162+ ; CHECK-BE-128: // %bb.0:
163+ ; CHECK-BE-128-NEXT: ptrue p0.s
164+ ; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0]
165+ ; CHECK-BE-128-NEXT: ret
100166 %2 = load <vscale x 4 x float >, ptr %0 , align 4
101167 ret <vscale x 4 x float > %2
102168}
@@ -106,6 +172,12 @@ define void @st_nxv4f32(ptr %0, <vscale x 4 x float> %1) {
106172; CHECK-128: // %bb.0:
107173; CHECK-128-NEXT: str q0, [x0]
108174; CHECK-128-NEXT: ret
175+ ;
176+ ; CHECK-BE-128-LABEL: st_nxv4f32:
177+ ; CHECK-BE-128: // %bb.0:
178+ ; CHECK-BE-128-NEXT: ptrue p0.s
179+ ; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0]
180+ ; CHECK-BE-128-NEXT: ret
109181 store <vscale x 4 x float > %1 , ptr %0 , align 4
110182 ret void
111183}
@@ -115,6 +187,12 @@ define <vscale x 2 x double> @ld_nxv2f64(ptr %0) {
115187; CHECK-128: // %bb.0:
116188; CHECK-128-NEXT: ldr q0, [x0]
117189; CHECK-128-NEXT: ret
190+ ;
191+ ; CHECK-BE-128-LABEL: ld_nxv2f64:
192+ ; CHECK-BE-128: // %bb.0:
193+ ; CHECK-BE-128-NEXT: ptrue p0.d
194+ ; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0]
195+ ; CHECK-BE-128-NEXT: ret
118196 %2 = load <vscale x 2 x double >, ptr %0 , align 8
119197 ret <vscale x 2 x double > %2
120198}
@@ -124,6 +202,38 @@ define void @st_nxv2f64(ptr %0, <vscale x 2 x double> %1) {
124202; CHECK-128: // %bb.0:
125203; CHECK-128-NEXT: str q0, [x0]
126204; CHECK-128-NEXT: ret
205+ ;
206+ ; CHECK-BE-128-LABEL: st_nxv2f64:
207+ ; CHECK-BE-128: // %bb.0:
208+ ; CHECK-BE-128-NEXT: ptrue p0.d
209+ ; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0]
210+ ; CHECK-BE-128-NEXT: ret
127211 store <vscale x 2 x double > %1 , ptr %0 , align 8
128212 ret void
129213}
214+
; Test LDP/STP fold.
; Two <vscale x 16 x i8> loads at byte offsets -16 and 0 from %ldptr are stored
; to the same offsets from %stptr. The little-endian checks expect each pair to
; be merged into one ldp/stp of Q registers at offset #-16; the big-endian
; checks expect four separate predicated ld1b/st1b accesses instead.
define void @ldp_stp_nxv16i8_offset(ptr %ldptr, ptr %stptr) {
; CHECK-128-LABEL: ldp_stp_nxv16i8_offset:
; CHECK-128:       // %bb.0:
; CHECK-128-NEXT:    ldp q0, q1, [x0, #-16]
; CHECK-128-NEXT:    stp q0, q1, [x1, #-16]
; CHECK-128-NEXT:    ret
;
; CHECK-BE-128-LABEL: ldp_stp_nxv16i8_offset:
; CHECK-BE-128:       // %bb.0:
; CHECK-BE-128-NEXT:    ptrue p0.b
; CHECK-BE-128-NEXT:    mov x8, #-16 // =0xfffffffffffffff0
; CHECK-BE-128-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; CHECK-BE-128-NEXT:    ld1b { z1.b }, p0/z, [x0]
; CHECK-BE-128-NEXT:    st1b { z0.b }, p0, [x1, x8]
; CHECK-BE-128-NEXT:    st1b { z1.b }, p0, [x1]
; CHECK-BE-128-NEXT:    ret
  %ldptr.1 = getelementptr inbounds i8, ptr %ldptr, i64 -16
  %ld1 = load <vscale x 16 x i8>, ptr %ldptr.1, align 1
  %ld2 = load <vscale x 16 x i8>, ptr %ldptr, align 1
  %stptr.1 = getelementptr inbounds i8, ptr %stptr, i64 -16
  store <vscale x 16 x i8> %ld1, ptr %stptr.1, align 1
  store <vscale x 16 x i8> %ld2, ptr %stptr, align 1
  ret void
}
0 commit comments