11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK,CHECK-LE %s
3- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK,CHECK-LE %s
4- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK,CHECK-BE %s
2+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE %s
3+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE-SVE %s
4+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK-BE %s
55
66define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
77; CHECK-LE-LABEL: @scalarize_v2i64(
@@ -28,6 +28,10 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
2828; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
2929; CHECK-LE-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
3030;
31+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64(
32+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 128, <2 x i1> [[MASK:%.*]], <2 x i64> [[PASSTHRU:%.*]])
33+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
34+ ;
3135; CHECK-BE-LABEL: @scalarize_v2i64(
3236; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
3337; CHECK-BE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], -2
@@ -57,58 +61,83 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
5761}
5862
5963define <2 x i64> @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %passthru) {
60- ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
61- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
62- ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
64+ ; CHECK-LE-LABEL: @scalarize_v2i64_ones_mask(
65+ ; CHECK-LE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
66+ ; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]]
67+ ;
68+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask(
69+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
70+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
71+ ;
72+ ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask(
73+ ; CHECK-BE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
74+ ; CHECK-BE-NEXT: ret <2 x i64> [[TMP1]]
6375;
6476 %ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
6577 ret <2 x i64> %ret
6678}
6779
6880define <2 x i64> @scalarize_v2i64_zero_mask(ptr %p, <2 x i64> %passthru) {
69- ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
70- ; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
81+ ; CHECK-LE-LABEL: @scalarize_v2i64_zero_mask(
82+ ; CHECK-LE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
83+ ;
84+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_zero_mask(
85+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> zeroinitializer, <2 x i64> [[PASSTHRU:%.*]])
86+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
87+ ;
88+ ; CHECK-BE-LABEL: @scalarize_v2i64_zero_mask(
89+ ; CHECK-BE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
7190;
7291 %ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
7392 ret <2 x i64> %ret
7493}
7594
7695define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
77- ; CHECK-LABEL: @scalarize_v2i64_const_mask(
78- ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
79- ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
80- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
81- ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
96+ ; CHECK-LE-LABEL: @scalarize_v2i64_const_mask(
97+ ; CHECK-LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
98+ ; CHECK-LE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
99+ ; CHECK-LE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
100+ ; CHECK-LE-NEXT: ret <2 x i64> [[TMP3]]
101+ ;
102+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_const_mask(
103+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
104+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
105+ ;
106+ ; CHECK-BE-LABEL: @scalarize_v2i64_const_mask(
107+ ; CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
108+ ; CHECK-BE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
109+ ; CHECK-BE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
110+ ; CHECK-BE-NEXT: ret <2 x i64> [[TMP3]]
82111;
83112 %ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
84113 ret <2 x i64> %ret
85114}
86115
87116; This uses a byte-sized but non-power-of-2 element size. This used to crash due to bad alignment calculation.
88117define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
89- ; CHECK-LE-LABEL: @scalarize_v2i24(
90- ; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
91- ; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
92- ; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
93- ; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
94- ; CHECK-LE: cond.load:
95- ; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
96- ; CHECK-LE-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
97- ; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
98- ; CHECK-LE-NEXT: br label [[ELSE]]
99- ; CHECK-LE: else:
100- ; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
101- ; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
102- ; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
103- ; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
104- ; CHECK-LE: cond.load1:
105- ; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
106- ; CHECK-LE-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
107- ; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
108- ; CHECK-LE-NEXT: br label [[ELSE2]]
109- ; CHECK-LE: else2:
110- ; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
111- ; CHECK-LE-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
118+ ; CHECK-LE-COMMON-LABEL: @scalarize_v2i24(
119+ ; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
120+ ; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
121+ ; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
122+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
123+ ; CHECK-LE-COMMON: cond.load:
124+ ; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
125+ ; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
126+ ; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
127+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
128+ ; CHECK-LE-COMMON: else:
129+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
130+ ; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
131+ ; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
132+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
133+ ; CHECK-LE-COMMON: cond.load1:
134+ ; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
135+ ; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
136+ ; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
137+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
138+ ; CHECK-LE-COMMON: else2:
139+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
140+ ; CHECK-LE-COMMON-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
112141;
113142; CHECK-BE-LABEL: @scalarize_v2i24(
114143; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
@@ -140,29 +169,29 @@ define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
140169
141170; This uses a byte-sized but non-power-of-2 element size. This used to crash due to bad alignment calculation.
142171define <2 x i48> @scalarize_v2i48(ptr %p, <2 x i1> %mask, <2 x i48> %passthru) {
143- ; CHECK-LE-LABEL: @scalarize_v2i48(
144- ; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
145- ; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
146- ; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
147- ; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
148- ; CHECK-LE: cond.load:
149- ; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
150- ; CHECK-LE-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
151- ; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
152- ; CHECK-LE-NEXT: br label [[ELSE]]
153- ; CHECK-LE: else:
154- ; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
155- ; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
156- ; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
157- ; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
158- ; CHECK-LE: cond.load1:
159- ; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
160- ; CHECK-LE-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
161- ; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
162- ; CHECK-LE-NEXT: br label [[ELSE2]]
163- ; CHECK-LE: else2:
164- ; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
165- ; CHECK-LE-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
172+ ; CHECK-LE-COMMON-LABEL: @scalarize_v2i48(
173+ ; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
174+ ; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
175+ ; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
176+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
177+ ; CHECK-LE-COMMON: cond.load:
178+ ; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
179+ ; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
180+ ; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
181+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
182+ ; CHECK-LE-COMMON: else:
183+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
184+ ; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
185+ ; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
186+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
187+ ; CHECK-LE-COMMON: cond.load1:
188+ ; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
189+ ; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
190+ ; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
191+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
192+ ; CHECK-LE-COMMON: else2:
193+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
194+ ; CHECK-LE-COMMON-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
166195;
167196; CHECK-BE-LABEL: @scalarize_v2i48(
168197; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
0 commit comments