; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; NOTE: invalid, upper-bound, and lower-bound immediate values of the reg+imm
; addressing mode are checked only for the byte version of each
; instruction (`st<N>b`), as the code for detecting the immediate is
; common to all instructions and varies only with the number of
; elements of the structured store, which is <N> = 2, 3, 4.
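;
; For reference (as exercised by the valid/invalid bound cases below), the
; reg+imm form takes a 4-bit signed immediate scaled by <N>, so the offset
; must be a multiple of <N> in the range [-8*<N>, 7*<N>] vectors: [-16, 14]
; for st2b, [-24, 21] for st3b, and [-32, 28] for st4b. Offsets outside that
; range, or not a multiple of <N>, are instead materialised with rdvl and the
; reg+reg addressing mode.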

;
; ST2B
;

define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
; CHECK: rdvl x[[N:[0-9]+]], #3
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
; CHECK: rdvl x[[N:[0-9]+]], #-18
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
; CHECK: rdvl x[[N:[0-9]+]], #16
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST2H
;

define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST2W
;

define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST2D
;

define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10, i64 0
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST3B
;

define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
; CHECK: rdvl x[[N:[0-9]+]], #4
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
; CHECK: rdvl x[[N:[0-9]+]], #-27
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
; CHECK: rdvl x[[N:[0-9]+]], #24
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST3H
;

define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST3W
;

define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST3D
;

define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST4B
;

define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
; CHECK: rdvl x[[N:[0-9]+]], #6
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
; CHECK: rdvl x[[N:[0-9]+]], #7
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound:
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
; xM = -9 * 2^6
; xP = RDVL * 2^-4
; xOFFSET = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36
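; (With the current lowering, the -36-vector offset is thus built as
;  (rdvl #1 >> 4) * -576 = -576 * vscale bytes, i.e. -36 vector registers.)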
; CHECK: rdvl x[[N:[0-9]+]], #1
; CHECK-DAG:  mov  x[[M:[0-9]+]], #-576
; CHECK-DAG:  lsr  x[[P:[0-9]+]], x[[N]], #4
; CHECK-DAG:  mul  x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound:
; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
; xM = 2^9
; xP = RDVL * 2^-4
; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32
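; (Likewise, the 32-vector offset is currently built as
;  (rdvl #1 >> 4) * 512 = 512 * vscale bytes, i.e. 32 vector registers.)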
; CHECK: rdvl x[[N:[0-9]+]], #1
; CHECK-DAG:  mov  w[[M:[0-9]+]], #512
; CHECK-DAG:  lsr  x[[P:[0-9]+]], x[[N]], #4
; CHECK-DAG:  mul  x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST4H
;

define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST4W
;

define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST4D
;

define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)