; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; ST2B
;

; Two-vector interleaving store of bytes; byte elements take an unshifted
; register offset ([x0, x1]).
define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st2b_i8:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST2H
;

; Halfword elements fold the GEP into a register offset scaled by LSL #1.
define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

; Same addressing mode for half-precision floats (element size drives the shift).
define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST2W
;

; Word elements fold the GEP into a register offset scaled by LSL #2.
define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

; Same addressing mode for single-precision floats.
define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST2D
;

; Doubleword elements fold the GEP into a register offset scaled by LSL #3.
define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

; Same addressing mode for double-precision floats.
define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

;
; ST3B
;

; Three-vector interleaving store of bytes; unshifted register offset.
define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st3b_i8:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST3H
;

; Halfword elements fold the GEP into a register offset scaled by LSL #1.
define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

; Same addressing mode for half-precision floats.
define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST3W
;

; Word elements fold the GEP into a register offset scaled by LSL #2.
define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

; Same addressing mode for single-precision floats.
define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST3D
;

; Doubleword elements fold the GEP into a register offset scaled by LSL #3.
define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

; Same addressing mode for double-precision floats.
define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

;
; ST4B
;

; Four-vector interleaving store of bytes; unshifted register offset.
define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
; CHECK-LABEL: st4b_i8:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %1 = getelementptr i8, i8* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %1)
  ret void
}

;
; ST4H
;

; Halfword elements fold the GEP into a register offset scaled by LSL #1.
define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr, i64 %offset) {
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr i16, i16* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          i16* %1)
  ret void
}

; Same addressing mode for half-precision floats.
define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr, i64 %offset) {
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %1 = getelementptr half, half* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          half* %1)
  ret void
}

;
; ST4W
;

; Word elements fold the GEP into a register offset scaled by LSL #2.
define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr, i64 %offset) {
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr i32, i32* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          i32* %1)
  ret void
}

; Same addressing mode for single-precision floats.
define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr, i64 %offset) {
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %1 = getelementptr float, float* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          float* %1)
  ret void
}

;
; ST4D
;

; Doubleword elements fold the GEP into a register offset scaled by LSL #3.
define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr, i64 %offset) {
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr i64, i64* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          i64* %1)
  ret void
}

; Same addressing mode for double-precision floats.
define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr, i64 %offset) {
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %1 = getelementptr double, double* %addr, i64 %offset
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          double* %1)
  ret void
}

; Declarations for the structured-store intrinsics exercised above.
declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)