; (Source-browser navigation residue, not part of the test: Home, Line#, Scopes#, Navigate#, Raw, Download.)
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s
; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check.

; Single-element stores extracted from a <16 x i8> vector. Every case in this
; group, lane 0 included, is expected to be emitted as a lane-indexed st1.b.

; Lane 1 stored to %D + 1.
define void @st1lane_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, i8* %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 0 stored to %D + 1.
define void @st1lane0_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane0_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, i8* %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 0 stored to %D - 1 (negative constant offset).
define void @st1lane0u_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane0u_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, i8* %D, i64 -1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 1 stored to %D + %offset: the address is formed by an explicit add and
; the same register must be used as the st1 base.
define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 0 stored to %D + %offset: also expects add followed by st1.b.
define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

; Single-element stores extracted from a <8 x i16> vector. Lane 1 is expected
; to use a lane-indexed st1.h; lane 0 is expected to use a scalar store of h0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, i16* %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #2.
define void @st1lane0_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane0_8h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, i16* %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-2.
define void @st1lane0u_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane0u_8h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, i16* %D, i64 -1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.h on the
; register the add produced.
define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #1.
define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Single-element stores extracted from a <4 x i32> vector. Lane 1 is expected
; to use a lane-indexed st1.s; lane 0 is expected to use a scalar store of s0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, i32* %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #4.
define void @st1lane0_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane0_4s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, i32* %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-4.
define void @st1lane0u_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane0u_4s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, i32* %D, i64 -1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.s on the
; register the add produced.
define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #2.
define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Single-element stores extracted from a <4 x float> vector. The expected
; instructions are the same as for the <4 x i32> tests: st1.s for lane 1 and a
; scalar store of s0 for lane 0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, float* %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #4.
define void @st1lane0_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane0_4s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, float* %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-4.
define void @st1lane0u_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane0u_4s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, float* %D, i64 -1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.s on the
; register the add produced.
define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #2.
define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Single-element stores extracted from a <2 x i64> vector. Lane 1 is expected
; to use a lane-indexed st1.d; lane 0 is expected to use a scalar store of d0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i64, i64* %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #8.
define void @st1lane0_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane0_2d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, i64* %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-8.
define void @st1lane0u_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane0u_2d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, i64* %D, i64 -1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.d on the
; register the add produced.
define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #3.
define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Single-element stores extracted from a <2 x double> vector. The expected
; instructions are the same as for the <2 x i64> tests: st1.d for lane 1 and a
; scalar store of d0 for lane 0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr double, double* %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #8.
define void @st1lane0_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane0_2d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, double* %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-8.
define void @st1lane0u_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane0u_2d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, double* %D, i64 -1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.d on the
; register the add produced.
define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #3.
define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; Single-element stores extracted from a 64-bit <8 x i8> vector. All three
; cases are expected to be emitted as a lane-indexed st1.b.

; Lane 1 stored to %D + 1.
define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, i8* %D, i64 1
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 1 stored to %D + %offset: explicit add, then st1.b on the register the
; add produced.
define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

; Lane 0 stored to %D + %offset: also expects add followed by st1.b.
define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

; Single-element stores extracted from a 64-bit <4 x i16> vector. Lane 1 is
; expected to use a lane-indexed st1.h; lane 0 a scalar store of h0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, i16* %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #2.
define void @st1lane0_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane0_4h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, i16* %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-2.
define void @st1lane0u_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane0u_4h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, i16* %D, i64 -1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.h on the
; register the add produced.
define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #1.
define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

; Single-element stores extracted from a 64-bit <2 x i32> vector. Lane 1 is
; expected to use a lane-indexed st1.s; lane 0 a scalar store of s0.

; Lane 1 stored to %D + 1 element.
define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, i32* %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #4.
define void @st1lane0_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane0_2s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, i32* %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-4.
define void @st1lane0u_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane0u_2s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, i32* %D, i64 -1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.s on the
; register the add produced.
define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #2.
define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

; Single-element stores extracted from a 64-bit <2 x float> vector. The
; expected instructions are the same as for the <2 x i32> tests.

; Lane 1 stored to %D + 1 element.
define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, float* %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D + 1 element: scalar str with immediate offset #4.
define void @st1lane0_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane0_2s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, float* %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D - 1 element: unscaled stur with offset #-4.
define void @st1lane0u_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane0u_2s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, float* %D, i64 -1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Lane 1 stored to %D + %offset elements: explicit add, then st1.s on the
; register the add produced.
define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

; Lane 0 stored to %D + %offset elements: register-offset str scaled by lsl #2.
define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Stores of the only element of a <1 x i64> vector. All three addressing forms
; are expected to use a scalar store of d0.

; Element stored to %D + 1 element: str with immediate offset #8.
define void @st1lane0_1d(<1 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane0_1d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, i64* %D, i64 1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Element stored to %D - 1 element: unscaled stur with offset #-8.
define void @st1lane0u_1d(<1 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane0u_1d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, i64* %D, i64 -1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Element stored to %D + %offset elements: register-offset str, lsl #3.
define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

; Stores of the only element of a <1 x double> vector. The expected
; instructions are the same as for the <1 x i64> tests.

; Element stored to %D + 1 element: str with immediate offset #8.
define void @st1lane0_1d_double(<1 x double> %A, double* %D) {
; CHECK-LABEL: st1lane0_1d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, double* %D, i64 1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; Element stored to %D - 1 element: unscaled stur with offset #-8.
define void @st1lane0u_1d_double(<1 x double> %A, double* %D) {
; CHECK-LABEL: st1lane0u_1d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, double* %D, i64 -1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; Element stored to %D + %offset elements: register-offset str, lsl #3.
define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; st2lane intrinsic calls with lane index 1, one test per 128-bit element
; size. Only the mnemonic and element-size suffix are checked.

define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
  ret void
}

define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
  ret void
}

define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
  ret void
}

define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind

; st3lane intrinsic calls with lane index 1, one test per 128-bit element
; size. Only the mnemonic and element-size suffix are checked.

define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
  ret void
}

define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
  ret void
}

define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
  ret void
}

define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind

; st4lane intrinsic calls with lane index 1, one test per 128-bit element
; size. Only the mnemonic and element-size suffix are checked.

define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
  ret void
}

define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
  ret void
}

define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
  ret void
}

define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind


; Whole-vector st2/st3/st4 of <8 x i8>. The default run expects the matching
; stN.8b instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
; EXYNOS-LABEL: st2_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
  ret void
}

define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
  ret void
}

define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
; EXYNOS-LABEL: st4_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind

; Whole-vector st2/st3/st4 of <16 x i8>. The default run expects the matching
; stN.16b instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
; EXYNOS-LABEL: st2_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
  ret void
}

define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
  ret void
}

define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
; EXYNOS-LABEL: st4_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind

; Whole-vector st2/st3/st4 of <4 x i16>. The default run expects the matching
; stN.4h instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
; EXYNOS-LABEL: st2_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
  ret void
}

define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
  ret void
}

define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
; EXYNOS-LABEL: st4_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind

; Whole-vector st2/st3/st4 of <8 x i16>. The default run expects the matching
; stN.8h instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
; EXYNOS-LABEL: st2_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
  ret void
}

define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
  ret void
}

define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
; EXYNOS-LABEL: st4_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind

; Whole-vector st2/st3/st4 of <2 x i32>. The default run expects the matching
; stN.2s instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
; EXYNOS-LABEL: st2_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
  ret void
}

define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
  ret void
}

define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
; EXYNOS-LABEL: st4_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind

; Whole-vector st2/st3/st4 of <4 x i32>. The default run expects the matching
; stN.4s instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
; EXYNOS-LABEL: st2_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
  ret void
}

define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
  ret void
}

define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
; EXYNOS-LABEL: st4_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind

; If there's only one element, st2/3/4 don't make much sense, stick to st1.
; Each test below therefore expects st1.1d for the st2/st3/st4 intrinsic on
; <1 x i64> operands. No Exynos-specific expectations are given here.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
  ret void
}

define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
  ret void
}

define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind

; Whole-vector st2/st3/st4 of <2 x i64>. The default run expects the matching
; stN.2d instruction. For st2 and st4 the Exynos-M1 run instead expects a
; zip1/zip2 + stp sequence; st3 has no Exynos-specific expectation.

define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
; EXYNOS-LABEL: st2_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
  ret void
}

define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
  ret void
}

define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
; EXYNOS-LABEL: st4_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
  ret void
}

; These intrinsics are expected to produce store instructions, so they are
; declared without a readnone/readonly memory attribute.
declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind

; st1x2 on 64-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind
915
; Passes two <8 x i8> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.8b addressed through x0.
define void @st1_x2_v8i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %dst) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %dst)
  ret void
}
922
; Passes two <4 x i16> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.4h addressed through x0.
define void @st1_x2_v4i16(<4 x i16> %vec0, <4 x i16> %vec1, i16* %dst) {
; CHECK-LABEL: st1_x2_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %vec0, <4 x i16> %vec1, i16* %dst)
  ret void
}
929
; Passes two <2 x i32> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.2s addressed through x0.
define void @st1_x2_v2i32(<2 x i32> %vec0, <2 x i32> %vec1, i32* %dst) {
; CHECK-LABEL: st1_x2_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %vec0, <2 x i32> %vec1, i32* %dst)
  ret void
}
936
; Passes two <2 x float> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.2s addressed through x0.
define void @st1_x2_v2f32(<2 x float> %vec0, <2 x float> %vec1, float* %dst) {
; CHECK-LABEL: st1_x2_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %vec0, <2 x float> %vec1, float* %dst)
  ret void
}
943
; Passes two <1 x i64> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.1d addressed through x0.
define void @st1_x2_v1i64(<1 x i64> %vec0, <1 x i64> %vec1, i64* %dst) {
; CHECK-LABEL: st1_x2_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %vec0, <1 x i64> %vec1, i64* %dst)
  ret void
}
950
; Passes two <1 x double> vectors to the st1x2 intrinsic; FileCheck expects one
; two-register st1.1d addressed through x0.
define void @st1_x2_v1f64(<1 x double> %vec0, <1 x double> %vec1, double* %dst) {
; CHECK-LABEL: st1_x2_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %vec0, <1 x double> %vec1, double* %dst)
  ret void
}
957
; st1x2 on 128-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind
964
; Hands a pair of <16 x i8> vectors to the st1x2 intrinsic; the expected output
; is a single st1.16b with a two-register list and x0 as the address.
define void @st1_x2_v16i8(<16 x i8> %vec0, <16 x i8> %vec1, i8* %dst) {
; CHECK-LABEL: st1_x2_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, i8* %dst)
  ret void
}
971
; Hands a pair of <8 x i16> vectors to the st1x2 intrinsic; the expected output
; is a single st1.8h with a two-register list and x0 as the address.
define void @st1_x2_v8i16(<8 x i16> %vec0, <8 x i16> %vec1, i16* %dst) {
; CHECK-LABEL: st1_x2_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %vec0, <8 x i16> %vec1, i16* %dst)
  ret void
}
978
; Hands a pair of <4 x i32> vectors to the st1x2 intrinsic; the expected output
; is a single st1.4s with a two-register list and x0 as the address.
define void @st1_x2_v4i32(<4 x i32> %vec0, <4 x i32> %vec1, i32* %dst) {
; CHECK-LABEL: st1_x2_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %vec0, <4 x i32> %vec1, i32* %dst)
  ret void
}
985
; Hands a pair of <4 x float> vectors to the st1x2 intrinsic; the expected
; output is a single st1.4s with a two-register list and x0 as the address.
define void @st1_x2_v4f32(<4 x float> %vec0, <4 x float> %vec1, float* %dst) {
; CHECK-LABEL: st1_x2_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %vec0, <4 x float> %vec1, float* %dst)
  ret void
}
992
; Hands a pair of <2 x i64> vectors to the st1x2 intrinsic; the expected output
; is a single st1.2d with a two-register list and x0 as the address.
define void @st1_x2_v2i64(<2 x i64> %vec0, <2 x i64> %vec1, i64* %dst) {
; CHECK-LABEL: st1_x2_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %vec0, <2 x i64> %vec1, i64* %dst)
  ret void
}
999
; Hands a pair of <2 x double> vectors to the st1x2 intrinsic; the expected
; output is a single st1.2d with a two-register list and x0 as the address.
define void @st1_x2_v2f64(<2 x double> %vec0, <2 x double> %vec1, double* %dst) {
; CHECK-LABEL: st1_x2_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %vec0, <2 x double> %vec1, double* %dst)
  ret void
}
1006
; st1x3 on 64-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind
1013
; Three <8 x i8> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.8b addressed through x0.
define void @st1_x3_v8i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %dst) {
; CHECK-LABEL: st1_x3_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %dst)
  ret void
}
1020
; Three <4 x i16> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.4h addressed through x0.
define void @st1_x3_v4i16(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, i16* %dst) {
; CHECK-LABEL: st1_x3_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, i16* %dst)
  ret void
}
1027
; Three <2 x i32> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.2s addressed through x0.
define void @st1_x3_v2i32(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, i32* %dst) {
; CHECK-LABEL: st1_x3_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, i32* %dst)
  ret void
}
1034
; Three <2 x float> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.2s addressed through x0.
define void @st1_x3_v2f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, float* %dst) {
; CHECK-LABEL: st1_x3_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, float* %dst)
  ret void
}
1041
; Three <1 x i64> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.1d addressed through x0.
define void @st1_x3_v1i64(<1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, i64* %dst) {
; CHECK-LABEL: st1_x3_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, i64* %dst)
  ret void
}
1048
; Three <1 x double> vectors go to the st1x3 intrinsic; FileCheck expects one
; three-register st1.1d addressed through x0.
define void @st1_x3_v1f64(<1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, double* %dst) {
; CHECK-LABEL: st1_x3_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, double* %dst)
  ret void
}
1055
; st1x3 on 128-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind
1062
; Feeds a triple of <16 x i8> vectors to the st1x3 intrinsic; the expected
; output is a single st1.16b with a three-register list and x0 as the address.
define void @st1_x3_v16i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %dst) {
; CHECK-LABEL: st1_x3_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %dst)
  ret void
}
1069
; Feeds a triple of <8 x i16> vectors to the st1x3 intrinsic; the expected
; output is a single st1.8h with a three-register list and x0 as the address.
define void @st1_x3_v8i16(<8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, i16* %dst) {
; CHECK-LABEL: st1_x3_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, i16* %dst)
  ret void
}
1076
; Feeds a triple of <4 x i32> vectors to the st1x3 intrinsic; the expected
; output is a single st1.4s with a three-register list and x0 as the address.
define void @st1_x3_v4i32(<4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, i32* %dst) {
; CHECK-LABEL: st1_x3_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, i32* %dst)
  ret void
}
1083
; Feeds a triple of <4 x float> vectors to the st1x3 intrinsic; the expected
; output is a single st1.4s with a three-register list and x0 as the address.
define void @st1_x3_v4f32(<4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, float* %dst) {
; CHECK-LABEL: st1_x3_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, float* %dst)
  ret void
}
1090
; Feeds a triple of <2 x i64> vectors to the st1x3 intrinsic; the expected
; output is a single st1.2d with a three-register list and x0 as the address.
define void @st1_x3_v2i64(<2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, i64* %dst) {
; CHECK-LABEL: st1_x3_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, i64* %dst)
  ret void
}
1097
; Feeds a triple of <2 x double> vectors to the st1x3 intrinsic; the expected
; output is a single st1.2d with a three-register list and x0 as the address.
define void @st1_x3_v2f64(<2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, double* %dst) {
; CHECK-LABEL: st1_x3_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, double* %dst)
  ret void
}
1104
1105
; st1x4 on 64-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind
1112
; Four <8 x i8> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.8b addressed through x0.
define void @st1_x4_v8i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i8* %dst) {
; CHECK-LABEL: st1_x4_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i8* %dst)
  ret void
}
1119
; Four <4 x i16> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.4h addressed through x0.
define void @st1_x4_v4i16(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i16* %dst) {
; CHECK-LABEL: st1_x4_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i16* %dst)
  ret void
}
1126
; Four <2 x i32> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.2s addressed through x0.
define void @st1_x4_v2i32(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32* %dst) {
; CHECK-LABEL: st1_x4_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32* %dst)
  ret void
}
1133
; Four <2 x float> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.2s addressed through x0.
define void @st1_x4_v2f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, float* %dst) {
; CHECK-LABEL: st1_x4_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, float* %dst)
  ret void
}
1140
; Four <1 x i64> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.1d addressed through x0.
define void @st1_x4_v1i64(<1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i64* %dst) {
; CHECK-LABEL: st1_x4_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i64* %dst)
  ret void
}
1147
; Four <1 x double> vectors go to the st1x4 intrinsic; FileCheck expects one
; four-register st1.1d addressed through x0.
define void @st1_x4_v1f64(<1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, double* %dst) {
; CHECK-LABEL: st1_x4_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, double* %dst)
  ret void
}
1154
; st1x4 on 128-bit vectors, one declaration per element type. These intrinsics
; store through their pointer operand, so they must not be marked readonly.
declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
1161
; Supplies four <16 x i8> vectors to the st1x4 intrinsic; the expected output
; is a single st1.16b with a four-register list and x0 as the address.
define void @st1_x4_v16i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %dst) {
; CHECK-LABEL: st1_x4_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %dst)
  ret void
}
1168
; Supplies four <8 x i16> vectors to the st1x4 intrinsic; the expected output
; is a single st1.8h with a four-register list and x0 as the address.
define void @st1_x4_v8i16(<8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, i16* %dst) {
; CHECK-LABEL: st1_x4_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, i16* %dst)
  ret void
}
1175
; Supplies four <4 x i32> vectors to the st1x4 intrinsic; the expected output
; is a single st1.4s with a four-register list and x0 as the address.
define void @st1_x4_v4i32(<4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, i32* %dst) {
; CHECK-LABEL: st1_x4_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, i32* %dst)
  ret void
}
1182
; Supplies four <4 x float> vectors to the st1x4 intrinsic; the expected output
; is a single st1.4s with a four-register list and x0 as the address.
define void @st1_x4_v4f32(<4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, float* %dst) {
; CHECK-LABEL: st1_x4_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, float* %dst)
  ret void
}
1189
; Supplies four <2 x i64> vectors to the st1x4 intrinsic; the expected output
; is a single st1.2d with a four-register list and x0 as the address.
define void @st1_x4_v2i64(<2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3, i64* %dst) {
; CHECK-LABEL: st1_x4_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3, i64* %dst)
  ret void
}
1196
; Supplies four <2 x double> vectors to the st1x4 intrinsic; the expected
; output is a single st1.2d with a four-register list and x0 as the address.
define void @st1_x4_v2f64(<2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, double* %dst) {
; CHECK-LABEL: st1_x4_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
entry:
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, double* %dst)
  ret void
}
1203