; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
; Tests pre-/post-indexed selection for vector loads, stores, st1 lane stores
; and ld2 structure loads on arm64.

; Global pointer slot: the pre-/post-indexed ldr/str tests store their updated
; address here so the incremented pointer has a use besides the memory access.
@ptr = global i8* null

; Pre-indexed load: loads from %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (40 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  %val = load <8 x i8>, <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (40 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (40 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  %val = load <4 x i16>, <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (40 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (40 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  %val = load <2 x i32>, <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (40 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  %val = load <2 x float>, <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (40 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  %val = load <2 x float>, <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (40 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (40 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  %val = load <1 x i64>, <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (40 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (40 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  %val = load <16 x i8>, <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  %val = load <8 x i16>, <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}

; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  %val = load <4 x i32>, <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}


; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  %val = load <4 x float>, <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  %val = load <4 x float>, <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}


; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  %val = load <2 x i64>, <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}


; Pre-indexed load: loads from %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected ldr performs the address update as writeback.
define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  %val = load <2 x double>, <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}

; Post-indexed load: loads from %addr and stores %addr + 5 vectors (80 bytes)
; to @ptr; the expected ldr performs the increment after the access.
define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  %val = load <2 x double>, <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}

; Pre-indexed store: stores %in at %addr + 5 vectors (80 bytes) and stores that
; pointer to @ptr; the expected str performs the address update as writeback.
define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}

; Post-indexed store: stores %in at %addr and stores %addr + 5 vectors
; (80 bytes) to @ptr; the expected str performs the increment after the access.
define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}

; Stores lane 3 of %in to %addr and returns %addr + 1 element (1 byte); a
; post-indexed st1 lane store with an immediate increment is expected.
define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 1
  ret i8* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (2 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 2
  ret i8* %newaddr
}


; Stores lane 3 of %in to %addr and returns %addr + 1 element (2 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 1
  ret i16* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (4 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 2
  ret i16* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 1 element (4 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 1
  ret i32* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (8 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 2
  ret i32* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 1 element (4 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 1
  ret float* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (8 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 2
  ret float* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 1 element (8 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64, i64* %addr, i64 1
  ret i64* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 2 elements (16 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64, i64* %addr, i64 2
  ret i64* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 1 element (8 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double, double* %addr, i32 1
  ret double* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 2 elements (16 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double, double* %addr, i32 2
  ret double* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 1 element (1 byte); a
; post-indexed st1 lane store with an immediate increment is expected.
define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 1
  ret i8* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (2 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 2
  ret i8* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 1 element (2 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 1
  ret i16* %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr + 2 elements (4 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 2
  ret i16* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 1 element (4 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 1
  ret i32* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 2 elements (8 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 2
  ret i32* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 1 element (4 bytes); a
; post-indexed st1 lane store with an immediate increment is expected.
define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 1
  ret float* %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr + 2 elements (8 bytes); the
; post-indexed st1 lane store is expected to take the increment from a register.
define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 2
  ret float* %newaddr
}

; Two-vector ld2 from %A; %A + 32 elements (32 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #32 is expected.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], #32
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i32 32
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; ld2 intrinsic returning two <16 x i8> vectors; called by the v16i8 ld2 tests.
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)


; Two-vector ld2 from %A; %A + 16 elements (16 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #16 is expected.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], #16
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i32 16
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; ld2 intrinsic returning two <8 x i8> vectors; called by the v8i8 ld2 tests.
declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)


; Two-vector ld2 from %A; %A + 16 elements (32 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #32 is expected.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], #32
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i32 16
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; ld2 intrinsic returning two <8 x i16> vectors; called by the v8i16 ld2 tests.
declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)


; Two-vector ld2 from %A; %A + 8 elements (16 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #16 is expected.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], #16
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i32 8
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; ld2 intrinsic returning two <4 x i16> vectors; called by the v4i16 ld2 tests.
declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)


; Two-vector ld2 from %A; %A + 8 elements (32 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #32 is expected.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i32 8
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; ld2 intrinsic returning two <4 x i32> vectors; called by the v4i32 ld2 tests.
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)


; Two-vector ld2 from %A; %A + 4 elements (16 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #16 is expected.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i32 4
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; ld2 intrinsic returning two <2 x i32> vectors; called by the v2i32 ld2 tests.
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)


; Two-vector ld2 from %A; %A + 4 elements (32 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #32 is expected.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i32 4
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; ld2 intrinsic returning two <2 x i64> vectors; called by the v2i64 ld2 tests.
declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)


; ld2 intrinsic on <1 x i64> from %A; %A + 2 elements (16 bytes) is stored to
; %ptr. The expected instruction is a two-register ld1.1d post-indexed by #16.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i32 2
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; ld2 intrinsic on <1 x i64> from %A; %A + %inc elements is stored to %ptr. The
; expected instruction is a two-register ld1.1d with a register post-increment.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; ld2 intrinsic returning two <1 x i64> vectors; called by the v1i64 ld2 tests.
declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)


; Two-vector ld2 from %A; %A + 8 elements (32 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #32 is expected.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i32 8
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; ld2 intrinsic returning two <4 x float> vectors; called by the v4f32 ld2 tests.
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)


; Two-vector ld2 from %A; %A + 4 elements (16 bytes) is stored to %ptr, so a
; post-indexed ld2 with immediate #16 is expected.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i32 4
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

; Two-vector ld2 from %A; %A + %inc elements is stored to %ptr, so a
; post-indexed ld2 with a register increment is expected.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

822declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
823
824
; Post-indexed ld2 with an immediate step: the pointer written to %ptr is %A
; plus 4 x double = 32 bytes, the total size of the two <2 x double> results.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld2:
; CHECK: ld2.2d { v0, v1 }, [x0], #32
  %vecs = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %vecs
}
833
; Post-indexed ld2 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld2:
; CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %vecs
}
842
843declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
844
845
; Post-indexed ld2 with an immediate step: the pointer written to %ptr is %A
; plus 2 x double = 16 bytes, the total size of the two <1 x double> results.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
; CHECK-LABEL: test_v1f64_post_imm_ld2:
; CHECK: ld1.1d { v0, v1 }, [x0], #16
  %vecs = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %vecs
}
854
; Post-indexed ld2 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v1f64_post_reg_ld2:
; CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %vecs
}
863
864declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
865
866
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 48 x i8 = 48 bytes, the total size of the three <16 x i8> results.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v16i8_post_imm_ld3:
; CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 48
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}
875
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v16i8_post_reg_ld3:
; CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}
884
885declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
886
887
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 24 x i8 = 24 bytes, the total size of the three <8 x i8> results.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v8i8_post_imm_ld3:
; CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 24
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
896
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i8_post_reg_ld3:
; CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
905
906declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
907
908
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 24 x i16 = 48 bytes, the total size of the three <8 x i16> results.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v8i16_post_imm_ld3:
; CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 24
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
917
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i16_post_reg_ld3:
; CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
926
927declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
928
929
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 12 x i16 = 24 bytes, the total size of the three <4 x i16> results.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v4i16_post_imm_ld3:
; CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 12
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
938
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i16_post_reg_ld3:
; CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
947
948declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
949
950
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 12 x i32 = 48 bytes, the total size of the three <4 x i32> results.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld3:
; CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 12
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
959
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i32 elements, so the expected writeback offset is an X register.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld3:
; CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
968
969declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
970
971
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 6 x i32 = 24 bytes, the total size of the three <2 x i32> results.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld3:
; CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 6
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
980
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i32 elements, so the expected writeback offset is an X register.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i32_post_reg_ld3:
; CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
989
990declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
991
992
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 6 x i64 = 48 bytes, the total size of the three <2 x i64> results.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
; CHECK-LABEL: test_v2i64_post_imm_ld3:
; CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 6
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
1001
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i64 elements, so the expected writeback offset is an X register.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i64_post_reg_ld3:
; CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
1010
1011declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
1012
1013
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 3 x i64 = 24 bytes, the total size of the three <1 x i64> results.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
; CHECK-LABEL: test_v1i64_post_imm_ld3:
; CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
1022
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc i64 elements, so the expected writeback offset is an X register.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v1i64_post_reg_ld3:
; CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
1031
1032declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
1033
1034
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 12 x float = 48 bytes, the total size of the three <4 x float> results.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
; CHECK-LABEL: test_v4f32_post_imm_ld3:
; CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 12
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %vecs
}
1043
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc float elements, so the expected writeback offset is an X register.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4f32_post_reg_ld3:
; CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %vecs
}
1052
1053declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
1054
1055
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 6 x float = 24 bytes, the total size of the three <2 x float> results.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
; CHECK-LABEL: test_v2f32_post_imm_ld3:
; CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 6
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %vecs
}
1064
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc float elements, so the expected writeback offset is an X register.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f32_post_reg_ld3:
; CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %vecs
}
1073
1074declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
1075
1076
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 6 x double = 48 bytes, the total size of the three <2 x double> results.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld3:
; CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 6
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %vecs
}
1085
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld3:
; CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %vecs
}
1094
1095declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
1096
1097
; Post-indexed ld3 with an immediate step: the pointer written to %ptr is %A
; plus 3 x double = 24 bytes, the total size of the three <1 x double> results.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
; CHECK-LABEL: test_v1f64_post_imm_ld3:
; CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %vecs
}
1106
; Post-indexed ld3 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v1f64_post_reg_ld3:
; CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %vecs
}
1115
1116declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
1117
1118
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 64 x i8 = 64 bytes, the total size of the four <16 x i8> results.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v16i8_post_imm_ld4:
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 64
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}
1127
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v16i8_post_reg_ld4:
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}
1136
1137declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
1138
1139
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 32 x i8 = 32 bytes, the total size of the four <8 x i8> results.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v8i8_post_imm_ld4:
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
1148
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i8_post_reg_ld4:
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
1157
1158declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
1159
1160
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 32 x i16 = 64 bytes, the total size of the four <8 x i16> results.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v8i16_post_imm_ld4:
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 32
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
1169
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i16_post_reg_ld4:
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
1178
1179declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
1180
1181
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 16 x i16 = 32 bytes, the total size of the four <4 x i16> results.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v4i16_post_imm_ld4:
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
1190
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i16_post_reg_ld4:
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
1199
1200declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
1201
1202
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 16 x i32 = 64 bytes, the total size of the four <4 x i32> results.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld4:
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 16
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
1211
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i32 elements, so the expected writeback offset is an X register.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld4:
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
1220
1221declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
1222
1223
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 8 x i32 = 32 bytes, the total size of the four <2 x i32> results.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld4:
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
1232
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i32 elements, so the expected writeback offset is an X register.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i32_post_reg_ld4:
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
1241
1242declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
1243
1244
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 8 x i64 = 64 bytes, the total size of the four <2 x i64> results.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
; CHECK-LABEL: test_v2i64_post_imm_ld4:
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 8
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
1253
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i64 elements, so the expected writeback offset is an X register.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i64_post_reg_ld4:
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
1262
1263declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
1264
1265
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 4 x i64 = 32 bytes, the total size of the four <1 x i64> results.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
; CHECK-LABEL: test_v1i64_post_imm_ld4:
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
1274
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc i64 elements, so the expected writeback offset is an X register.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v1i64_post_reg_ld4:
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
1283
1284declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
1285
1286
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 16 x float = 64 bytes, the total size of the four <4 x float> results.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
; CHECK-LABEL: test_v4f32_post_imm_ld4:
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 16
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}
1295
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc float elements, so the expected writeback offset is an X register.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4f32_post_reg_ld4:
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}
1304
1305declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
1306
1307
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 8 x float = 32 bytes, the total size of the four <2 x float> results.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
; CHECK-LABEL: test_v2f32_post_imm_ld4:
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}
1316
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc float elements, so the expected writeback offset is an X register.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f32_post_reg_ld4:
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}
1325
1326declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
1327
1328
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 8 x double = 64 bytes, the total size of the four <2 x double> results.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld4:
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 8
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}
1337
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld4:
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}
1346
1347declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
1348
1349
; Post-indexed ld4 with an immediate step: the pointer written to %ptr is %A
; plus 4 x double = 32 bytes, the total size of the four <1 x double> results.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
; CHECK-LABEL: test_v1f64_post_imm_ld4:
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}
1358
; Post-indexed ld4 with a register step: the pointer written to %ptr is %A
; plus %inc double elements, so the expected writeback offset is an X register.
; For one-element vectors the expected mnemonic is the multi-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v1f64_post_reg_ld4:
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}
1367
1368declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
1369
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 32 x i8 = 32 bytes, the total size of the two <16 x i8> results.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v16i8_post_imm_ld1x2:
; CHECK: ld1.16b { v0, v1 }, [x0], #32
  %vecs = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %vecs
}
1378
; Post-indexed ld1x2 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v16i8_post_reg_ld1x2:
; CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %vecs
}
1387
1388declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
1389
1390
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 16 x i8 = 16 bytes, the total size of the two <8 x i8> results.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
; CHECK-LABEL: test_v8i8_post_imm_ld1x2:
; CHECK: ld1.8b { v0, v1 }, [x0], #16
  %vecs = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 16
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %vecs
}
1399
; Post-indexed ld1x2 with a register step: the pointer written to %ptr is %A
; plus %inc i8 elements, so the expected writeback offset is an X register.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i8_post_reg_ld1x2:
; CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %vecs
}
1408
1409declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
1410
1411
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 16 x i16 = 32 bytes, the total size of the two <8 x i16> results.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v8i16_post_imm_ld1x2:
; CHECK: ld1.8h { v0, v1 }, [x0], #32
  %vecs = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %vecs
}
1420
; Post-indexed ld1x2 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i16_post_reg_ld1x2:
; CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %vecs
}
1429
1430declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
1431
1432
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 8 x i16 = 16 bytes, the total size of the two <4 x i16> results.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
; CHECK-LABEL: test_v4i16_post_imm_ld1x2:
; CHECK: ld1.4h { v0, v1 }, [x0], #16
  %vecs = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 8
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %vecs
}
1441
; Post-indexed ld1x2 with a register step: the pointer written to %ptr is %A
; plus %inc i16 elements, so the expected writeback offset is an X register.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i16_post_reg_ld1x2:
; CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %vecs
}
1450
1451declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
1452
1453
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 8 x i32 = 32 bytes, the total size of the two <4 x i32> results.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld1x2:
; CHECK: ld1.4s { v0, v1 }, [x0], #32
  %vecs = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %vecs
}
1462
; Post-indexed ld1x2 with a register step: the pointer written to %ptr is %A
; plus %inc i32 elements, so the expected writeback offset is an X register.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld1x2:
; CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %vecs
}
1471
1472declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
1473
1474
; Post-indexed ld1x2 with an immediate step: the pointer written to %ptr is %A
; plus 4 x i32 = 16 bytes, the total size of the two <2 x i32> results.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld1x2:
; CHECK: ld1.2s { v0, v1 }, [x0], #16
  %vecs = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %vecs
}
1483
; ld1x2 load of two <2 x i32> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %pair
}
1492
1493declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
1494
1495
; ld1x2 load of two <2 x i64> vectors from %A; %A + 4 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %pair = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}
1504
; ld1x2 load of two <2 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}
1513
1514declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
1515
1516
; ld1x2 load of two <1 x i64> vectors from %A; %A + 2 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #16 immediate increment.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %pair = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}
1525
; ld1x2 load of two <1 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}
1534
1535declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
1536
1537
; ld1x2 load of two <4 x float> vectors from %A; %A + 8 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
  %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
1546
; ld1x2 load of two <4 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
1555
1556declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
1557
1558
; ld1x2 load of two <2 x float> vectors from %A; %A + 4 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #16 immediate increment.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
  %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 4
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
1567
; ld1x2 load of two <2 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
1576
1577declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
1578
1579
; ld1x2 load of two <2 x double> vectors from %A; %A + 4 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
1588
; ld1x2 load of two <2 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
1597
1598declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
1599
1600
; ld1x2 load of two <1 x double> vectors from %A; %A + 2 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #16 immediate increment.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
1609
; ld1x2 load of two <1 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
1618
1619declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
1620
1621
; ld1x3 load of three <16 x i8> vectors from %A; %A + 48 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
  %triple = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 48
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
1630
; ld1x3 load of three <16 x i8> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
1639
1640declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
1641
1642
; ld1x3 load of three <8 x i8> vectors from %A; %A + 24 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
  %triple = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 24
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
1651
; ld1x3 load of three <8 x i8> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
1660
1661declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
1662
1663
; ld1x3 load of three <8 x i16> vectors from %A; %A + 24 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
  %triple = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 24
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
1672
; ld1x3 load of three <8 x i16> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
1681
1682declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
1683
1684
; ld1x3 load of three <4 x i16> vectors from %A; %A + 12 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
  %triple = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 12
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
1693
; ld1x3 load of three <4 x i16> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
1702
1703declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
1704
1705
; ld1x3 load of three <4 x i32> vectors from %A; %A + 12 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
  %triple = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 12
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
1714
; ld1x3 load of three <4 x i32> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
1723
1724declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*)
1725
1726
; ld1x3 load of three <2 x i32> vectors from %A; %A + 6 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
  %triple = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 6
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
1735
; ld1x3 load of three <2 x i32> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
1744
1745declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*)
1746
1747
; ld1x3 load of three <2 x i64> vectors from %A; %A + 6 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
  %triple = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 6
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
1756
; ld1x3 load of three <2 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
1765
1766declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*)
1767
1768
; ld1x3 load of three <1 x i64> vectors from %A; %A + 3 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %triple = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
1777
; ld1x3 load of three <1 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
1786
1787declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*)
1788
1789
; ld1x3 load of three <4 x float> vectors from %A; %A + 12 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
  %triple = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 12
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
1798
; ld1x3 load of three <4 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
1807
1808declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*)
1809
1810
; ld1x3 load of three <2 x float> vectors from %A; %A + 6 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
  %triple = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 6
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
1819
; ld1x3 load of three <2 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
1828
1829declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*)
1830
1831
; ld1x3 load of three <2 x double> vectors from %A; %A + 6 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #48 immediate increment.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
  %triple = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 6
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
1840
; ld1x3 load of three <2 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
1849
1850declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*)
1851
1852
; ld1x3 load of three <1 x double> vectors from %A; %A + 3 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #24 immediate increment.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %triple = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
1861
; ld1x3 load of three <1 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
1870
1871declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*)
1872
1873
; ld1x4 load of four <16 x i8> vectors from %A; %A + 64 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 64
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
1882
; ld1x4 load of four <16 x i8> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
1891
1892declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*)
1893
1894
; ld1x4 load of four <8 x i8> vectors from %A; %A + 32 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
1903
; ld1x4 load of four <8 x i8> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
1912
1913declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*)
1914
1915
; ld1x4 load of four <8 x i16> vectors from %A; %A + 32 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 32
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
1924
; ld1x4 load of four <8 x i16> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
1933
1934declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*)
1935
1936
; ld1x4 load of four <4 x i16> vectors from %A; %A + 16 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
1945
; ld1x4 load of four <4 x i16> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
1954
1955declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*)
1956
1957
; ld1x4 load of four <4 x i32> vectors from %A; %A + 16 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 16
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
1966
; ld1x4 load of four <4 x i32> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
1975
1976declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*)
1977
1978
; ld1x4 load of four <2 x i32> vectors from %A; %A + 8 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
1987
; ld1x4 load of four <2 x i32> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
1996
1997declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*)
1998
1999
; ld1x4 load of four <2 x i64> vectors from %A; %A + 8 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 8
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2008
; ld1x4 load of four <2 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2017
2018declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*)
2019
2020
; ld1x4 load of four <1 x i64> vectors from %A; %A + 4 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2029
; ld1x4 load of four <1 x i64> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2038
2039declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*)
2040
2041
; ld1x4 load of four <4 x float> vectors from %A; %A + 16 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 16
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2050
; ld1x4 load of four <4 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2059
2060declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*)
2061
2062
; ld1x4 load of four <2 x float> vectors from %A; %A + 8 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2071
; ld1x4 load of four <2 x float> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2080
2081declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)
2082
2083
; ld1x4 load of four <2 x double> vectors from %A; %A + 8 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #64 immediate increment.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 8
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}
2092
; ld1x4 load of four <2 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}
2101
2102declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)
2103
2104
; ld1x4 load of four <1 x double> vectors from %A; %A + 4 elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a #32 immediate increment.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}
2113
; ld1x4 load of four <1 x double> vectors from %A; %A + %inc elements is then stored to %ptr.
; Expected codegen: post-indexed ld1 with a register increment.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}
2122
2123declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)
2124
2125
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <16 x i8> vectors) and stores %A + 2 x i8 (2 bytes) through %ptr. The
; expected assembly contains ld2r.16b with post-index immediate #2.
2126define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
2127;CHECK-LABEL: test_v16i8_post_imm_ld2r:
2128;CHECK: ld2r.16b { v0, v1 }, [x0], #2
2129  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
2130  %tmp = getelementptr i8, i8* %A, i32 2
2131  store i8* %tmp, i8** %ptr
2132  ret { <16 x i8>, <16 x i8> } %ld2
2133}
2134
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <16 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld2r.16b with an x-register post-index operand.
2135define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2136;CHECK-LABEL: test_v16i8_post_reg_ld2r:
2137;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
2138  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
2139  %tmp = getelementptr i8, i8* %A, i64 %inc
2140  store i8* %tmp, i8** %ptr
2141  ret { <16 x i8>, <16 x i8> } %ld2
2142}
2143
2144declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
2145
2146
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <8 x i8> vectors) and stores %A + 2 x i8 (2 bytes) through %ptr. The
; expected assembly contains ld2r.8b with post-index immediate #2.
2147define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
2148;CHECK-LABEL: test_v8i8_post_imm_ld2r:
2149;CHECK: ld2r.8b { v0, v1 }, [x0], #2
2150  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
2151  %tmp = getelementptr i8, i8* %A, i32 2
2152  store i8* %tmp, i8** %ptr
2153  ret { <8 x i8>, <8 x i8> } %ld2
2154}
2155
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <8 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld2r.8b with an x-register post-index operand.
2156define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2157;CHECK-LABEL: test_v8i8_post_reg_ld2r:
2158;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
2159  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
2160  %tmp = getelementptr i8, i8* %A, i64 %inc
2161  store i8* %tmp, i8** %ptr
2162  ret { <8 x i8>, <8 x i8> } %ld2
2163}
2164
2165declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
2166
2167
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <8 x i16> vectors) and stores %A + 2 x i16 (4 bytes) through %ptr. The
; expected assembly contains ld2r.8h with post-index immediate #4.
2168define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
2169;CHECK-LABEL: test_v8i16_post_imm_ld2r:
2170;CHECK: ld2r.8h { v0, v1 }, [x0], #4
2171  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
2172  %tmp = getelementptr i16, i16* %A, i32 2
2173  store i16* %tmp, i16** %ptr
2174  ret { <8 x i16>, <8 x i16> } %ld2
2175}
2176
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <8 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld2r.8h with an x-register post-index operand.
2177define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2178;CHECK-LABEL: test_v8i16_post_reg_ld2r:
2179;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
2180  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
2181  %tmp = getelementptr i16, i16* %A, i64 %inc
2182  store i16* %tmp, i16** %ptr
2183  ret { <8 x i16>, <8 x i16> } %ld2
2184}
2185
2186declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
2187
2188
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <4 x i16> vectors) and stores %A + 2 x i16 (4 bytes) through %ptr. The
; expected assembly contains ld2r.4h with post-index immediate #4.
2189define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
2190;CHECK-LABEL: test_v4i16_post_imm_ld2r:
2191;CHECK: ld2r.4h { v0, v1 }, [x0], #4
2192  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
2193  %tmp = getelementptr i16, i16* %A, i32 2
2194  store i16* %tmp, i16** %ptr
2195  ret { <4 x i16>, <4 x i16> } %ld2
2196}
2197
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <4 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld2r.4h with an x-register post-index operand.
2198define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2199;CHECK-LABEL: test_v4i16_post_reg_ld2r:
2200;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
2201  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
2202  %tmp = getelementptr i16, i16* %A, i64 %inc
2203  store i16* %tmp, i16** %ptr
2204  ret { <4 x i16>, <4 x i16> } %ld2
2205}
2206
2207declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
2208
2209
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <4 x i32> vectors) and stores %A + 2 x i32 (8 bytes) through %ptr. The
; expected assembly contains ld2r.4s with post-index immediate #8.
2210define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
2211;CHECK-LABEL: test_v4i32_post_imm_ld2r:
2212;CHECK: ld2r.4s { v0, v1 }, [x0], #8
2213  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
2214  %tmp = getelementptr i32, i32* %A, i32 2
2215  store i32* %tmp, i32** %ptr
2216  ret { <4 x i32>, <4 x i32> } %ld2
2217}
2218
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <4 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld2r.4s with an x-register post-index operand.
2219define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2220;CHECK-LABEL: test_v4i32_post_reg_ld2r:
2221;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
2222  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
2223  %tmp = getelementptr i32, i32* %A, i64 %inc
2224  store i32* %tmp, i32** %ptr
2225  ret { <4 x i32>, <4 x i32> } %ld2
2226}
2227
2228declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
2229
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <2 x i32> vectors) and stores %A + 2 x i32 (8 bytes) through %ptr. The
; expected assembly contains ld2r.2s with post-index immediate #8.
2230define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
2231;CHECK-LABEL: test_v2i32_post_imm_ld2r:
2232;CHECK: ld2r.2s { v0, v1 }, [x0], #8
2233  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
2234  %tmp = getelementptr i32, i32* %A, i32 2
2235  store i32* %tmp, i32** %ptr
2236  ret { <2 x i32>, <2 x i32> } %ld2
2237}
2238
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <2 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld2r.2s with an x-register post-index operand.
2239define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2240;CHECK-LABEL: test_v2i32_post_reg_ld2r:
2241;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
2242  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
2243  %tmp = getelementptr i32, i32* %A, i64 %inc
2244  store i32* %tmp, i32** %ptr
2245  ret { <2 x i32>, <2 x i32> } %ld2
2246}
2247
2248declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
2249
2250
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <2 x i64> vectors) and stores %A + 2 x i64 (16 bytes) through %ptr. The
; expected assembly contains ld2r.2d with post-index immediate #16.
2251define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
2252;CHECK-LABEL: test_v2i64_post_imm_ld2r:
2253;CHECK: ld2r.2d { v0, v1 }, [x0], #16
2254  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
2255  %tmp = getelementptr i64, i64* %A, i32 2
2256  store i64* %tmp, i64** %ptr
2257  ret { <2 x i64>, <2 x i64> } %ld2
2258}
2259
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <2 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld2r.2d with an x-register post-index operand.
2260define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2261;CHECK-LABEL: test_v2i64_post_reg_ld2r:
2262;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
2263  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
2264  %tmp = getelementptr i64, i64* %A, i64 %inc
2265  store i64* %tmp, i64** %ptr
2266  ret { <2 x i64>, <2 x i64> } %ld2
2267}
2268
2269declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
2270
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <1 x i64> vectors) and stores %A + 2 x i64 (16 bytes) through %ptr. The
; expected assembly contains ld2r.1d with post-index immediate #16.
2271define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
2272;CHECK-LABEL: test_v1i64_post_imm_ld2r:
2273;CHECK: ld2r.1d { v0, v1 }, [x0], #16
2274  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
2275  %tmp = getelementptr i64, i64* %A, i32 2
2276  store i64* %tmp, i64** %ptr
2277  ret { <1 x i64>, <1 x i64> } %ld2
2278}
2279
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <1 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld2r.1d with an x-register post-index operand.
2280define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2281;CHECK-LABEL: test_v1i64_post_reg_ld2r:
2282;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
2283  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
2284  %tmp = getelementptr i64, i64* %A, i64 %inc
2285  store i64* %tmp, i64** %ptr
2286  ret { <1 x i64>, <1 x i64> } %ld2
2287}
2288
2289declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
2290
2291
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <4 x float> vectors) and stores %A + 2 x float (8 bytes) through %ptr. The
; expected assembly contains ld2r.4s with post-index immediate #8.
2292define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
2293;CHECK-LABEL: test_v4f32_post_imm_ld2r:
2294;CHECK: ld2r.4s { v0, v1 }, [x0], #8
2295  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
2296  %tmp = getelementptr float, float* %A, i32 2
2297  store float* %tmp, float** %ptr
2298  ret { <4 x float>, <4 x float> } %ld2
2299}
2300
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <4 x float> vectors) and stores %A + %inc x float through %ptr. The
; expected assembly contains ld2r.4s with an x-register post-index operand.
2301define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
2302;CHECK-LABEL: test_v4f32_post_reg_ld2r:
2303;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
2304  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
2305  %tmp = getelementptr float, float* %A, i64 %inc
2306  store float* %tmp, float** %ptr
2307  ret { <4 x float>, <4 x float> } %ld2
2308}
2309
2310declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
2311
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <2 x float> vectors) and stores %A + 2 x float (8 bytes) through %ptr. The
; expected assembly contains ld2r.2s with post-index immediate #8.
2312define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
2313;CHECK-LABEL: test_v2f32_post_imm_ld2r:
2314;CHECK: ld2r.2s { v0, v1 }, [x0], #8
2315  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
2316  %tmp = getelementptr float, float* %A, i32 2
2317  store float* %tmp, float** %ptr
2318  ret { <2 x float>, <2 x float> } %ld2
2319}
2320
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <2 x float> vectors) and stores %A + %inc x float through %ptr. The
; expected assembly contains ld2r.2s with an x-register post-index operand.
2321define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
2322;CHECK-LABEL: test_v2f32_post_reg_ld2r:
2323;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
2324  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
2325  %tmp = getelementptr float, float* %A, i64 %inc
2326  store float* %tmp, float** %ptr
2327  ret { <2 x float>, <2 x float> } %ld2
2328}
2329
2330declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly
2331
2332
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <2 x double> vectors) and stores %A + 2 x double (16 bytes) through %ptr.
; The expected assembly contains ld2r.2d with post-index immediate #16.
2333define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
2334;CHECK-LABEL: test_v2f64_post_imm_ld2r:
2335;CHECK: ld2r.2d { v0, v1 }, [x0], #16
2336  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
2337  %tmp = getelementptr double, double* %A, i32 2
2338  store double* %tmp, double** %ptr
2339  ret { <2 x double>, <2 x double> } %ld2
2340}
2341
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <2 x double> vectors) and stores %A + %inc x double through %ptr. The
; expected assembly contains ld2r.2d with an x-register post-index operand.
2342define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
2343;CHECK-LABEL: test_v2f64_post_reg_ld2r:
2344;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
2345  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
2346  %tmp = getelementptr double, double* %A, i64 %inc
2347  store double* %tmp, double** %ptr
2348  ret { <2 x double>, <2 x double> } %ld2
2349}
2350
2351declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
2352
; Post-increment by immediate: calls the ld2r intrinsic on %A (result: two
; <1 x double> vectors) and stores %A + 2 x double (16 bytes) through %ptr.
; The expected assembly contains ld2r.1d with post-index immediate #16.
2353define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
2354;CHECK-LABEL: test_v1f64_post_imm_ld2r:
2355;CHECK: ld2r.1d { v0, v1 }, [x0], #16
2356  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
2357  %tmp = getelementptr double, double* %A, i32 2
2358  store double* %tmp, double** %ptr
2359  ret { <1 x double>, <1 x double> } %ld2
2360}
2361
; Post-increment by register: calls the ld2r intrinsic on %A (result: two
; <1 x double> vectors) and stores %A + %inc x double through %ptr. The
; expected assembly contains ld2r.1d with an x-register post-index operand.
2362define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
2363;CHECK-LABEL: test_v1f64_post_reg_ld2r:
2364;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
2365  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
2366  %tmp = getelementptr double, double* %A, i64 %inc
2367  store double* %tmp, double** %ptr
2368  ret { <1 x double>, <1 x double> } %ld2
2369}
2370
2371declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly
2372
2373
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <16 x i8> vectors) and stores %A + 3 x i8 (3 bytes) through %ptr. The
; expected assembly contains ld3r.16b with post-index immediate #3.
2374define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
2375;CHECK-LABEL: test_v16i8_post_imm_ld3r:
2376;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
2377  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
2378  %tmp = getelementptr i8, i8* %A, i32 3
2379  store i8* %tmp, i8** %ptr
2380  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
2381}
2382
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <16 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld3r.16b with an x-register post-index operand.
2383define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2384;CHECK-LABEL: test_v16i8_post_reg_ld3r:
2385;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
2386  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
2387  %tmp = getelementptr i8, i8* %A, i64 %inc
2388  store i8* %tmp, i8** %ptr
2389  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
2390}
2391
2392declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
2393
2394
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <8 x i8> vectors) and stores %A + 3 x i8 (3 bytes) through %ptr. The
; expected assembly contains ld3r.8b with post-index immediate #3.
2395define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
2396;CHECK-LABEL: test_v8i8_post_imm_ld3r:
2397;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
2398  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
2399  %tmp = getelementptr i8, i8* %A, i32 3
2400  store i8* %tmp, i8** %ptr
2401  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
2402}
2403
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <8 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld3r.8b with an x-register post-index operand.
2404define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2405;CHECK-LABEL: test_v8i8_post_reg_ld3r:
2406;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
2407  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
2408  %tmp = getelementptr i8, i8* %A, i64 %inc
2409  store i8* %tmp, i8** %ptr
2410  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
2411}
2412
2413declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
2414
2415
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <8 x i16> vectors) and stores %A + 3 x i16 (6 bytes) through %ptr. The
; expected assembly contains ld3r.8h with post-index immediate #6.
2416define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
2417;CHECK-LABEL: test_v8i16_post_imm_ld3r:
2418;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
2419  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
2420  %tmp = getelementptr i16, i16* %A, i32 3
2421  store i16* %tmp, i16** %ptr
2422  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
2423}
2424
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <8 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld3r.8h with an x-register post-index operand.
2425define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2426;CHECK-LABEL: test_v8i16_post_reg_ld3r:
2427;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
2428  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
2429  %tmp = getelementptr i16, i16* %A, i64 %inc
2430  store i16* %tmp, i16** %ptr
2431  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
2432}
2433
2434declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
2435
2436
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <4 x i16> vectors) and stores %A + 3 x i16 (6 bytes) through %ptr. The
; expected assembly contains ld3r.4h with post-index immediate #6.
2437define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
2438;CHECK-LABEL: test_v4i16_post_imm_ld3r:
2439;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
2440  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
2441  %tmp = getelementptr i16, i16* %A, i32 3
2442  store i16* %tmp, i16** %ptr
2443  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
2444}
2445
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <4 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld3r.4h with an x-register post-index operand.
2446define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2447;CHECK-LABEL: test_v4i16_post_reg_ld3r:
2448;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
2449  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
2450  %tmp = getelementptr i16, i16* %A, i64 %inc
2451  store i16* %tmp, i16** %ptr
2452  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
2453}
2454
2455declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
2456
2457
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <4 x i32> vectors) and stores %A + 3 x i32 (12 bytes) through %ptr. The
; expected assembly contains ld3r.4s with post-index immediate #12.
2458define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
2459;CHECK-LABEL: test_v4i32_post_imm_ld3r:
2460;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
2461  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
2462  %tmp = getelementptr i32, i32* %A, i32 3
2463  store i32* %tmp, i32** %ptr
2464  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
2465}
2466
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <4 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld3r.4s with an x-register post-index operand.
2467define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2468;CHECK-LABEL: test_v4i32_post_reg_ld3r:
2469;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2470  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
2471  %tmp = getelementptr i32, i32* %A, i64 %inc
2472  store i32* %tmp, i32** %ptr
2473  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
2474}
2475
2476declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
2477
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <2 x i32> vectors) and stores %A + 3 x i32 (12 bytes) through %ptr. The
; expected assembly contains ld3r.2s with post-index immediate #12.
2478define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
2479;CHECK-LABEL: test_v2i32_post_imm_ld3r:
2480;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
2481  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
2482  %tmp = getelementptr i32, i32* %A, i32 3
2483  store i32* %tmp, i32** %ptr
2484  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
2485}
2486
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <2 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld3r.2s with an x-register post-index operand.
2487define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2488;CHECK-LABEL: test_v2i32_post_reg_ld3r:
2489;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2490  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
2491  %tmp = getelementptr i32, i32* %A, i64 %inc
2492  store i32* %tmp, i32** %ptr
2493  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
2494}
2495
2496declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
2497
2498
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <2 x i64> vectors) and stores %A + 3 x i64 (24 bytes) through %ptr. The
; expected assembly contains ld3r.2d with post-index immediate #24.
2499define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
2500;CHECK-LABEL: test_v2i64_post_imm_ld3r:
2501;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
2502  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
2503  %tmp = getelementptr i64, i64* %A, i32 3
2504  store i64* %tmp, i64** %ptr
2505  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
2506}
2507
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <2 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld3r.2d with an x-register post-index operand.
2508define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2509;CHECK-LABEL: test_v2i64_post_reg_ld3r:
2510;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2511  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
2512  %tmp = getelementptr i64, i64* %A, i64 %inc
2513  store i64* %tmp, i64** %ptr
2514  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
2515}
2516
2517declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
2518
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <1 x i64> vectors) and stores %A + 3 x i64 (24 bytes) through %ptr. The
; expected assembly contains ld3r.1d with post-index immediate #24.
2519define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
2520;CHECK-LABEL: test_v1i64_post_imm_ld3r:
2521;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
2522  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
2523  %tmp = getelementptr i64, i64* %A, i32 3
2524  store i64* %tmp, i64** %ptr
2525  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
2526}
2527
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <1 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld3r.1d with an x-register post-index operand.
2528define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2529;CHECK-LABEL: test_v1i64_post_reg_ld3r:
2530;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2531  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
2532  %tmp = getelementptr i64, i64* %A, i64 %inc
2533  store i64* %tmp, i64** %ptr
2534  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
2535}
2536
2537declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
2538
2539
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <4 x float> vectors) and stores %A + 3 x float (12 bytes) through %ptr.
; The expected assembly contains ld3r.4s with post-index immediate #12.
2540define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
2541;CHECK-LABEL: test_v4f32_post_imm_ld3r:
2542;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
2543  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
2544  %tmp = getelementptr float, float* %A, i32 3
2545  store float* %tmp, float** %ptr
2546  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
2547}
2548
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <4 x float> vectors) and stores %A + %inc x float through %ptr. The
; expected assembly contains ld3r.4s with an x-register post-index operand.
2549define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
2550;CHECK-LABEL: test_v4f32_post_reg_ld3r:
2551;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2552  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
2553  %tmp = getelementptr float, float* %A, i64 %inc
2554  store float* %tmp, float** %ptr
2555  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
2556}
2557
2558declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
2559
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <2 x float> vectors) and stores %A + 3 x float (12 bytes) through %ptr.
; The expected assembly contains ld3r.2s with post-index immediate #12.
2560define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
2561;CHECK-LABEL: test_v2f32_post_imm_ld3r:
2562;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
2563  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
2564  %tmp = getelementptr float, float* %A, i32 3
2565  store float* %tmp, float** %ptr
2566  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
2567}
2568
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <2 x float> vectors) and stores %A + %inc x float through %ptr. The
; expected assembly contains ld3r.2s with an x-register post-index operand.
2569define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
2570;CHECK-LABEL: test_v2f32_post_reg_ld3r:
2571;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2572  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
2573  %tmp = getelementptr float, float* %A, i64 %inc
2574  store float* %tmp, float** %ptr
2575  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
2576}
2577
2578declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly
2579
2580
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <2 x double> vectors) and stores %A + 3 x double (24 bytes) through %ptr.
; The expected assembly contains ld3r.2d with post-index immediate #24.
2581define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
2582;CHECK-LABEL: test_v2f64_post_imm_ld3r:
2583;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
2584  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
2585  %tmp = getelementptr double, double* %A, i32 3
2586  store double* %tmp, double** %ptr
2587  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
2588}
2589
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <2 x double> vectors) and stores %A + %inc x double through %ptr. The
; expected assembly contains ld3r.2d with an x-register post-index operand.
2590define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
2591;CHECK-LABEL: test_v2f64_post_reg_ld3r:
2592;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2593  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
2594  %tmp = getelementptr double, double* %A, i64 %inc
2595  store double* %tmp, double** %ptr
2596  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
2597}
2598
2599declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
2600
; Post-increment by immediate: calls the ld3r intrinsic on %A (result: three
; <1 x double> vectors) and stores %A + 3 x double (24 bytes) through %ptr.
; The expected assembly contains ld3r.1d with post-index immediate #24.
2601define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
2602;CHECK-LABEL: test_v1f64_post_imm_ld3r:
2603;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
2604  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
2605  %tmp = getelementptr double, double* %A, i32 3
2606  store double* %tmp, double** %ptr
2607  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
2608}
2609
; Post-increment by register: calls the ld3r intrinsic on %A (result: three
; <1 x double> vectors) and stores %A + %inc x double through %ptr. The
; expected assembly contains ld3r.1d with an x-register post-index operand.
2610define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
2611;CHECK-LABEL: test_v1f64_post_reg_ld3r:
2612;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2613  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
2614  %tmp = getelementptr double, double* %A, i64 %inc
2615  store double* %tmp, double** %ptr
2616  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
2617}
2618
2619declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly
2620
2621
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <16 x i8> vectors) and stores %A + 4 x i8 (4 bytes) through %ptr. The
; expected assembly contains ld4r.16b with post-index immediate #4.
2622define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
2623;CHECK-LABEL: test_v16i8_post_imm_ld4r:
2624;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
2625  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
2626  %tmp = getelementptr i8, i8* %A, i32 4
2627  store i8* %tmp, i8** %ptr
2628  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
2629}
2630
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <16 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld4r.16b with an x-register post-index operand.
2631define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2632;CHECK-LABEL: test_v16i8_post_reg_ld4r:
2633;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2634  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
2635  %tmp = getelementptr i8, i8* %A, i64 %inc
2636  store i8* %tmp, i8** %ptr
2637  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
2638}
2639
2640declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
2641
2642
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <8 x i8> vectors) and stores %A + 4 x i8 (4 bytes) through %ptr. The
; expected assembly contains ld4r.8b with post-index immediate #4.
2643define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
2644;CHECK-LABEL: test_v8i8_post_imm_ld4r:
2645;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
2646  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
2647  %tmp = getelementptr i8, i8* %A, i32 4
2648  store i8* %tmp, i8** %ptr
2649  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
2650}
2651
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <8 x i8> vectors) and stores %A + %inc x i8 through %ptr. The expected
; assembly contains ld4r.8b with an x-register post-index operand.
2652define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2653;CHECK-LABEL: test_v8i8_post_reg_ld4r:
2654;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2655  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
2656  %tmp = getelementptr i8, i8* %A, i64 %inc
2657  store i8* %tmp, i8** %ptr
2658  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
2659}
2660
2661declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
2662
2663
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <8 x i16> vectors) and stores %A + 4 x i16 (8 bytes) through %ptr. The
; expected assembly contains ld4r.8h with post-index immediate #8.
2664define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
2665;CHECK-LABEL: test_v8i16_post_imm_ld4r:
2666;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
2667  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
2668  %tmp = getelementptr i16, i16* %A, i32 4
2669  store i16* %tmp, i16** %ptr
2670  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
2671}
2672
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <8 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld4r.8h with an x-register post-index operand.
2673define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2674;CHECK-LABEL: test_v8i16_post_reg_ld4r:
2675;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2676  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
2677  %tmp = getelementptr i16, i16* %A, i64 %inc
2678  store i16* %tmp, i16** %ptr
2679  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
2680}
2681
2682declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
2683
2684
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <4 x i16> vectors) and stores %A + 4 x i16 (8 bytes) through %ptr. The
; expected assembly contains ld4r.4h with post-index immediate #8.
2685define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
2686;CHECK-LABEL: test_v4i16_post_imm_ld4r:
2687;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
2688  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
2689  %tmp = getelementptr i16, i16* %A, i32 4
2690  store i16* %tmp, i16** %ptr
2691  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
2692}
2693
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <4 x i16> vectors) and stores %A + %inc x i16 through %ptr. The expected
; assembly contains ld4r.4h with an x-register post-index operand.
2694define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2695;CHECK-LABEL: test_v4i16_post_reg_ld4r:
2696;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2697  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
2698  %tmp = getelementptr i16, i16* %A, i64 %inc
2699  store i16* %tmp, i16** %ptr
2700  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
2701}
2702
2703declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
2704
2705
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <4 x i32> vectors) and stores %A + 4 x i32 (16 bytes) through %ptr. The
; expected assembly contains ld4r.4s with post-index immediate #16.
2706define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
2707;CHECK-LABEL: test_v4i32_post_imm_ld4r:
2708;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
2709  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
2710  %tmp = getelementptr i32, i32* %A, i32 4
2711  store i32* %tmp, i32** %ptr
2712  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
2713}
2714
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <4 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld4r.4s with an x-register post-index operand.
2715define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2716;CHECK-LABEL: test_v4i32_post_reg_ld4r:
2717;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2718  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
2719  %tmp = getelementptr i32, i32* %A, i64 %inc
2720  store i32* %tmp, i32** %ptr
2721  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
2722}
2723
2724declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
2725
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <2 x i32> vectors) and stores %A + 4 x i32 (16 bytes) through %ptr. The
; expected assembly contains ld4r.2s with post-index immediate #16.
2726define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
2727;CHECK-LABEL: test_v2i32_post_imm_ld4r:
2728;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
2729  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
2730  %tmp = getelementptr i32, i32* %A, i32 4
2731  store i32* %tmp, i32** %ptr
2732  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
2733}
2734
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <2 x i32> vectors) and stores %A + %inc x i32 through %ptr. The expected
; assembly contains ld4r.2s with an x-register post-index operand.
2735define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2736;CHECK-LABEL: test_v2i32_post_reg_ld4r:
2737;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2738  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
2739  %tmp = getelementptr i32, i32* %A, i64 %inc
2740  store i32* %tmp, i32** %ptr
2741  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
2742}
2743
2744declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
2745
2746
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <2 x i64> vectors) and stores %A + 4 x i64 (32 bytes) through %ptr. The
; expected assembly contains ld4r.2d with post-index immediate #32.
2747define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
2748;CHECK-LABEL: test_v2i64_post_imm_ld4r:
2749;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
2750  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
2751  %tmp = getelementptr i64, i64* %A, i32 4
2752  store i64* %tmp, i64** %ptr
2753  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
2754}
2755
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <2 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld4r.2d with an x-register post-index operand.
2756define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2757;CHECK-LABEL: test_v2i64_post_reg_ld4r:
2758;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2759  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
2760  %tmp = getelementptr i64, i64* %A, i64 %inc
2761  store i64* %tmp, i64** %ptr
2762  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
2763}
2764
2765declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
2766
; Post-increment by immediate: calls the ld4r intrinsic on %A (result: four
; <1 x i64> vectors) and stores %A + 4 x i64 (32 bytes) through %ptr. The
; expected assembly contains ld4r.1d with post-index immediate #32.
2767define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
2768;CHECK-LABEL: test_v1i64_post_imm_ld4r:
2769;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
2770  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
2771  %tmp = getelementptr i64, i64* %A, i32 4
2772  store i64* %tmp, i64** %ptr
2773  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
2774}
2775
; Post-increment by register: calls the ld4r intrinsic on %A (result: four
; <1 x i64> vectors) and stores %A + %inc x i64 through %ptr. The expected
; assembly contains ld4r.1d with an x-register post-index operand.
2776define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2777;CHECK-LABEL: test_v1i64_post_reg_ld4r:
2778;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2779  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
2780  %tmp = getelementptr i64, i64* %A, i64 %inc
2781  store i64* %tmp, i64** %ptr
2782  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
2783}
2784
2785declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
2786
2787
; ld4r, immediate post-index: %A stepped by 4 x float (16 bytes) is saved to %ptr, matching the expected #16 writeback.
2788define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
2789;CHECK-LABEL: test_v4f32_post_imm_ld4r:
2790;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
2791  %quad = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
2792  %next = getelementptr float, float* %A, i32 4
2793  store float* %next, float** %ptr
2794  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
2795}
2796
; ld4r, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
2797define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
2798;CHECK-LABEL: test_v4f32_post_reg_ld4r:
2799;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2800  %quad = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
2801  %next = getelementptr float, float* %A, i64 %inc
2802  store float* %next, float** %ptr
2803  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
2804}
2805
2806declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
2807
; ld4r, immediate post-index: %A stepped by 4 x float (16 bytes) is saved to %ptr, matching the expected #16 writeback.
2808define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
2809;CHECK-LABEL: test_v2f32_post_imm_ld4r:
2810;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
2811  %quad = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
2812  %next = getelementptr float, float* %A, i32 4
2813  store float* %next, float** %ptr
2814  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
2815}
2816
; ld4r, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
2817define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
2818;CHECK-LABEL: test_v2f32_post_reg_ld4r:
2819;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2820  %quad = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
2821  %next = getelementptr float, float* %A, i64 %inc
2822  store float* %next, float** %ptr
2823  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
2824}
2825
2826declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly
2827
2828
; ld4r, immediate post-index: %A stepped by 4 x double (32 bytes) is saved to %ptr, matching the expected #32 writeback.
2829define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
2830;CHECK-LABEL: test_v2f64_post_imm_ld4r:
2831;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
2832  %quad = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
2833  %next = getelementptr double, double* %A, i32 4
2834  store double* %next, double** %ptr
2835  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
2836}
2837
; ld4r, register post-index: %A stepped by %inc x double is saved to %ptr, so a register writeback operand is expected.
2838define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2839;CHECK-LABEL: test_v2f64_post_reg_ld4r:
2840;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2841  %quad = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
2842  %next = getelementptr double, double* %A, i64 %inc
2843  store double* %next, double** %ptr
2844  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
2845}
2846
2847declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
2848
; ld4r, immediate post-index: %A stepped by 4 x double (32 bytes) is saved to %ptr, matching the expected #32 writeback.
2849define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
2850;CHECK-LABEL: test_v1f64_post_imm_ld4r:
2851;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
2852  %quad = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2853  %next = getelementptr double, double* %A, i32 4
2854  store double* %next, double** %ptr
2855  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
2856}
2857
; ld4r, register post-index: %A stepped by %inc x double is saved to %ptr, so a register writeback operand is expected.
2858define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2859;CHECK-LABEL: test_v1f64_post_reg_ld4r:
2860;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2861  %quad = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2862  %next = getelementptr double, double* %A, i64 %inc
2863  store double* %next, double** %ptr
2864  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
2865}
2866
2867declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly
2868
2869
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i8 (2 bytes) is saved to %ptr, matching the expected #2 writeback.
2870define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
2871;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
2872;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2873  %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2874  %next = getelementptr i8, i8* %A, i32 2
2875  store i8* %next, i8** %ptr
2876  ret { <16 x i8>, <16 x i8> } %pair
2877}
2878
; ld2 into lane 0, register post-index: %A stepped by %inc x i8 is saved to %ptr, so a register writeback operand is expected.
2879define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
2880;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
2881;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2882  %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2883  %next = getelementptr i8, i8* %A, i64 %inc
2884  store i8* %next, i8** %ptr
2885  ret { <16 x i8>, <16 x i8> } %pair
2886}
2887
2888declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
2889
2890
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i8 (2 bytes) is saved to %ptr, matching the expected #2 writeback.
2891define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
2892;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
2893;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2894  %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2895  %next = getelementptr i8, i8* %A, i32 2
2896  store i8* %next, i8** %ptr
2897  ret { <8 x i8>, <8 x i8> } %pair
2898}
2899
; ld2 into lane 0, register post-index: %A stepped by %inc x i8 is saved to %ptr, so a register writeback operand is expected.
2900define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
2901;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
2902;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2903  %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2904  %next = getelementptr i8, i8* %A, i64 %inc
2905  store i8* %next, i8** %ptr
2906  ret { <8 x i8>, <8 x i8> } %pair
2907}
2908
2909declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
2910
2911
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i16 (4 bytes) is saved to %ptr, matching the expected #4 writeback.
2912define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
2913;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
2914;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2915  %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2916  %next = getelementptr i16, i16* %A, i32 2
2917  store i16* %next, i16** %ptr
2918  ret { <8 x i16>, <8 x i16> } %pair
2919}
2920
; ld2 into lane 0, register post-index: %A stepped by %inc x i16 is saved to %ptr, so a register writeback operand is expected.
2921define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
2922;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
2923;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2924  %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2925  %next = getelementptr i16, i16* %A, i64 %inc
2926  store i16* %next, i16** %ptr
2927  ret { <8 x i16>, <8 x i16> } %pair
2928}
2929
2930declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
2931
2932
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i16 (4 bytes) is saved to %ptr, matching the expected #4 writeback.
2933define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
2934;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
2935;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2936  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2937  %next = getelementptr i16, i16* %A, i32 2
2938  store i16* %next, i16** %ptr
2939  ret { <4 x i16>, <4 x i16> } %pair
2940}
2941
; ld2 into lane 0, register post-index: %A stepped by %inc x i16 is saved to %ptr, so a register writeback operand is expected.
2942define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
2943;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
2944;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2945  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2946  %next = getelementptr i16, i16* %A, i64 %inc
2947  store i16* %next, i16** %ptr
2948  ret { <4 x i16>, <4 x i16> } %pair
2949}
2950
2951declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
2952
2953
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i32 (8 bytes) is saved to %ptr, matching the expected #8 writeback.
2954define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
2955;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
2956;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2957  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2958  %next = getelementptr i32, i32* %A, i32 2
2959  store i32* %next, i32** %ptr
2960  ret { <4 x i32>, <4 x i32> } %pair
2961}
2962
; ld2 into lane 0, register post-index: %A stepped by %inc x i32 is saved to %ptr, so a register writeback operand is expected.
2963define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
2964;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
2965;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2966  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2967  %next = getelementptr i32, i32* %A, i64 %inc
2968  store i32* %next, i32** %ptr
2969  ret { <4 x i32>, <4 x i32> } %pair
2970}
2971
2972declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
2973
2974
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i32 (8 bytes) is saved to %ptr, matching the expected #8 writeback.
2975define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
2976;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
2977;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2978  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2979  %next = getelementptr i32, i32* %A, i32 2
2980  store i32* %next, i32** %ptr
2981  ret { <2 x i32>, <2 x i32> } %pair
2982}
2983
; ld2 into lane 0, register post-index: %A stepped by %inc x i32 is saved to %ptr, so a register writeback operand is expected.
2984define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
2985;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
2986;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2987  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2988  %next = getelementptr i32, i32* %A, i64 %inc
2989  store i32* %next, i32** %ptr
2990  ret { <2 x i32>, <2 x i32> } %pair
2991}
2992
2993declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
2994
2995
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i64 (16 bytes) is saved to %ptr, matching the expected #16 writeback.
2996define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
2997;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
2998;CHECK: ld2.d { v0, v1 }[0], [x0], #16
2999  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3000  %next = getelementptr i64, i64* %A, i32 2
3001  store i64* %next, i64** %ptr
3002  ret { <2 x i64>, <2 x i64> } %pair
3003}
3004
; ld2 into lane 0, register post-index: %A stepped by %inc x i64 is saved to %ptr, so a register writeback operand is expected.
3005define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
3006;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
3007;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3008  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3009  %next = getelementptr i64, i64* %A, i64 %inc
3010  store i64* %next, i64** %ptr
3011  ret { <2 x i64>, <2 x i64> } %pair
3012}
3013
3014declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3015
3016
; ld2 into lane 0, immediate post-index: %A stepped by 2 x i64 (16 bytes) is saved to %ptr, matching the expected #16 writeback.
3017define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
3018;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
3019;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3020  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3021  %next = getelementptr i64, i64* %A, i32 2
3022  store i64* %next, i64** %ptr
3023  ret { <1 x i64>, <1 x i64> } %pair
3024}
3025
; ld2 into lane 0, register post-index: %A stepped by %inc x i64 is saved to %ptr, so a register writeback operand is expected.
3026define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
3027;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
3028;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3029  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3030  %next = getelementptr i64, i64* %A, i64 %inc
3031  store i64* %next, i64** %ptr
3032  ret { <1 x i64>, <1 x i64> } %pair
3033}
3034
3035declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3036
3037
; ld2 into lane 0, immediate post-index: %A stepped by 2 x float (8 bytes) is saved to %ptr, matching the expected #8 writeback.
3038define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
3039;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
3040;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3041  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3042  %next = getelementptr float, float* %A, i32 2
3043  store float* %next, float** %ptr
3044  ret { <4 x float>, <4 x float> } %pair
3045}
3046
; ld2 into lane 0, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
3047define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
3048;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
3049;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3050  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3051  %next = getelementptr float, float* %A, i64 %inc
3052  store float* %next, float** %ptr
3053  ret { <4 x float>, <4 x float> } %pair
3054}
3055
3056declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly
3057
3058
; ld2 into lane 0, immediate post-index: %A stepped by 2 x float (8 bytes) is saved to %ptr, matching the expected #8 writeback.
3059define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
3060;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
3061;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3062  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3063  %next = getelementptr float, float* %A, i32 2
3064  store float* %next, float** %ptr
3065  ret { <2 x float>, <2 x float> } %pair
3066}
3067
; ld2 into lane 0, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
3068define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
3069;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
3070;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3071  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3072  %next = getelementptr float, float* %A, i64 %inc
3073  store float* %next, float** %ptr
3074  ret { <2 x float>, <2 x float> } %pair
3075}
3076
3077declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly
3078
3079
; ld2 into lane 0, immediate post-index: %A stepped by 2 x double (16 bytes) is saved to %ptr, matching the expected #16 writeback.
3080define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
3081;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
3082;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3083  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3084  %next = getelementptr double, double* %A, i32 2
3085  store double* %next, double** %ptr
3086  ret { <2 x double>, <2 x double> } %pair
3087}
3088
; ld2 into lane 0, register post-index: %A stepped by %inc x double is saved to %ptr, so a register writeback operand is expected.
3089define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
3090;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
3091;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3092  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3093  %next = getelementptr double, double* %A, i64 %inc
3094  store double* %next, double** %ptr
3095  ret { <2 x double>, <2 x double> } %pair
3096}
3097
3098declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly
3099
3100
; ld2 into lane 0, immediate post-index: %A stepped by 2 x double (16 bytes) is saved to %ptr, matching the expected #16 writeback.
3101define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
3102;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
3103;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3104  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3105  %next = getelementptr double, double* %A, i32 2
3106  store double* %next, double** %ptr
3107  ret { <1 x double>, <1 x double> } %pair
3108}
3109
; ld2 into lane 0, register post-index: %A stepped by %inc x double is saved to %ptr, so a register writeback operand is expected.
3110define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
3111;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
3112;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3113  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3114  %next = getelementptr double, double* %A, i64 %inc
3115  store double* %next, double** %ptr
3116  ret { <1 x double>, <1 x double> } %pair
3117}
3118
3119declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly
3120
3121
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i8 (3 bytes) is saved to %ptr, matching the expected #3 writeback.
3122define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3123;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
3124;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3125  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3126  %next = getelementptr i8, i8* %A, i32 3
3127  store i8* %next, i8** %ptr
3128  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
3129}
3130
; ld3 into lane 0, register post-index: %A stepped by %inc x i8 is saved to %ptr, so a register writeback operand is expected.
3131define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3132;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
3133;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3134  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3135  %next = getelementptr i8, i8* %A, i64 %inc
3136  store i8* %next, i8** %ptr
3137  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
3138}
3139
3140declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
3141
3142
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i8 (3 bytes) is saved to %ptr, matching the expected #3 writeback.
3143define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3144;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
3145;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3146  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3147  %next = getelementptr i8, i8* %A, i32 3
3148  store i8* %next, i8** %ptr
3149  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
3150}
3151
; ld3 into lane 0, register post-index: %A stepped by %inc x i8 is saved to %ptr, so a register writeback operand is expected.
3152define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3153;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
3154;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3155  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3156  %next = getelementptr i8, i8* %A, i64 %inc
3157  store i8* %next, i8** %ptr
3158  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
3159}
3160
3161declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
3162
3163
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i16 (6 bytes) is saved to %ptr, matching the expected #6 writeback.
3164define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3165;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
3166;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3167  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3168  %next = getelementptr i16, i16* %A, i32 3
3169  store i16* %next, i16** %ptr
3170  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
3171}
3172
; ld3 into lane 0, register post-index: %A stepped by %inc x i16 is saved to %ptr, so a register writeback operand is expected.
3173define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3174;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
3175;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3176  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3177  %next = getelementptr i16, i16* %A, i64 %inc
3178  store i16* %next, i16** %ptr
3179  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
3180}
3181
3182declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
3183
3184
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i16 (6 bytes) is saved to %ptr, matching the expected #6 writeback.
3185define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3186;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
3187;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3188  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3189  %next = getelementptr i16, i16* %A, i32 3
3190  store i16* %next, i16** %ptr
3191  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
3192}
3193
; ld3 into lane 0, register post-index: %A stepped by %inc x i16 is saved to %ptr, so a register writeback operand is expected.
3194define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3195;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
3196;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3197  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3198  %next = getelementptr i16, i16* %A, i64 %inc
3199  store i16* %next, i16** %ptr
3200  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
3201}
3202
3203declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
3204
3205
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i32 (12 bytes) is saved to %ptr, matching the expected #12 writeback.
3206define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3207;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
3208;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3209  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3210  %next = getelementptr i32, i32* %A, i32 3
3211  store i32* %next, i32** %ptr
3212  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
3213}
3214
; ld3 into lane 0, register post-index: %A stepped by %inc x i32 is saved to %ptr, so a register writeback operand is expected.
3215define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3216;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
3217;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3218  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3219  %next = getelementptr i32, i32* %A, i64 %inc
3220  store i32* %next, i32** %ptr
3221  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
3222}
3223
3224declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
3225
3226
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i32 (12 bytes) is saved to %ptr, matching the expected #12 writeback.
3227define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3228;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
3229;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3230  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3231  %next = getelementptr i32, i32* %A, i32 3
3232  store i32* %next, i32** %ptr
3233  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
3234}
3235
; ld3 into lane 0, register post-index: %A stepped by %inc x i32 is saved to %ptr, so a register writeback operand is expected.
3236define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3237;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
3238;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3239  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3240  %next = getelementptr i32, i32* %A, i64 %inc
3241  store i32* %next, i32** %ptr
3242  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
3243}
3244
3245declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
3246
3247
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i64 (24 bytes) is saved to %ptr, matching the expected #24 writeback.
3248define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3249;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
3250;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3251  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3252  %next = getelementptr i64, i64* %A, i32 3
3253  store i64* %next, i64** %ptr
3254  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
3255}
3256
; ld3 into lane 0, register post-index: %A stepped by %inc x i64 is saved to %ptr, so a register writeback operand is expected.
3257define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3258;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
3259;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3260  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3261  %next = getelementptr i64, i64* %A, i64 %inc
3262  store i64* %next, i64** %ptr
3263  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
3264}
3265
3266declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3267
3268
; ld3 into lane 0, immediate post-index: %A stepped by 3 x i64 (24 bytes) is saved to %ptr, matching the expected #24 writeback.
3269define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3270;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
3271;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3272  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3273  %next = getelementptr i64, i64* %A, i32 3
3274  store i64* %next, i64** %ptr
3275  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
3276}
3277
; ld3 into lane 0, register post-index: %A stepped by %inc x i64 is saved to %ptr, so a register writeback operand is expected.
3278define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3279;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
3280;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3281  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3282  %next = getelementptr i64, i64* %A, i64 %inc
3283  store i64* %next, i64** %ptr
3284  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
3285}
3286
3287declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3288
3289
; ld3 into lane 0, immediate post-index: %A stepped by 3 x float (12 bytes) is saved to %ptr, matching the expected #12 writeback.
3290define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3291;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
3292;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3293  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3294  %next = getelementptr float, float* %A, i32 3
3295  store float* %next, float** %ptr
3296  ret { <4 x float>, <4 x float>, <4 x float> } %triple
3297}
3298
; ld3 into lane 0, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
3299define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3300;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
3301;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3302  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3303  %next = getelementptr float, float* %A, i64 %inc
3304  store float* %next, float** %ptr
3305  ret { <4 x float>, <4 x float>, <4 x float> } %triple
3306}
3307
3308declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
3309
3310
; ld3 into lane 0, immediate post-index: %A stepped by 3 x float (12 bytes) is saved to %ptr, matching the expected #12 writeback.
3311define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3312;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
3313;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3314  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3315  %next = getelementptr float, float* %A, i32 3
3316  store float* %next, float** %ptr
3317  ret { <2 x float>, <2 x float>, <2 x float> } %triple
3318}
3319
; ld3 into lane 0, register post-index: %A stepped by %inc x float is saved to %ptr, so a register writeback operand is expected.
3320define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3321;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
3322;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3323  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3324  %next = getelementptr float, float* %A, i64 %inc
3325  store float* %next, float** %ptr
3326  ret { <2 x float>, <2 x float>, <2 x float> } %triple
3327}
3328
3329declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
3330
3331
; ld3 into lane 0, immediate post-index: %A stepped by 3 x double (24 bytes) is saved to %ptr, matching the expected #24 writeback.
3332define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
3333;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
3334;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3335  %triple = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
3336  %next = getelementptr double, double* %A, i32 3
3337  store double* %next, double** %ptr
3338  ret { <2 x double>, <2 x double>, <2 x double> } %triple
3339}
3340
; ld3lane into lane 0 of <2 x double> vectors from %A. %A + %inc doubles is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_ld3lane:
; CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  store double* %advanced, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %loaded
}
3349
3350declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
3351
3352
; ld3lane into lane 0 of <1 x double> vectors from %A. %A + 3 doubles is stored
; to %ptr; the expected instruction is the post-indexed form with immediate #24.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_ld3lane:
; CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
  %loaded = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i32 3
  store double* %advanced, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %loaded
}
3361
; ld3lane into lane 0 of <1 x double> vectors from %A. %A + %inc doubles is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_ld3lane:
; CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  store double* %advanced, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %loaded
}
3370
3371declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
3372
3373
; ld4lane into lane 0 of <16 x i8> vectors from %A. %A + 4 bytes is stored to
; %ptr; the expected instruction is the post-indexed form with immediate #4.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_ld4lane:
; CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
  %loaded = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 4
  store i8* %advanced, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %loaded
}
3382
; ld4lane into lane 0 of <16 x i8> vectors from %A. %A + %inc bytes is stored
; to %ptr; the expected instruction post-increments x0 by a register.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_ld4lane:
; CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  store i8* %advanced, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %loaded
}
3391
3392declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
3393
3394
; ld4lane into lane 0 of <8 x i8> vectors from %A. %A + 4 bytes is stored to
; %ptr; the expected instruction is the post-indexed form with immediate #4.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_ld4lane:
; CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
  %loaded = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 4
  store i8* %advanced, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %loaded
}
3403
; ld4lane into lane 0 of <8 x i8> vectors from %A. %A + %inc bytes is stored
; to %ptr; the expected instruction post-increments x0 by a register.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_ld4lane:
; CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  store i8* %advanced, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %loaded
}
3412
3413declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
3414
3415
; ld4lane into lane 0 of <8 x i16> vectors from %A. %A + 4 i16 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #8.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_ld4lane:
; CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %loaded = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %advanced = getelementptr i16, i16* %A, i32 4
  store i16* %advanced, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %loaded
}
3424
; ld4lane into lane 0 of <8 x i16> vectors from %A. %A + %inc i16 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_ld4lane:
; CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %advanced = getelementptr i16, i16* %A, i64 %inc
  store i16* %advanced, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %loaded
}
3433
3434declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
3435
3436
; ld4lane into lane 0 of <4 x i16> vectors from %A. %A + 4 i16 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #8.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_ld4lane:
; CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %loaded = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %advanced = getelementptr i16, i16* %A, i32 4
  store i16* %advanced, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %loaded
}
3445
; ld4lane into lane 0 of <4 x i16> vectors from %A. %A + %inc i16 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_ld4lane:
; CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %advanced = getelementptr i16, i16* %A, i64 %inc
  store i16* %advanced, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %loaded
}
3454
3455declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
3456
3457
; ld4lane into lane 0 of <4 x i32> vectors from %A. %A + 4 i32 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #16.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %loaded = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
  %advanced = getelementptr i32, i32* %A, i32 4
  store i32* %advanced, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %loaded
}
3466
; ld4lane into lane 0 of <4 x i32> vectors from %A. %A + %inc i32 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
  %advanced = getelementptr i32, i32* %A, i64 %inc
  store i32* %advanced, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %loaded
}
3475
3476declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
3477
3478
; ld4lane into lane 0 of <2 x i32> vectors from %A. %A + 4 i32 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #16.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %loaded = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
  %advanced = getelementptr i32, i32* %A, i32 4
  store i32* %advanced, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %loaded
}
3487
; ld4lane into lane 0 of <2 x i32> vectors from %A. %A + %inc i32 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
  %advanced = getelementptr i32, i32* %A, i64 %inc
  store i32* %advanced, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %loaded
}
3496
3497declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
3498
3499
; ld4lane into lane 0 of <2 x i64> vectors from %A. %A + 4 i64 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #32.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %loaded = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
  %advanced = getelementptr i64, i64* %A, i32 4
  store i64* %advanced, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %loaded
}
3508
; ld4lane into lane 0 of <2 x i64> vectors from %A. %A + %inc i64 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 %inc
  store i64* %advanced, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %loaded
}
3517
3518declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3519
3520
; ld4lane into lane 0 of <1 x i64> vectors from %A. %A + 4 i64 elements is
; stored to %ptr; the expected instruction is the post-indexed form with
; immediate #32.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %loaded = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
  %advanced = getelementptr i64, i64* %A, i32 4
  store i64* %advanced, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %loaded
}
3529
; ld4lane into lane 0 of <1 x i64> vectors from %A. %A + %inc i64 elements is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 %inc
  store i64* %advanced, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %loaded
}
3538
3539declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3540
3541
; ld4lane into lane 0 of <4 x float> vectors from %A. %A + 4 floats is stored
; to %ptr; the expected instruction is the post-indexed form with immediate #16.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %loaded = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
  %advanced = getelementptr float, float* %A, i32 4
  store float* %advanced, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %loaded
}
3550
; ld4lane into lane 0 of <4 x float> vectors from %A. %A + %inc floats is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
  %advanced = getelementptr float, float* %A, i64 %inc
  store float* %advanced, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %loaded
}
3559
3560declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
3561
3562
; ld4lane into lane 0 of <2 x float> vectors from %A. %A + 4 floats is stored
; to %ptr; the expected instruction is the post-indexed form with immediate #16.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %loaded = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
  %advanced = getelementptr float, float* %A, i32 4
  store float* %advanced, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %loaded
}
3571
; ld4lane into lane 0 of <2 x float> vectors from %A. %A + %inc floats is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_ld4lane:
; CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
  %advanced = getelementptr float, float* %A, i64 %inc
  store float* %advanced, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %loaded
}
3580
3581declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
3582
3583
; ld4lane into lane 0 of <2 x double> vectors from %A. %A + 4 doubles is stored
; to %ptr; the expected instruction is the post-indexed form with immediate #32.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %loaded = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i32 4
  store double* %advanced, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %loaded
}
3592
; ld4lane into lane 0 of <2 x double> vectors from %A. %A + %inc doubles is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  store double* %advanced, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %loaded
}
3601
3602declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
3603
3604
; ld4lane into lane 0 of <1 x double> vectors from %A. %A + 4 doubles is stored
; to %ptr; the expected instruction is the post-indexed form with immediate #32.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %loaded = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i32 4
  store double* %advanced, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %loaded
}
3613
; ld4lane into lane 0 of <1 x double> vectors from %A. %A + %inc doubles is
; stored to %ptr; the expected instruction post-increments x0 by a register.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_ld4lane:
; CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %loaded = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  store double* %advanced, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %loaded
}
3622
3623declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
3624
3625
; st2 of two <16 x i8> vectors through %A, returning %A + 32 bytes. The
; expected instruction is the post-indexed form with immediate #32.
define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st2:
; CHECK: st2.16b { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 32
  ret i8* %advanced
}
3633
; st2 of two <16 x i8> vectors through %A, returning %A + %inc bytes. The
; expected instruction post-increments x0 by a register.
define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st2:
; CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  ret i8* %advanced
}
3641
3642declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
3643
3644
; st2 of two <8 x i8> vectors through %A, returning %A + 16 bytes. The
; expected instruction is the post-indexed form with immediate #16.
define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st2:
; CHECK: st2.8b { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 16
  ret i8* %advanced
}
3652
; st2 of two <8 x i8> vectors through %A, returning %A + %inc bytes. The
; expected instruction post-increments x0 by a register.
define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st2:
; CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  ret i8* %advanced
}
3660
3661declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
3662
3663
; st2 of two <8 x i16> vectors through %A, returning %A + 16 i16 elements. The
; expected instruction is the post-indexed form with immediate #32.
define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st2:
; CHECK: st2.8h { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %advanced = getelementptr i16, i16* %A, i32 16
  ret i16* %advanced
}
3671
; st2 of two <8 x i16> vectors through %A, returning %A + %inc i16 elements.
; The expected instruction post-increments x0 by a register.
define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st2:
; CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %advanced = getelementptr i16, i16* %A, i64 %inc
  ret i16* %advanced
}
3679
3680declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
3681
3682
; st2 of two <4 x i16> vectors through %A, returning %A + 8 i16 elements. The
; expected instruction is the post-indexed form with immediate #16.
define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st2:
; CHECK: st2.4h { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %advanced = getelementptr i16, i16* %A, i32 8
  ret i16* %advanced
}
3690
; st2 of two <4 x i16> vectors through %A, returning %A + %inc i16 elements.
; The expected instruction post-increments x0 by a register.
define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st2:
; CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %advanced = getelementptr i16, i16* %A, i64 %inc
  ret i16* %advanced
}
3698
3699declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
3700
3701
; st2 of two <4 x i32> vectors through %A, returning %A + 8 i32 elements. The
; expected instruction is the post-indexed form with immediate #32.
define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st2:
; CHECK: st2.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %advanced = getelementptr i32, i32* %A, i32 8
  ret i32* %advanced
}
3709
; st2 of two <4 x i32> vectors through %A, returning %A + %inc i32 elements.
; The expected instruction post-increments x0 by a register.
define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st2:
; CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %advanced = getelementptr i32, i32* %A, i64 %inc
  ret i32* %advanced
}
3717
3718declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
3719
3720
; st2 of two <2 x i32> vectors through %A, returning %A + 4 i32 elements. The
; expected instruction is the post-indexed form with immediate #16.
define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st2:
; CHECK: st2.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %advanced = getelementptr i32, i32* %A, i32 4
  ret i32* %advanced
}
3728
; st2 of two <2 x i32> vectors through %A, returning %A + %inc i32 elements.
; The expected instruction post-increments x0 by a register.
define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st2:
; CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %advanced = getelementptr i32, i32* %A, i64 %inc
  ret i32* %advanced
}
3736
3737declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
3738
3739
; st2 of two <2 x i64> vectors through %A, returning %A + 4 i64 elements. The
; expected instruction is the post-indexed form with immediate #32.
define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st2:
; CHECK: st2.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 4
  ret i64* %advanced
}
3747
; st2 of two <2 x i64> vectors through %A, returning %A + %inc i64 elements.
; The expected instruction post-increments x0 by a register.
define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st2:
; CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 %inc
  ret i64* %advanced
}
3755
3756declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
3757
3758
; st2 intrinsic on two <1 x i64> vectors through %A, returning %A + 2 i64
; elements. The expected output uses the st1.1d mnemonic with a two-register
; list and the post-indexed immediate #16.
define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st2:
; CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 2
  ret i64* %advanced
}
3766
; st2 intrinsic on two <1 x i64> vectors through %A, returning %A + %inc i64
; elements. The expected output uses the st1.1d mnemonic with a two-register
; list and post-increments x0 by a register.
define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st2:
; CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %advanced = getelementptr i64, i64* %A, i64 %inc
  ret i64* %advanced
}
3774
3775declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
3776
3777
; st2 of two <4 x float> vectors through %A, returning %A + 8 floats. The
; expected instruction is the post-indexed form with immediate #32.
define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st2:
; CHECK: st2.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %advanced = getelementptr float, float* %A, i32 8
  ret float* %advanced
}
3785
; st2 of two <4 x float> vectors through %A, returning %A + %inc floats. The
; expected instruction post-increments x0 by a register.
define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st2:
; CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %advanced = getelementptr float, float* %A, i64 %inc
  ret float* %advanced
}
3793
3794declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
3795
3796
; st2 of two <2 x float> vectors through %A, returning %A + 4 floats. The
; expected instruction is the post-indexed form with immediate #16.
define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st2:
; CHECK: st2.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %advanced = getelementptr float, float* %A, i32 4
  ret float* %advanced
}
3804
; st2 of two <2 x float> vectors through %A, returning %A + %inc floats. The
; expected instruction post-increments x0 by a register.
define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st2:
; CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %advanced = getelementptr float, float* %A, i64 %inc
  ret float* %advanced
}
3812
3813declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
3814
3815
; st2 of two <2 x double> vectors through %A, returning %A + 4 doubles. The
; expected instruction is the post-indexed form with immediate #32.
define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st2:
; CHECK: st2.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %advanced = getelementptr double, double* %A, i64 4
  ret double* %advanced
}
3823
; st2 of two <2 x double> vectors through %A, returning %A + %inc doubles. The
; expected instruction post-increments x0 by a register.
define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st2:
; CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  ret double* %advanced
}
3831
3832declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
3833
3834
; st2 intrinsic on two <1 x double> vectors through %A, returning %A + 2
; doubles. The expected output uses the st1.1d mnemonic with a two-register
; list and the post-indexed immediate #16.
define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st2:
; CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %advanced = getelementptr double, double* %A, i64 2
  ret double* %advanced
}
3842
; st2 intrinsic on two <1 x double> vectors through %A, returning %A + %inc
; doubles. The expected output uses the st1.1d mnemonic with a two-register
; list and post-increments x0 by a register.
define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st2:
; CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %advanced = getelementptr double, double* %A, i64 %inc
  ret double* %advanced
}
3850
3851declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
3852
3853
; st3 of three <16 x i8> vectors through %A, returning %A + 48 bytes. The
; expected instruction is the post-indexed form with immediate #48.
define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st3:
; CHECK: st3.16b { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 48
  ret i8* %advanced
}
3861
; st3 of three <16 x i8> vectors through %A, returning %A + %inc bytes. The
; expected instruction post-increments x0 by a register.
define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st3:
; CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  ret i8* %advanced
}
3869
3870declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
3871
3872
; st3 of three <8 x i8> vectors through %A, returning %A + 24 bytes. The
; expected instruction is the post-indexed form with immediate #24.
define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st3:
; CHECK: st3.8b { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %advanced = getelementptr i8, i8* %A, i32 24
  ret i8* %advanced
}
3880
; st3 of three <8 x i8> vectors through %A, returning %A + %inc bytes. The
; expected instruction post-increments x0 by a register.
define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st3:
; CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %advanced = getelementptr i8, i8* %A, i64 %inc
  ret i8* %advanced
}
3888
3889declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
3890
3891
; st3 of three <8 x i16> vectors through %A, returning %A + 24 i16 elements.
; The expected instruction is the post-indexed form with immediate #48.
define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st3:
; CHECK: st3.8h { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %advanced = getelementptr i16, i16* %A, i32 24
  ret i16* %advanced
}
3899
; st3 of three <8 x i16> vectors through %A, returning %A + %inc i16 elements.
; The expected instruction post-increments x0 by a register.
define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st3:
; CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %advanced = getelementptr i16, i16* %A, i64 %inc
  ret i16* %advanced
}
3907
3908declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
3909
3910
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 12 i16 elements (24 bytes, the combined size of the three
; <4 x i16> vectors). The FileCheck pattern expects st3.4h with writeback #24;
; %ptr is unused.
3911define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3912;CHECK-LABEL: test_v4i16_post_imm_st3:
3913;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
3914  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
3915  %tmp = getelementptr i16, i16* %A, i32 12
3916  ret i16* %tmp
3917}
3918
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc i16 elements. The FileCheck pattern expects
; st3.4h with writeback by an x register (any register number); %ptr is unused.
3919define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
3920;CHECK-LABEL: test_v4i16_post_reg_st3:
3921;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
3922  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
3923  %tmp = getelementptr i16, i16* %A, i64 %inc
3924  ret i16* %tmp
3925}
3926
3927declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
3928
3929
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 12 i32 elements (48 bytes, the combined size of the three
; <4 x i32> vectors). The FileCheck pattern expects st3.4s with writeback #48;
; %ptr is unused.
3930define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3931;CHECK-LABEL: test_v4i32_post_imm_st3:
3932;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
3933  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
3934  %tmp = getelementptr i32, i32* %A, i32 12
3935  ret i32* %tmp
3936}
3937
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st3.4s with writeback by an x register (any register number); %ptr is unused.
3938define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
3939;CHECK-LABEL: test_v4i32_post_reg_st3:
3940;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
3941  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
3942  %tmp = getelementptr i32, i32* %A, i64 %inc
3943  ret i32* %tmp
3944}
3945
3946declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
3947
3948
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 6 i32 elements (24 bytes, the combined size of the three
; <2 x i32> vectors). The FileCheck pattern expects st3.2s with writeback #24;
; %ptr is unused.
3949define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3950;CHECK-LABEL: test_v2i32_post_imm_st3:
3951;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
3952  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
3953  %tmp = getelementptr i32, i32* %A, i32 6
3954  ret i32* %tmp
3955}
3956
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st3.2s with writeback by an x register (any register number); %ptr is unused.
3957define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
3958;CHECK-LABEL: test_v2i32_post_reg_st3:
3959;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
3960  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
3961  %tmp = getelementptr i32, i32* %A, i64 %inc
3962  ret i32* %tmp
3963}
3964
3965declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
3966
3967
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 6 i64 elements (48 bytes, the combined size of the three
; <2 x i64> vectors). The FileCheck pattern expects st3.2d with writeback #48;
; %ptr is unused.
3968define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3969;CHECK-LABEL: test_v2i64_post_imm_st3:
3970;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
3971  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
3972  %tmp = getelementptr i64, i64* %A, i64 6
3973  ret i64* %tmp
3974}
3975
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc i64 elements. The FileCheck pattern expects
; st3.2d with writeback by an x register (any register number); %ptr is unused.
3976define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
3977;CHECK-LABEL: test_v2i64_post_reg_st3:
3978;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
3979  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
3980  %tmp = getelementptr i64, i64* %A, i64 %inc
3981  ret i64* %tmp
3982}
3983
3984declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
3985
3986
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 3 i64 elements (24 bytes, the combined size of the three
; <1 x i64> vectors); %ptr is unused.
; The expected mnemonic is st1.1d with a three-register list and writeback #24,
; not st3, although the IR calls the st3 intrinsic.
; NOTE(review): presumably st3 has no .1d arrangement - confirm against the ISA.
3987define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3988;CHECK-LABEL: test_v1i64_post_imm_st3:
3989;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
3990  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
3991  %tmp = getelementptr i64, i64* %A, i64 3
3992  ret i64* %tmp
3993}
3994
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc i64 elements; %ptr is unused.
; The expected mnemonic is st1.1d with a three-register list and writeback by
; an x register (any register number), not st3, although the IR calls the st3
; intrinsic.
; NOTE(review): presumably st3 has no .1d arrangement - confirm against the ISA.
3995define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
3996;CHECK-LABEL: test_v1i64_post_reg_st3:
3997;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
3998  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
3999  %tmp = getelementptr i64, i64* %A, i64 %inc
4000  ret i64* %tmp
4001}
4002
4003declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
4004
4005
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 12 float elements (48 bytes, the combined size of the three
; <4 x float> vectors). The FileCheck pattern expects st3.4s with writeback
; #48; %ptr is unused.
4006define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
4007;CHECK-LABEL: test_v4f32_post_imm_st3:
4008;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
4009  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4010  %tmp = getelementptr float, float* %A, i32 12
4011  ret float* %tmp
4012}
4013
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc float elements. The FileCheck pattern expects
; st3.4s with writeback by an x register (any register number); %ptr is unused.
4014define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
4015;CHECK-LABEL: test_v4f32_post_reg_st3:
4016;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4017  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4018  %tmp = getelementptr float, float* %A, i64 %inc
4019  ret float* %tmp
4020}
4021
4022declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
4023
4024
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 6 float elements (24 bytes, the combined size of the three
; <2 x float> vectors). The FileCheck pattern expects st3.2s with writeback
; #24; %ptr is unused.
4025define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
4026;CHECK-LABEL: test_v2f32_post_imm_st3:
4027;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
4028  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4029  %tmp = getelementptr float, float* %A, i32 6
4030  ret float* %tmp
4031}
4032
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc float elements. The FileCheck pattern expects
; st3.2s with writeback by an x register (any register number); %ptr is unused.
4033define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
4034;CHECK-LABEL: test_v2f32_post_reg_st3:
4035;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4036  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4037  %tmp = getelementptr float, float* %A, i64 %inc
4038  ret float* %tmp
4039}
4040
4041declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
4042
4043
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 6 double elements (48 bytes, the combined size of the three
; <2 x double> vectors). The FileCheck pattern expects st3.2d with writeback
; #48; %ptr is unused.
4044define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
4045;CHECK-LABEL: test_v2f64_post_imm_st3:
4046;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
4047  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4048  %tmp = getelementptr double, double* %A, i64 6
4049  ret double* %tmp
4050}
4051
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc double elements. The FileCheck pattern expects
; st3.2d with writeback by an x register (any register number); %ptr is unused.
4052define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
4053;CHECK-LABEL: test_v2f64_post_reg_st3:
4054;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4055  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4056  %tmp = getelementptr double, double* %A, i64 %inc
4057  ret double* %tmp
4058}
4059
4060declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
4061
4062
; Immediate post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A + 3 double elements (24 bytes, the combined size of the three
; <1 x double> vectors); %ptr is unused.
; The expected mnemonic is st1.1d with a three-register list and writeback #24,
; not st3, although the IR calls the st3 intrinsic.
; NOTE(review): presumably st3 has no .1d arrangement - confirm against the ISA.
4063define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
4064;CHECK-LABEL: test_v1f64_post_imm_st3:
4065;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4066  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4067  %tmp = getelementptr double, double* %A, i64 3
4068  ret double* %tmp
4069}
4070
; Register post-index case: stores %B, %C and %D to %A via the st3 intrinsic
; and returns %A advanced by %inc double elements; %ptr is unused.
; The expected mnemonic is st1.1d with a three-register list and writeback by
; an x register (any register number), not st3, although the IR calls the st3
; intrinsic.
; NOTE(review): presumably st3 has no .1d arrangement - confirm against the ISA.
4071define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
4072;CHECK-LABEL: test_v1f64_post_reg_st3:
4073;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4074  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4075  %tmp = getelementptr double, double* %A, i64 %inc
4076  ret double* %tmp
4077}
4078
4079declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
4080
4081
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 64 bytes (the combined size of the four
; <16 x i8> vectors). The FileCheck pattern expects st4.16b with writeback
; #64; %ptr is unused.
4082define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
4083;CHECK-LABEL: test_v16i8_post_imm_st4:
4084;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
4085  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4086  %tmp = getelementptr i8, i8* %A, i32 64
4087  ret i8* %tmp
4088}
4089
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc bytes. The FileCheck pattern expects
; st4.16b with writeback by an x register (any register number); %ptr is unused.
4090define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
4091;CHECK-LABEL: test_v16i8_post_reg_st4:
4092;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4093  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4094  %tmp = getelementptr i8, i8* %A, i64 %inc
4095  ret i8* %tmp
4096}
4097
4098declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
4099
4100
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 32 bytes (the combined size of the four
; <8 x i8> vectors). The FileCheck pattern expects st4.8b with writeback #32;
; %ptr is unused.
4101define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
4102;CHECK-LABEL: test_v8i8_post_imm_st4:
4103;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
4104  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4105  %tmp = getelementptr i8, i8* %A, i32 32
4106  ret i8* %tmp
4107}
4108
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc bytes. The FileCheck pattern expects
; st4.8b with writeback by an x register (any register number); %ptr is unused.
4109define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
4110;CHECK-LABEL: test_v8i8_post_reg_st4:
4111;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4112  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4113  %tmp = getelementptr i8, i8* %A, i64 %inc
4114  ret i8* %tmp
4115}
4116
4117declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
4118
4119
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 32 i16 elements (64 bytes, the combined size of
; the four <8 x i16> vectors). The FileCheck pattern expects st4.8h with
; writeback #64; %ptr is unused.
4120define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
4121;CHECK-LABEL: test_v8i16_post_imm_st4:
4122;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
4123  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4124  %tmp = getelementptr i16, i16* %A, i32 32
4125  ret i16* %tmp
4126}
4127
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i16 elements. The FileCheck pattern
; expects st4.8h with writeback by an x register (any register number); %ptr
; is unused.
4128define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
4129;CHECK-LABEL: test_v8i16_post_reg_st4:
4130;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4131  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4132  %tmp = getelementptr i16, i16* %A, i64 %inc
4133  ret i16* %tmp
4134}
4135
4136declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
4137
4138
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 16 i16 elements (32 bytes, the combined size of
; the four <4 x i16> vectors). The FileCheck pattern expects st4.4h with
; writeback #32; %ptr is unused.
4139define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
4140;CHECK-LABEL: test_v4i16_post_imm_st4:
4141;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
4142  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4143  %tmp = getelementptr i16, i16* %A, i32 16
4144  ret i16* %tmp
4145}
4146
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i16 elements. The FileCheck pattern
; expects st4.4h with writeback by an x register (any register number); %ptr
; is unused.
4147define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
4148;CHECK-LABEL: test_v4i16_post_reg_st4:
4149;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4150  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4151  %tmp = getelementptr i16, i16* %A, i64 %inc
4152  ret i16* %tmp
4153}
4154
4155declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  i16*)
4156
4157
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 16 i32 elements (64 bytes, the combined size of
; the four <4 x i32> vectors). The FileCheck pattern expects st4.4s with
; writeback #64; %ptr is unused.
4158define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
4159;CHECK-LABEL: test_v4i32_post_imm_st4:
4160;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
4161  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4162  %tmp = getelementptr i32, i32* %A, i32 16
4163  ret i32* %tmp
4164}
4165
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i32 elements. The FileCheck pattern
; expects st4.4s with writeback by an x register (any register number); %ptr
; is unused.
4166define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
4167;CHECK-LABEL: test_v4i32_post_reg_st4:
4168;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4169  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4170  %tmp = getelementptr i32, i32* %A, i64 %inc
4171  ret i32* %tmp
4172}
4173
4174declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  i32*)
4175
4176
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 8 i32 elements (32 bytes, the combined size of
; the four <2 x i32> vectors). The FileCheck pattern expects st4.2s with
; writeback #32; %ptr is unused.
4177define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
4178;CHECK-LABEL: test_v2i32_post_imm_st4:
4179;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
4180  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4181  %tmp = getelementptr i32, i32* %A, i32 8
4182  ret i32* %tmp
4183}
4184
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i32 elements. The FileCheck pattern
; expects st4.2s with writeback by an x register (any register number); %ptr
; is unused.
4185define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
4186;CHECK-LABEL: test_v2i32_post_reg_st4:
4187;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4188  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4189  %tmp = getelementptr i32, i32* %A, i64 %inc
4190  ret i32* %tmp
4191}
4192
4193declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
4194
4195
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 8 i64 elements (64 bytes, the combined size of
; the four <2 x i64> vectors). The FileCheck pattern expects st4.2d with
; writeback #64; %ptr is unused.
4196define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
4197;CHECK-LABEL: test_v2i64_post_imm_st4:
4198;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
4199  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4200  %tmp = getelementptr i64, i64* %A, i64 8
4201  ret i64* %tmp
4202}
4203
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i64 elements. The FileCheck pattern
; expects st4.2d with writeback by an x register (any register number); %ptr
; is unused.
4204define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
4205;CHECK-LABEL: test_v2i64_post_reg_st4:
4206;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4207  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4208  %tmp = getelementptr i64, i64* %A, i64 %inc
4209  ret i64* %tmp
4210}
4211
4212declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  i64*)
4213
4214
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 4 i64 elements (32 bytes, the combined size of
; the four <1 x i64> vectors); %ptr is unused.
; The expected mnemonic is st1.1d with a four-register list and writeback #32,
; not st4, although the IR calls the st4 intrinsic.
; NOTE(review): presumably st4 has no .1d arrangement - confirm against the ISA.
4215define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
4216;CHECK-LABEL: test_v1i64_post_imm_st4:
4217;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4218  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4219  %tmp = getelementptr i64, i64* %A, i64 4
4220  ret i64* %tmp
4221}
4222
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc i64 elements; %ptr is unused.
; The expected mnemonic is st1.1d with a four-register list and writeback by
; an x register (any register number), not st4, although the IR calls the st4
; intrinsic.
; NOTE(review): presumably st4 has no .1d arrangement - confirm against the ISA.
4223define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
4224;CHECK-LABEL: test_v1i64_post_reg_st4:
4225;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4226  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4227  %tmp = getelementptr i64, i64* %A, i64 %inc
4228  ret i64* %tmp
4229}
4230
4231declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  i64*)
4232
4233
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 16 float elements (64 bytes, the combined size of
; the four <4 x float> vectors). The FileCheck pattern expects st4.4s with
; writeback #64; %ptr is unused.
4234define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
4235;CHECK-LABEL: test_v4f32_post_imm_st4:
4236;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
4237  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4238  %tmp = getelementptr float, float* %A, i32 16
4239  ret float* %tmp
4240}
4241
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc float elements. The FileCheck
; pattern expects st4.4s with writeback by an x register (any register number);
; %ptr is unused.
4242define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
4243;CHECK-LABEL: test_v4f32_post_reg_st4:
4244;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4245  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4246  %tmp = getelementptr float, float* %A, i64 %inc
4247  ret float* %tmp
4248}
4249
4250declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
4251
4252
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 8 float elements (32 bytes, the combined size of
; the four <2 x float> vectors). The FileCheck pattern expects st4.2s with
; writeback #32; %ptr is unused.
4253define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
4254;CHECK-LABEL: test_v2f32_post_imm_st4:
4255;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
4256  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4257  %tmp = getelementptr float, float* %A, i32 8
4258  ret float* %tmp
4259}
4260
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc float elements. The FileCheck
; pattern expects st4.2s with writeback by an x register (any register number);
; %ptr is unused.
4261define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
4262;CHECK-LABEL: test_v2f32_post_reg_st4:
4263;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4264  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4265  %tmp = getelementptr float, float* %A, i64 %inc
4266  ret float* %tmp
4267}
4268
4269declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
4270
4271
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 8 double elements (64 bytes, the combined size of
; the four <2 x double> vectors). The FileCheck pattern expects st4.2d with
; writeback #64; %ptr is unused.
4272define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
4273;CHECK-LABEL: test_v2f64_post_imm_st4:
4274;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
4275  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4276  %tmp = getelementptr double, double* %A, i64 8
4277  ret double* %tmp
4278}
4279
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc double elements. The FileCheck
; pattern expects st4.2d with writeback by an x register (any register number);
; %ptr is unused.
4280define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
4281;CHECK-LABEL: test_v2f64_post_reg_st4:
4282;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4283  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4284  %tmp = getelementptr double, double* %A, i64 %inc
4285  ret double* %tmp
4286}
4287
4288declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  double*)
4289
4290
; Immediate post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A + 4 double elements (32 bytes, the combined size of
; the four <1 x double> vectors); %ptr is unused.
; The expected mnemonic is st1.1d with a four-register list and writeback #32,
; not st4, although the IR calls the st4 intrinsic.
; NOTE(review): presumably st4 has no .1d arrangement - confirm against the ISA.
4291define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
4292;CHECK-LABEL: test_v1f64_post_imm_st4:
4293;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4294  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4295  %tmp = getelementptr double, double* %A, i64 4
4296  ret double* %tmp
4297}
4298
; Register post-index case: stores %B, %C, %D and %E to %A via the st4
; intrinsic and returns %A advanced by %inc double elements; %ptr is unused.
; The expected mnemonic is st1.1d with a four-register list and writeback by
; an x register (any register number), not st4, although the IR calls the st4
; intrinsic.
; NOTE(review): presumably st4 has no .1d arrangement - confirm against the ISA.
4299define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
4300;CHECK-LABEL: test_v1f64_post_reg_st4:
4301;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4302  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4303  %tmp = getelementptr double, double* %A, i64 %inc
4304  ret double* %tmp
4305}
4306
4307declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
4308
4309
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 32 bytes (the combined size of the two <16 x i8> vectors).
; The FileCheck pattern expects st1.16b with a two-register list and writeback
; #32; %ptr is unused.
4310define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
4311;CHECK-LABEL: test_v16i8_post_imm_st1x2:
4312;CHECK: st1.16b { v0, v1 }, [x0], #32
4313  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
4314  %tmp = getelementptr i8, i8* %A, i32 32
4315  ret i8* %tmp
4316}
4317
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc bytes. The FileCheck pattern expects st1.16b with
; a two-register list and writeback by an x register (any register number);
; %ptr is unused.
4318define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
4319;CHECK-LABEL: test_v16i8_post_reg_st1x2:
4320;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
4321  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
4322  %tmp = getelementptr i8, i8* %A, i64 %inc
4323  ret i8* %tmp
4324}
4325
4326declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
4327
4328
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 16 bytes (the combined size of the two <8 x i8> vectors).
; The FileCheck pattern expects st1.8b with a two-register list and writeback
; #16; %ptr is unused.
4329define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
4330;CHECK-LABEL: test_v8i8_post_imm_st1x2:
4331;CHECK: st1.8b { v0, v1 }, [x0], #16
4332  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
4333  %tmp = getelementptr i8, i8* %A, i32 16
4334  ret i8* %tmp
4335}
4336
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc bytes. The FileCheck pattern expects st1.8b with
; a two-register list and writeback by an x register (any register number);
; %ptr is unused.
4337define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
4338;CHECK-LABEL: test_v8i8_post_reg_st1x2:
4339;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
4340  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
4341  %tmp = getelementptr i8, i8* %A, i64 %inc
4342  ret i8* %tmp
4343}
4344
4345declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
4346
4347
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 16 i16 elements (32 bytes, the combined size of the two
; <8 x i16> vectors). The FileCheck pattern expects st1.8h with a two-register
; list and writeback #32; %ptr is unused.
4348define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
4349;CHECK-LABEL: test_v8i16_post_imm_st1x2:
4350;CHECK: st1.8h { v0, v1 }, [x0], #32
4351  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
4352  %tmp = getelementptr i16, i16* %A, i32 16
4353  ret i16* %tmp
4354}
4355
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i16 elements. The FileCheck pattern expects
; st1.8h with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4356define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
4357;CHECK-LABEL: test_v8i16_post_reg_st1x2:
4358;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
4359  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
4360  %tmp = getelementptr i16, i16* %A, i64 %inc
4361  ret i16* %tmp
4362}
4363
4364declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
4365
4366
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 8 i16 elements (16 bytes, the combined size of the two
; <4 x i16> vectors). The FileCheck pattern expects st1.4h with a two-register
; list and writeback #16; %ptr is unused.
4367define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
4368;CHECK-LABEL: test_v4i16_post_imm_st1x2:
4369;CHECK: st1.4h { v0, v1 }, [x0], #16
4370  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
4371  %tmp = getelementptr i16, i16* %A, i32 8
4372  ret i16* %tmp
4373}
4374
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i16 elements. The FileCheck pattern expects
; st1.4h with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4375define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
4376;CHECK-LABEL: test_v4i16_post_reg_st1x2:
4377;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
4378  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
4379  %tmp = getelementptr i16, i16* %A, i64 %inc
4380  ret i16* %tmp
4381}
4382
4383declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
4384
4385
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 8 i32 elements (32 bytes, the combined size of the two
; <4 x i32> vectors). The FileCheck pattern expects st1.4s with a two-register
; list and writeback #32; %ptr is unused.
4386define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
4387;CHECK-LABEL: test_v4i32_post_imm_st1x2:
4388;CHECK: st1.4s { v0, v1 }, [x0], #32
4389  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
4390  %tmp = getelementptr i32, i32* %A, i32 8
4391  ret i32* %tmp
4392}
4393
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st1.4s with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4394define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
4395;CHECK-LABEL: test_v4i32_post_reg_st1x2:
4396;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
4397  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
4398  %tmp = getelementptr i32, i32* %A, i64 %inc
4399  ret i32* %tmp
4400}
4401
4402declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
4403
4404
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 4 i32 elements (16 bytes, the combined size of the two
; <2 x i32> vectors). The FileCheck pattern expects st1.2s with a two-register
; list and writeback #16; %ptr is unused.
4405define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
4406;CHECK-LABEL: test_v2i32_post_imm_st1x2:
4407;CHECK: st1.2s { v0, v1 }, [x0], #16
4408  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
4409  %tmp = getelementptr i32, i32* %A, i32 4
4410  ret i32* %tmp
4411}
4412
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st1.2s with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4413define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
4414;CHECK-LABEL: test_v2i32_post_reg_st1x2:
4415;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
4416  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
4417  %tmp = getelementptr i32, i32* %A, i64 %inc
4418  ret i32* %tmp
4419}
4420
4421declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
4422
4423
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 4 i64 elements (32 bytes, the combined size of the two
; <2 x i64> vectors). The FileCheck pattern expects st1.2d with a two-register
; list and writeback #32; %ptr is unused.
4424define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
4425;CHECK-LABEL: test_v2i64_post_imm_st1x2:
4426;CHECK: st1.2d { v0, v1 }, [x0], #32
4427  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
4428  %tmp = getelementptr i64, i64* %A, i64 4
4429  ret i64* %tmp
4430}
4431
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i64 elements. The FileCheck pattern expects
; st1.2d with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4432define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
4433;CHECK-LABEL: test_v2i64_post_reg_st1x2:
4434;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
4435  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
4436  %tmp = getelementptr i64, i64* %A, i64 %inc
4437  ret i64* %tmp
4438}
4439
4440declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
4441
4442
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 2 i64 elements (16 bytes, the combined size of the two
; <1 x i64> vectors). The FileCheck pattern expects st1.1d with a two-register
; list and writeback #16; %ptr is unused.
4443define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
4444;CHECK-LABEL: test_v1i64_post_imm_st1x2:
4445;CHECK: st1.1d { v0, v1 }, [x0], #16
4446  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
4447  %tmp = getelementptr i64, i64* %A, i64 2
4448  ret i64* %tmp
4449}
4450
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc i64 elements. The FileCheck pattern expects
; st1.1d with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4451define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
4452;CHECK-LABEL: test_v1i64_post_reg_st1x2:
4453;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
4454  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
4455  %tmp = getelementptr i64, i64* %A, i64 %inc
4456  ret i64* %tmp
4457}
4458
4459declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
4460
4461
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 8 float elements (32 bytes, the combined size of the two
; <4 x float> vectors). The FileCheck pattern expects st1.4s with a
; two-register list and writeback #32; %ptr is unused.
4462define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
4463;CHECK-LABEL: test_v4f32_post_imm_st1x2:
4464;CHECK: st1.4s { v0, v1 }, [x0], #32
4465  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
4466  %tmp = getelementptr float, float* %A, i32 8
4467  ret float* %tmp
4468}
4469
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc float elements. The FileCheck pattern expects
; st1.4s with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4470define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
4471;CHECK-LABEL: test_v4f32_post_reg_st1x2:
4472;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
4473  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
4474  %tmp = getelementptr float, float* %A, i64 %inc
4475  ret float* %tmp
4476}
4477
4478declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
4479
4480
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 4 float elements (16 bytes, the combined size of the two
; <2 x float> vectors). The FileCheck pattern expects st1.2s with a
; two-register list and writeback #16; %ptr is unused.
4481define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
4482;CHECK-LABEL: test_v2f32_post_imm_st1x2:
4483;CHECK: st1.2s { v0, v1 }, [x0], #16
4484  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
4485  %tmp = getelementptr float, float* %A, i32 4
4486  ret float* %tmp
4487}
4488
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc float elements. The FileCheck pattern expects
; st1.2s with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4489define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
4490;CHECK-LABEL: test_v2f32_post_reg_st1x2:
4491;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
4492  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
4493  %tmp = getelementptr float, float* %A, i64 %inc
4494  ret float* %tmp
4495}
4496
4497declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
4498
4499
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 4 double elements (32 bytes, the combined size of the two
; <2 x double> vectors). The FileCheck pattern expects st1.2d with a
; two-register list and writeback #32; %ptr is unused.
4500define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
4501;CHECK-LABEL: test_v2f64_post_imm_st1x2:
4502;CHECK: st1.2d { v0, v1 }, [x0], #32
4503  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
4504  %tmp = getelementptr double, double* %A, i64 4
4505  ret double* %tmp
4506}
4507
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st1.2d with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4508define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
4509;CHECK-LABEL: test_v2f64_post_reg_st1x2:
4510;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
4511  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
4512  %tmp = getelementptr double, double* %A, i64 %inc
4513  ret double* %tmp
4514}
4515
4516declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
4517
4518
; Immediate post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A + 2 double elements (16 bytes, the combined size of the two
; <1 x double> vectors). The FileCheck pattern expects st1.1d with a
; two-register list and writeback #16; %ptr is unused.
4519define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
4520;CHECK-LABEL: test_v1f64_post_imm_st1x2:
4521;CHECK: st1.1d { v0, v1 }, [x0], #16
4522  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
4523  %tmp = getelementptr double, double* %A, i64 2
4524  ret double* %tmp
4525}
4526
; Register post-index case: stores %B and %C to %A via the st1x2 intrinsic and
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st1.1d with a two-register list and writeback by an x register (any register
; number); %ptr is unused.
4527define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
4528;CHECK-LABEL: test_v1f64_post_reg_st1x2:
4529;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
4530  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
4531  %tmp = getelementptr double, double* %A, i64 %inc
4532  ret double* %tmp
4533}
4534
4535declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
4536
4537
; Immediate post-index case: stores %B, %C and %D to %A via the st1x3 intrinsic
; and returns %A + 48 bytes (the combined size of the three <16 x i8> vectors).
; The FileCheck pattern expects st1.16b with a three-register list and
; writeback #48; %ptr is unused.
4538define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
4539;CHECK-LABEL: test_v16i8_post_imm_st1x3:
4540;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
4541  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
4542  %tmp = getelementptr i8, i8* %A, i32 48
4543  ret i8* %tmp
4544}
4545
; Register post-index case: stores %B, %C and %D to %A via the st1x3 intrinsic
; and returns %A advanced by %inc bytes. The FileCheck pattern expects st1.16b
; with a three-register list and writeback by an x register (any register
; number); %ptr is unused.
4546define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
4547;CHECK-LABEL: test_v16i8_post_reg_st1x3:
4548;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
4549  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
4550  %tmp = getelementptr i8, i8* %A, i64 %inc
4551  ret i8* %tmp
4552}
4553
4554declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
4555
4556
; Immediate post-index case: stores %B, %C and %D to %A via the st1x3 intrinsic
; and returns %A + 24 bytes (the combined size of the three <8 x i8> vectors).
; The FileCheck pattern expects st1.8b with a three-register list and
; writeback #24; %ptr is unused.
4557define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
4558;CHECK-LABEL: test_v8i8_post_imm_st1x3:
4559;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
4560  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
4561  %tmp = getelementptr i8, i8* %A, i32 24
4562  ret i8* %tmp
4563}
4564
; Register post-index case: stores %B, %C and %D to %A via the st1x3 intrinsic
; and returns %A advanced by %inc bytes. The FileCheck pattern expects st1.8b
; with a three-register list and writeback by an x register (any register
; number); %ptr is unused.
4565define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
4566;CHECK-LABEL: test_v8i8_post_reg_st1x3:
4567;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
4568  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
4569  %tmp = getelementptr i8, i8* %A, i64 %inc
4570  ret i8* %tmp
4571}
4572
4573declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
4574
4575
; st1x3 of three <8 x i16> vectors to %A, returning %A + 24 x i16 (48 bytes).
; The expected store uses the "#48" post-increment form on x0; %ptr is unused.
4576define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
4577;CHECK-LABEL: test_v8i16_post_imm_st1x3:
4578;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
4579  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
4580  %tmp = getelementptr i16, i16* %A, i32 24
4581  ret i16* %tmp
4582}
4583
; st1x3 of three <8 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
4584define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
4585;CHECK-LABEL: test_v8i16_post_reg_st1x3:
4586;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
4587  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
4588  %tmp = getelementptr i16, i16* %A, i64 %inc
4589  ret i16* %tmp
4590}
4591
4592declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
4593
4594
; st1x3 of three <4 x i16> vectors to %A, returning %A + 12 x i16 (24 bytes).
; The expected store uses the "#24" post-increment form on x0; %ptr is unused.
4595define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
4596;CHECK-LABEL: test_v4i16_post_imm_st1x3:
4597;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
4598  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
4599  %tmp = getelementptr i16, i16* %A, i32 12
4600  ret i16* %tmp
4601}
4602
; st1x3 of three <4 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
4603define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
4604;CHECK-LABEL: test_v4i16_post_reg_st1x3:
4605;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
4606  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
4607  %tmp = getelementptr i16, i16* %A, i64 %inc
4608  ret i16* %tmp
4609}
4610
4611declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
4612
4613
; st1x3 of three <4 x i32> vectors to %A, returning %A + 12 x i32 (48 bytes).
; The expected store uses the "#48" post-increment form on x0; %ptr is unused.
4614define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
4615;CHECK-LABEL: test_v4i32_post_imm_st1x3:
4616;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
4617  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
4618  %tmp = getelementptr i32, i32* %A, i32 12
4619  ret i32* %tmp
4620}
4621
; st1x3 of three <4 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
4622define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
4623;CHECK-LABEL: test_v4i32_post_reg_st1x3:
4624;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4625  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
4626  %tmp = getelementptr i32, i32* %A, i64 %inc
4627  ret i32* %tmp
4628}
4629
4630declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
4631
4632
; st1x3 of three <2 x i32> vectors to %A, returning %A + 6 x i32 (24 bytes).
; The expected store uses the "#24" post-increment form on x0; %ptr is unused.
4633define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
4634;CHECK-LABEL: test_v2i32_post_imm_st1x3:
4635;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
4636  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
4637  %tmp = getelementptr i32, i32* %A, i32 6
4638  ret i32* %tmp
4639}
4640
; st1x3 of three <2 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
4641define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
4642;CHECK-LABEL: test_v2i32_post_reg_st1x3:
4643;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4644  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
4645  %tmp = getelementptr i32, i32* %A, i64 %inc
4646  ret i32* %tmp
4647}
4648
4649declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
4650
4651
; st1x3 of three <2 x i64> vectors to %A, returning %A + 6 x i64 (48 bytes).
; The expected store uses the "#48" post-increment form on x0; %ptr is unused.
4652define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
4653;CHECK-LABEL: test_v2i64_post_imm_st1x3:
4654;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
4655  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
4656  %tmp = getelementptr i64, i64* %A, i64 6
4657  ret i64* %tmp
4658}
4659
; st1x3 of three <2 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
4660define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
4661;CHECK-LABEL: test_v2i64_post_reg_st1x3:
4662;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4663  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
4664  %tmp = getelementptr i64, i64* %A, i64 %inc
4665  ret i64* %tmp
4666}
4667
4668declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
4669
4670
; st1x3 of three <1 x i64> vectors to %A, returning %A + 3 x i64 (24 bytes).
; The expected store uses the "#24" post-increment form on x0; %ptr is unused.
4671define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
4672;CHECK-LABEL: test_v1i64_post_imm_st1x3:
4673;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4674  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
4675  %tmp = getelementptr i64, i64* %A, i64 3
4676  ret i64* %tmp
4677}
4678
; st1x3 of three <1 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
4679define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
4680;CHECK-LABEL: test_v1i64_post_reg_st1x3:
4681;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4682  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
4683  %tmp = getelementptr i64, i64* %A, i64 %inc
4684  ret i64* %tmp
4685}
4686
4687declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
4688
4689
; st1x3 of three <4 x float> vectors to %A, returning %A + 12 x float (48 bytes).
; The expected store uses the "#48" post-increment form on x0; %ptr is unused.
4690define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
4691;CHECK-LABEL: test_v4f32_post_imm_st1x3:
4692;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
4693  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4694  %tmp = getelementptr float, float* %A, i32 12
4695  ret float* %tmp
4696}
4697
; st1x3 of three <4 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
4698define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
4699;CHECK-LABEL: test_v4f32_post_reg_st1x3:
4700;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4701  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4702  %tmp = getelementptr float, float* %A, i64 %inc
4703  ret float* %tmp
4704}
4705
4706declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
4707
4708
; st1x3 of three <2 x float> vectors to %A, returning %A + 6 x float (24 bytes).
; The expected store uses the "#24" post-increment form on x0; %ptr is unused.
4709define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
4710;CHECK-LABEL: test_v2f32_post_imm_st1x3:
4711;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
4712  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4713  %tmp = getelementptr float, float* %A, i32 6
4714  ret float* %tmp
4715}
4716
; st1x3 of three <2 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
4717define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
4718;CHECK-LABEL: test_v2f32_post_reg_st1x3:
4719;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4720  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4721  %tmp = getelementptr float, float* %A, i64 %inc
4722  ret float* %tmp
4723}
4724
4725declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
4726
4727
; st1x3 of three <2 x double> vectors to %A, returning %A + 6 x double (48 bytes).
; The expected store uses the "#48" post-increment form on x0; %ptr is unused.
4728define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
4729;CHECK-LABEL: test_v2f64_post_imm_st1x3:
4730;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
4731  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4732  %tmp = getelementptr double, double* %A, i64 6
4733  ret double* %tmp
4734}
4735
; st1x3 of three <2 x double> vectors to %A, returning %A + %inc x double.
; The expected store post-increments x0 by any x register; %ptr is unused.
4736define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
4737;CHECK-LABEL: test_v2f64_post_reg_st1x3:
4738;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4739  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4740  %tmp = getelementptr double, double* %A, i64 %inc
4741  ret double* %tmp
4742}
4743
4744declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
4745
4746
; st1x3 of three <1 x double> vectors to %A, returning %A + 3 x double (24 bytes).
; The expected store uses the "#24" post-increment form on x0; %ptr is unused.
4747define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
4748;CHECK-LABEL: test_v1f64_post_imm_st1x3:
4749;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4750  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4751  %tmp = getelementptr double, double* %A, i64 3
4752  ret double* %tmp
4753}
4754
; st1x3 of three <1 x double> vectors to %A, returning %A + %inc x double.
; The expected store post-increments x0 by any x register; %ptr is unused.
4755define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
4756;CHECK-LABEL: test_v1f64_post_reg_st1x3:
4757;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4758  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4759  %tmp = getelementptr double, double* %A, i64 %inc
4760  ret double* %tmp
4761}
4762
4763declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
4764
4765
; st1x4 of four <16 x i8> vectors to %A, returning %A + 64 x i8 (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4766define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
4767;CHECK-LABEL: test_v16i8_post_imm_st1x4:
4768;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
4769  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4770  %tmp = getelementptr i8, i8* %A, i32 64
4771  ret i8* %tmp
4772}
4773
; st1x4 of four <16 x i8> vectors to %A, returning %A + %inc x i8.
; The expected store post-increments x0 by any x register; %ptr is unused.
4774define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
4775;CHECK-LABEL: test_v16i8_post_reg_st1x4:
4776;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4777  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4778  %tmp = getelementptr i8, i8* %A, i64 %inc
4779  ret i8* %tmp
4780}
4781
4782declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
4783
4784
; st1x4 of four <8 x i8> vectors to %A, returning %A + 32 x i8 (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4785define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
4786;CHECK-LABEL: test_v8i8_post_imm_st1x4:
4787;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
4788  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4789  %tmp = getelementptr i8, i8* %A, i32 32
4790  ret i8* %tmp
4791}
4792
; st1x4 of four <8 x i8> vectors to %A, returning %A + %inc x i8.
; The expected store post-increments x0 by any x register; %ptr is unused.
4793define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
4794;CHECK-LABEL: test_v8i8_post_reg_st1x4:
4795;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4796  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4797  %tmp = getelementptr i8, i8* %A, i64 %inc
4798  ret i8* %tmp
4799}
4800
4801declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
4802
4803
; st1x4 of four <8 x i16> vectors to %A, returning %A + 32 x i16 (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4804define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
4805;CHECK-LABEL: test_v8i16_post_imm_st1x4:
4806;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
4807  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4808  %tmp = getelementptr i16, i16* %A, i32 32
4809  ret i16* %tmp
4810}
4811
; st1x4 of four <8 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
4812define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
4813;CHECK-LABEL: test_v8i16_post_reg_st1x4:
4814;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4815  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4816  %tmp = getelementptr i16, i16* %A, i64 %inc
4817  ret i16* %tmp
4818}
4819
4820declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
4821
4822
; st1x4 of four <4 x i16> vectors to %A, returning %A + 16 x i16 (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4823define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
4824;CHECK-LABEL: test_v4i16_post_imm_st1x4:
4825;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
4826  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4827  %tmp = getelementptr i16, i16* %A, i32 16
4828  ret i16* %tmp
4829}
4830
; st1x4 of four <4 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
4831define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
4832;CHECK-LABEL: test_v4i16_post_reg_st1x4:
4833;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4834  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4835  %tmp = getelementptr i16, i16* %A, i64 %inc
4836  ret i16* %tmp
4837}
4838
4839declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  i16*)
4840
4841
; st1x4 of four <4 x i32> vectors to %A, returning %A + 16 x i32 (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4842define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
4843;CHECK-LABEL: test_v4i32_post_imm_st1x4:
4844;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
4845  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4846  %tmp = getelementptr i32, i32* %A, i32 16
4847  ret i32* %tmp
4848}
4849
; st1x4 of four <4 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
4850define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
4851;CHECK-LABEL: test_v4i32_post_reg_st1x4:
4852;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4853  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4854  %tmp = getelementptr i32, i32* %A, i64 %inc
4855  ret i32* %tmp
4856}
4857
4858declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  i32*)
4859
4860
; st1x4 of four <2 x i32> vectors to %A, returning %A + 8 x i32 (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4861define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
4862;CHECK-LABEL: test_v2i32_post_imm_st1x4:
4863;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
4864  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4865  %tmp = getelementptr i32, i32* %A, i32 8
4866  ret i32* %tmp
4867}
4868
; st1x4 of four <2 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
4869define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
4870;CHECK-LABEL: test_v2i32_post_reg_st1x4:
4871;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4872  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4873  %tmp = getelementptr i32, i32* %A, i64 %inc
4874  ret i32* %tmp
4875}
4876
4877declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
4878
4879
; st1x4 of four <2 x i64> vectors to %A, returning %A + 8 x i64 (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4880define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
4881;CHECK-LABEL: test_v2i64_post_imm_st1x4:
4882;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
4883  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4884  %tmp = getelementptr i64, i64* %A, i64 8
4885  ret i64* %tmp
4886}
4887
; st1x4 of four <2 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
4888define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
4889;CHECK-LABEL: test_v2i64_post_reg_st1x4:
4890;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4891  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4892  %tmp = getelementptr i64, i64* %A, i64 %inc
4893  ret i64* %tmp
4894}
4895
4896declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  i64*)
4897
4898
; st1x4 of four <1 x i64> vectors to %A, returning %A + 4 x i64 (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4899define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
4900;CHECK-LABEL: test_v1i64_post_imm_st1x4:
4901;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4902  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4903  %tmp = getelementptr i64, i64* %A, i64 4
4904  ret i64* %tmp
4905}
4906
; st1x4 of four <1 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
4907define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
4908;CHECK-LABEL: test_v1i64_post_reg_st1x4:
4909;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4910  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4911  %tmp = getelementptr i64, i64* %A, i64 %inc
4912  ret i64* %tmp
4913}
4914
4915declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  i64*)
4916
4917
; st1x4 of four <4 x float> vectors to %A, returning %A + 16 x float (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4918define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
4919;CHECK-LABEL: test_v4f32_post_imm_st1x4:
4920;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
4921  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4922  %tmp = getelementptr float, float* %A, i32 16
4923  ret float* %tmp
4924}
4925
; st1x4 of four <4 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
4926define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
4927;CHECK-LABEL: test_v4f32_post_reg_st1x4:
4928;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4929  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4930  %tmp = getelementptr float, float* %A, i64 %inc
4931  ret float* %tmp
4932}
4933
4934declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
4935
4936
; st1x4 of four <2 x float> vectors to %A, returning %A + 8 x float (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4937define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
4938;CHECK-LABEL: test_v2f32_post_imm_st1x4:
4939;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
4940  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4941  %tmp = getelementptr float, float* %A, i32 8
4942  ret float* %tmp
4943}
4944
; st1x4 of four <2 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
4945define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
4946;CHECK-LABEL: test_v2f32_post_reg_st1x4:
4947;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4948  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4949  %tmp = getelementptr float, float* %A, i64 %inc
4950  ret float* %tmp
4951}
4952
4953declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
4954
4955
; st1x4 of four <2 x double> vectors to %A, returning %A + 8 x double (64 bytes).
; The expected store uses the "#64" post-increment form on x0; %ptr is unused.
4956define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
4957;CHECK-LABEL: test_v2f64_post_imm_st1x4:
4958;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
4959  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4960  %tmp = getelementptr double, double* %A, i64 8
4961  ret double* %tmp
4962}
4963
; st1x4 of four <2 x double> vectors to %A, returning %A + %inc x double.
; The expected store post-increments x0 by any x register; %ptr is unused.
4964define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
4965;CHECK-LABEL: test_v2f64_post_reg_st1x4:
4966;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4967  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4968  %tmp = getelementptr double, double* %A, i64 %inc
4969  ret double* %tmp
4970}
4971
4972declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  double*)
4973
4974
; st1x4 of four <1 x double> vectors to %A, returning %A + 4 x double (32 bytes).
; The expected store uses the "#32" post-increment form on x0; %ptr is unused.
4975define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
4976;CHECK-LABEL: test_v1f64_post_imm_st1x4:
4977;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4978  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4979  %tmp = getelementptr double, double* %A, i64 4
4980  ret double* %tmp
4981}
4982
; st1x4 of four <1 x double> vectors to %A, returning %A + %inc x double.
; The expected store post-increments x0 by any x register; %ptr is unused.
4983define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
4984;CHECK-LABEL: test_v1f64_post_reg_st1x4:
4985;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4986  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4987  %tmp = getelementptr double, double* %A, i64 %inc
4988  ret double* %tmp
4989}
4990
4991declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
4992
4993
; Calls st2lanelane (declared "nounwind readnone" in this file) on two
; <16 x i8> vectors with constants 0 and 1, then returns %A + 2 x i8.
; %ptr is unused.
; NOTE(review): this function has no FileCheck patterns - presumably it only
; needs to compile without error; confirm the intent.
4994define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
4995  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
4996  %tmp = getelementptr i8, i8* %A, i32 2
4997  ret i8* %tmp
4998}
4999
; Calls st2lanelane (declared "nounwind readnone" in this file) on two
; <16 x i8> vectors with constants 0 and 1, then returns %A + %inc x i8.
; %ptr is unused.
; NOTE(review): this function has no FileCheck patterns - presumably it only
; needs to compile without error; confirm the intent.
5000define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
5001  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
5002  %tmp = getelementptr i8, i8* %A, i64 %inc
5003  ret i8* %tmp
5004}
5005
5006declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone
5007
5008
; st2lane (lane 0) of two <16 x i8> vectors to %A, returning %A + 2 x i8 (2 bytes).
; The expected store uses the "#2" post-increment form on x0; %ptr is unused.
5009define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
5010;CHECK-LABEL: test_v16i8_post_imm_st2lane:
5011;CHECK: st2.b { v0, v1 }[0], [x0], #2
5012  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
5013  %tmp = getelementptr i8, i8* %A, i32 2
5014  ret i8* %tmp
5015}
5016
; st2lane (lane 0) of two <16 x i8> vectors to %A, returning %A + %inc x i8.
; The expected store post-increments x0 by any x register; %ptr is unused.
5017define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
5018;CHECK-LABEL: test_v16i8_post_reg_st2lane:
5019;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
5020  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
5021  %tmp = getelementptr i8, i8* %A, i64 %inc
5022  ret i8* %tmp
5023}
5024
5025declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)
5026
5027
; st2lane (lane 0) of two <8 x i8> vectors to %A, returning %A + 2 x i8 (2 bytes).
; The expected store uses the "#2" post-increment form on x0; %ptr is unused.
5028define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
5029;CHECK-LABEL: test_v8i8_post_imm_st2lane:
5030;CHECK: st2.b { v0, v1 }[0], [x0], #2
5031  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
5032  %tmp = getelementptr i8, i8* %A, i32 2
5033  ret i8* %tmp
5034}
5035
; st2lane (lane 0) of two <8 x i8> vectors to %A, returning %A + %inc x i8.
; The expected store post-increments x0 by any x register; %ptr is unused.
5036define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
5037;CHECK-LABEL: test_v8i8_post_reg_st2lane:
5038;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
5039  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
5040  %tmp = getelementptr i8, i8* %A, i64 %inc
5041  ret i8* %tmp
5042}
5043
5044declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)
5045
5046
; st2lane (lane 0) of two <8 x i16> vectors to %A, returning %A + 2 x i16 (4 bytes).
; The expected store uses the "#4" post-increment form on x0; %ptr is unused.
5047define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
5048;CHECK-LABEL: test_v8i16_post_imm_st2lane:
5049;CHECK: st2.h { v0, v1 }[0], [x0], #4
5050  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
5051  %tmp = getelementptr i16, i16* %A, i32 2
5052  ret i16* %tmp
5053}
5054
; st2lane (lane 0) of two <8 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
5055define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
5056;CHECK-LABEL: test_v8i16_post_reg_st2lane:
5057;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
5058  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
5059  %tmp = getelementptr i16, i16* %A, i64 %inc
5060  ret i16* %tmp
5061}
5062
5063declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)
5064
5065
; st2lane (lane 0) of two <4 x i16> vectors to %A, returning %A + 2 x i16 (4 bytes).
; The expected store uses the "#4" post-increment form on x0; %ptr is unused.
5066define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
5067;CHECK-LABEL: test_v4i16_post_imm_st2lane:
5068;CHECK: st2.h { v0, v1 }[0], [x0], #4
5069  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
5070  %tmp = getelementptr i16, i16* %A, i32 2
5071  ret i16* %tmp
5072}
5073
; st2lane (lane 0) of two <4 x i16> vectors to %A, returning %A + %inc x i16.
; The expected store post-increments x0 by any x register; %ptr is unused.
5074define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
5075;CHECK-LABEL: test_v4i16_post_reg_st2lane:
5076;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
5077  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
5078  %tmp = getelementptr i16, i16* %A, i64 %inc
5079  ret i16* %tmp
5080}
5081
5082declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)
5083
5084
; st2lane (lane 0) of two <4 x i32> vectors to %A, returning %A + 2 x i32 (8 bytes).
; The expected store uses the "#8" post-increment form on x0; %ptr is unused.
5085define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
5086;CHECK-LABEL: test_v4i32_post_imm_st2lane:
5087;CHECK: st2.s { v0, v1 }[0], [x0], #8
5088  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
5089  %tmp = getelementptr i32, i32* %A, i32 2
5090  ret i32* %tmp
5091}
5092
; st2lane (lane 0) of two <4 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
5093define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
5094;CHECK-LABEL: test_v4i32_post_reg_st2lane:
5095;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5096  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
5097  %tmp = getelementptr i32, i32* %A, i64 %inc
5098  ret i32* %tmp
5099}
5100
5101declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
5102
5103
; st2lane (lane 0) of two <2 x i32> vectors to %A, returning %A + 2 x i32 (8 bytes).
; The expected store uses the "#8" post-increment form on x0; %ptr is unused.
5104define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
5105;CHECK-LABEL: test_v2i32_post_imm_st2lane:
5106;CHECK: st2.s { v0, v1 }[0], [x0], #8
5107  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
5108  %tmp = getelementptr i32, i32* %A, i32 2
5109  ret i32* %tmp
5110}
5111
; st2lane (lane 0) of two <2 x i32> vectors to %A, returning %A + %inc x i32.
; The expected store post-increments x0 by any x register; %ptr is unused.
5112define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
5113;CHECK-LABEL: test_v2i32_post_reg_st2lane:
5114;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5115  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
5116  %tmp = getelementptr i32, i32* %A, i64 %inc
5117  ret i32* %tmp
5118}
5119
5120declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)
5121
5122
; st2lane (lane 0) of two <2 x i64> vectors to %A, returning %A + 2 x i64 (16 bytes).
; The expected store uses the "#16" post-increment form on x0; %ptr is unused.
5123define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
5124;CHECK-LABEL: test_v2i64_post_imm_st2lane:
5125;CHECK: st2.d { v0, v1 }[0], [x0], #16
5126  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
5127  %tmp = getelementptr i64, i64* %A, i64 2
5128  ret i64* %tmp
5129}
5130
; st2lane (lane 0) of two <2 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
5131define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
5132;CHECK-LABEL: test_v2i64_post_reg_st2lane:
5133;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5134  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
5135  %tmp = getelementptr i64, i64* %A, i64 %inc
5136  ret i64* %tmp
5137}
5138
5139declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)
5140
5141
; st2lane (lane 0) of two <1 x i64> vectors to %A, returning %A + 2 x i64 (16 bytes).
; The expected store uses the "#16" post-increment form on x0; %ptr is unused.
5142define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
5143;CHECK-LABEL: test_v1i64_post_imm_st2lane:
5144;CHECK: st2.d { v0, v1 }[0], [x0], #16
5145  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
5146  %tmp = getelementptr i64, i64* %A, i64 2
5147  ret i64* %tmp
5148}
5149
; st2lane (lane 0) of two <1 x i64> vectors to %A, returning %A + %inc x i64.
; The expected store post-increments x0 by any x register; %ptr is unused.
5150define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
5151;CHECK-LABEL: test_v1i64_post_reg_st2lane:
5152;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5153  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
5154  %tmp = getelementptr i64, i64* %A, i64 %inc
5155  ret i64* %tmp
5156}
5157
5158declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
5159
5160
; st2lane (lane 0) of two <4 x float> vectors to %A, returning %A + 2 x float (8 bytes).
; The expected store uses the "#8" post-increment form on x0; %ptr is unused.
5161define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
5162;CHECK-LABEL: test_v4f32_post_imm_st2lane:
5163;CHECK: st2.s { v0, v1 }[0], [x0], #8
5164  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
5165  %tmp = getelementptr float, float* %A, i32 2
5166  ret float* %tmp
5167}
5168
; st2lane (lane 0) of two <4 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
5169define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
5170;CHECK-LABEL: test_v4f32_post_reg_st2lane:
5171;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5172  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
5173  %tmp = getelementptr float, float* %A, i64 %inc
5174  ret float* %tmp
5175}
5176
5177declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)
5178
5179
; st2lane (lane 0) of two <2 x float> vectors to %A, returning %A + 2 x float (8 bytes).
; The expected store uses the "#8" post-increment form on x0; %ptr is unused.
5180define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
5181;CHECK-LABEL: test_v2f32_post_imm_st2lane:
5182;CHECK: st2.s { v0, v1 }[0], [x0], #8
5183  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
5184  %tmp = getelementptr float, float* %A, i32 2
5185  ret float* %tmp
5186}
5187
; st2lane (lane 0) of two <2 x float> vectors to %A, returning %A + %inc x float.
; The expected store post-increments x0 by any x register; %ptr is unused.
5188define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
5189;CHECK-LABEL: test_v2f32_post_reg_st2lane:
5190;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5191  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
5192  %tmp = getelementptr float, float* %A, i64 %inc
5193  ret float* %tmp
5194}
5195
5196declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)
5197
5198
; st2lane (lane 0) of two <2 x double> vectors to %A, returning %A + 2 x double (16 bytes).
; The expected store uses the "#16" post-increment form on x0; %ptr is unused.
5199define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
5200;CHECK-LABEL: test_v2f64_post_imm_st2lane:
5201;CHECK: st2.d { v0, v1 }[0], [x0], #16
5202  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
5203  %tmp = getelementptr double, double* %A, i64 2
5204  ret double* %tmp
5205}
5206
; st2lane (lane 0) of two <2 x double> vectors to %A, returning %A + %inc x double.
; The expected store post-increments x0 by any x register; %ptr is unused.
5207define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
5208;CHECK-LABEL: test_v2f64_post_reg_st2lane:
5209;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5210  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
5211  %tmp = getelementptr double, double* %A, i64 %inc
5212  ret double* %tmp
5213}
5214
5215declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)
5216
5217
; st2lane (lane 0) of two <1 x double> vectors to %A, returning %A + 2 x double (16 bytes).
; The expected store uses the "#16" post-increment form on x0; %ptr is unused.
5218define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
5219;CHECK-LABEL: test_v1f64_post_imm_st2lane:
5220;CHECK: st2.d { v0, v1 }[0], [x0], #16
5221  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
5222  %tmp = getelementptr double, double* %A, i64 2
5223  ret double* %tmp
5224}
5225
; Post-index (register) st2lane test on <1 x double> operands: stores lane 0 of
; %B and %C at %A and returns %A advanced by %inc doubles. The test expects the
; writeback operand to be an x register. %ptr is not referenced.
5226define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
5227;CHECK-LABEL: test_v1f64_post_reg_st2lane:
5228;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5229  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
5230  %tmp = getelementptr double, double* %A, i64 %inc
5231  ret double* %tmp
5232}
5233
5234declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)
5235
5236
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i8 elements. The test expects the writeback on the
; store itself as `[x0], #3`. %ptr is not referenced.
5237define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
5238;CHECK-LABEL: test_v16i8_post_imm_st3lane:
5239;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
5240  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
5241  %tmp = getelementptr i8, i8* %A, i32 3
5242  ret i8* %tmp
5243}
5244
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i8 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5245define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
5246;CHECK-LABEL: test_v16i8_post_reg_st3lane:
5247;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5248  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
5249  %tmp = getelementptr i8, i8* %A, i64 %inc
5250  ret i8* %tmp
5251}
5252
5253declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
5254
5255
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i8 elements. The test expects the writeback on the
; store itself as `[x0], #3`. %ptr is not referenced.
5256define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
5257;CHECK-LABEL: test_v8i8_post_imm_st3lane:
5258;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
5259  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
5260  %tmp = getelementptr i8, i8* %A, i32 3
5261  ret i8* %tmp
5262}
5263
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i8 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5264define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
5265;CHECK-LABEL: test_v8i8_post_reg_st3lane:
5266;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5267  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
5268  %tmp = getelementptr i8, i8* %A, i64 %inc
5269  ret i8* %tmp
5270}
5271
5272declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
5273
5274
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i16 elements. The test expects the writeback on the
; store itself as `[x0], #6`. %ptr is not referenced.
5275define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
5276;CHECK-LABEL: test_v8i16_post_imm_st3lane:
5277;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
5278  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
5279  %tmp = getelementptr i16, i16* %A, i32 3
5280  ret i16* %tmp
5281}
5282
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i16 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5283define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
5284;CHECK-LABEL: test_v8i16_post_reg_st3lane:
5285;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5286  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
5287  %tmp = getelementptr i16, i16* %A, i64 %inc
5288  ret i16* %tmp
5289}
5290
5291declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
5292
5293
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i16 elements. The test expects the writeback on the
; store itself as `[x0], #6`. %ptr is not referenced.
5294define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
5295;CHECK-LABEL: test_v4i16_post_imm_st3lane:
5296;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
5297  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
5298  %tmp = getelementptr i16, i16* %A, i32 3
5299  ret i16* %tmp
5300}
5301
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i16 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5302define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
5303;CHECK-LABEL: test_v4i16_post_reg_st3lane:
5304;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5305  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
5306  %tmp = getelementptr i16, i16* %A, i64 %inc
5307  ret i16* %tmp
5308}
5309
5310declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
5311
5312
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i32 elements. The test expects the writeback on the
; store itself as `[x0], #12`. %ptr is not referenced.
5313define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
5314;CHECK-LABEL: test_v4i32_post_imm_st3lane:
5315;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5316  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
5317  %tmp = getelementptr i32, i32* %A, i32 3
5318  ret i32* %tmp
5319}
5320
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i32 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5321define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
5322;CHECK-LABEL: test_v4i32_post_reg_st3lane:
5323;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5324  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
5325  %tmp = getelementptr i32, i32* %A, i64 %inc
5326  ret i32* %tmp
5327}
5328
5329declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
5330
5331
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i32 elements. The test expects the writeback on the
; store itself as `[x0], #12`. %ptr is not referenced.
5332define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
5333;CHECK-LABEL: test_v2i32_post_imm_st3lane:
5334;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5335  call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
5336  %tmp = getelementptr i32, i32* %A, i32 3
5337  ret i32* %tmp
5338}
5339
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i32 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5340define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
5341;CHECK-LABEL: test_v2i32_post_reg_st3lane:
5342;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5343  call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
5344  %tmp = getelementptr i32, i32* %A, i64 %inc
5345  ret i32* %tmp
5346}
5347
5348declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
5349
5350
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 i64 elements. The test expects the writeback on the
; store itself as `[x0], #24`. %ptr is not referenced.
5351define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
5352;CHECK-LABEL: test_v2i64_post_imm_st3lane:
5353;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5354  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5355  %tmp = getelementptr i64, i64* %A, i64 3
5356  ret i64* %tmp
5357}
5358
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc i64 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5359define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
5360;CHECK-LABEL: test_v2i64_post_reg_st3lane:
5361;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5362  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5363  %tmp = getelementptr i64, i64* %A, i64 %inc
5364  ret i64* %tmp
5365}
5366
5367declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
5368
5369
; Post-index (immediate) st3lane test on <1 x i64> operands: stores lane 0 of
; %B, %C and %D at %A and returns %A advanced by 3 i64 elements. The test
; expects the writeback on the store itself as `[x0], #24`. %ptr is not
; referenced.
5370define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
5371;CHECK-LABEL: test_v1i64_post_imm_st3lane:
5372;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5373  call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5374  %tmp = getelementptr i64, i64* %A, i64 3
5375  ret i64* %tmp
5376}
5377
; Post-index (register) st3lane test on <1 x i64> operands: stores lane 0 of
; %B, %C and %D at %A and returns %A advanced by %inc i64 elements. The test
; expects the writeback operand to be an x register. %ptr is not referenced.
5378define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
5379;CHECK-LABEL: test_v1i64_post_reg_st3lane:
5380;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5381  call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5382  %tmp = getelementptr i64, i64* %A, i64 %inc
5383  ret i64* %tmp
5384}
5385
5386declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
5387
5388
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 floats. The test expects the writeback on the store
; itself as `[x0], #12`. %ptr is not referenced.
5389define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
5390;CHECK-LABEL: test_v4f32_post_imm_st3lane:
5391;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5392  call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5393  %tmp = getelementptr float, float* %A, i32 3
5394  ret float* %tmp
5395}
5396
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc floats. The test expects the writeback operand to
; be an x register. %ptr is not referenced.
5397define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
5398;CHECK-LABEL: test_v4f32_post_reg_st3lane:
5399;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5400  call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5401  %tmp = getelementptr float, float* %A, i64 %inc
5402  ret float* %tmp
5403}
5404
5405declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)
5406
5407
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 floats. The test expects the writeback on the store
; itself as `[x0], #12`. %ptr is not referenced.
5408define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
5409;CHECK-LABEL: test_v2f32_post_imm_st3lane:
5410;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5411  call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5412  %tmp = getelementptr float, float* %A, i32 3
5413  ret float* %tmp
5414}
5415
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc floats. The test expects the writeback operand to
; be an x register. %ptr is not referenced.
5416define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
5417;CHECK-LABEL: test_v2f32_post_reg_st3lane:
5418;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5419  call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5420  %tmp = getelementptr float, float* %A, i64 %inc
5421  ret float* %tmp
5422}
5423
5424declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)
5425
5426
; Post-index (immediate) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by 3 doubles. The test expects the writeback on the store
; itself as `[x0], #24`. %ptr is not referenced.
5427define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
5428;CHECK-LABEL: test_v2f64_post_imm_st3lane:
5429;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5430  call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5431  %tmp = getelementptr double, double* %A, i64 3
5432  ret double* %tmp
5433}
5434
; Post-index (register) st3lane test: stores lane 0 of %B, %C and %D at %A and
; returns %A advanced by %inc doubles. The test expects the writeback operand to
; be an x register. %ptr is not referenced.
5435define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
5436;CHECK-LABEL: test_v2f64_post_reg_st3lane:
5437;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5438  call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5439  %tmp = getelementptr double, double* %A, i64 %inc
5440  ret double* %tmp
5441}
5442
5443declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)
5444
5445
; Post-index (immediate) st3lane test on <1 x double> operands: stores lane 0 of
; %B, %C and %D at %A and returns %A advanced by 3 doubles. The test expects the
; writeback on the store itself as `[x0], #24`. %ptr is not referenced.
5446define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
5447;CHECK-LABEL: test_v1f64_post_imm_st3lane:
5448;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5449  call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5450  %tmp = getelementptr double, double* %A, i64 3
5451  ret double* %tmp
5452}
5453
; Post-index (register) st3lane test on <1 x double> operands: stores lane 0 of
; %B, %C and %D at %A and returns %A advanced by %inc doubles. The test expects
; the writeback operand to be an x register. %ptr is not referenced.
5454define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
5455;CHECK-LABEL: test_v1f64_post_reg_st3lane:
5456;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5457  call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5458  %tmp = getelementptr double, double* %A, i64 %inc
5459  ret double* %tmp
5460}
5461
5462declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)
5463
5464
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i8 elements. The test expects the writeback on
; the store itself as `[x0], #4`. %ptr is not referenced.
5465define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
5466;CHECK-LABEL: test_v16i8_post_imm_st4lane:
5467;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5468  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5469  %tmp = getelementptr i8, i8* %A, i32 4
5470  ret i8* %tmp
5471}
5472
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i8 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5473define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
5474;CHECK-LABEL: test_v16i8_post_reg_st4lane:
5475;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5476  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5477  %tmp = getelementptr i8, i8* %A, i64 %inc
5478  ret i8* %tmp
5479}
5480
5481declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
5482
5483
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i8 elements. The test expects the writeback on
; the store itself as `[x0], #4`. %ptr is not referenced.
5484define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
5485;CHECK-LABEL: test_v8i8_post_imm_st4lane:
5486;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5487  call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5488  %tmp = getelementptr i8, i8* %A, i32 4
5489  ret i8* %tmp
5490}
5491
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i8 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5492define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
5493;CHECK-LABEL: test_v8i8_post_reg_st4lane:
5494;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5495  call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5496  %tmp = getelementptr i8, i8* %A, i64 %inc
5497  ret i8* %tmp
5498}
5499
5500declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
5501
5502
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i16 elements. The test expects the writeback on
; the store itself as `[x0], #8`. %ptr is not referenced.
5503define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
5504;CHECK-LABEL: test_v8i16_post_imm_st4lane:
5505;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5506  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5507  %tmp = getelementptr i16, i16* %A, i32 4
5508  ret i16* %tmp
5509}
5510
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i16 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5511define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
5512;CHECK-LABEL: test_v8i16_post_reg_st4lane:
5513;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5514  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5515  %tmp = getelementptr i16, i16* %A, i64 %inc
5516  ret i16* %tmp
5517}
5518
5519declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
5520
5521
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i16 elements. The test expects the writeback on
; the store itself as `[x0], #8`. %ptr is not referenced.
5522define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
5523;CHECK-LABEL: test_v4i16_post_imm_st4lane:
5524;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5525  call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5526  %tmp = getelementptr i16, i16* %A, i32 4
5527  ret i16* %tmp
5528}
5529
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i16 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5530define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
5531;CHECK-LABEL: test_v4i16_post_reg_st4lane:
5532;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5533  call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5534  %tmp = getelementptr i16, i16* %A, i64 %inc
5535  ret i16* %tmp
5536}
5537
5538declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
5539
5540
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i32 elements. The test expects the writeback on
; the store itself as `[x0], #16`. %ptr is not referenced.
5541define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
5542;CHECK-LABEL: test_v4i32_post_imm_st4lane:
5543;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5544  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5545  %tmp = getelementptr i32, i32* %A, i32 4
5546  ret i32* %tmp
5547}
5548
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i32 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5549define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
5550;CHECK-LABEL: test_v4i32_post_reg_st4lane:
5551;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5552  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5553  %tmp = getelementptr i32, i32* %A, i64 %inc
5554  ret i32* %tmp
5555}
5556
5557declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
5558
5559
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i32 elements. The test expects the writeback on
; the store itself as `[x0], #16`. %ptr is not referenced.
5560define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
5561;CHECK-LABEL: test_v2i32_post_imm_st4lane:
5562;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5563  call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5564  %tmp = getelementptr i32, i32* %A, i32 4
5565  ret i32* %tmp
5566}
5567
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i32 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5568define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
5569;CHECK-LABEL: test_v2i32_post_reg_st4lane:
5570;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5571  call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5572  %tmp = getelementptr i32, i32* %A, i64 %inc
5573  ret i32* %tmp
5574}
5575
5576declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
5577
5578
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 i64 elements. The test expects the writeback on
; the store itself as `[x0], #32`. %ptr is not referenced.
5579define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
5580;CHECK-LABEL: test_v2i64_post_imm_st4lane:
5581;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5582  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5583  %tmp = getelementptr i64, i64* %A, i64 4
5584  ret i64* %tmp
5585}
5586
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc i64 elements. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5587define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
5588;CHECK-LABEL: test_v2i64_post_reg_st4lane:
5589;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5590  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5591  %tmp = getelementptr i64, i64* %A, i64 %inc
5592  ret i64* %tmp
5593}
5594
5595declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
5596
5597
; Post-index (immediate) st4lane test on <1 x i64> operands: stores lane 0 of
; %B, %C, %D and %E at %A and returns %A advanced by 4 i64 elements. The test
; expects the writeback on the store itself as `[x0], #32`. %ptr is not
; referenced.
5598define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
5599;CHECK-LABEL: test_v1i64_post_imm_st4lane:
5600;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5601  call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5602  %tmp = getelementptr i64, i64* %A, i64 4
5603  ret i64* %tmp
5604}
5605
; Post-index (register) st4lane test on <1 x i64> operands: stores lane 0 of
; %B, %C, %D and %E at %A and returns %A advanced by %inc i64 elements. The test
; expects the writeback operand to be an x register. %ptr is not referenced.
5606define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
5607;CHECK-LABEL: test_v1i64_post_reg_st4lane:
5608;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5609  call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5610  %tmp = getelementptr i64, i64* %A, i64 %inc
5611  ret i64* %tmp
5612}
5613
5614declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
5615
5616
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 floats. The test expects the writeback on the
; store itself as `[x0], #16`. %ptr is not referenced.
5617define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
5618;CHECK-LABEL: test_v4f32_post_imm_st4lane:
5619;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5620  call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5621  %tmp = getelementptr float, float* %A, i32 4
5622  ret float* %tmp
5623}
5624
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc floats. The test expects the writeback operand
; to be an x register. %ptr is not referenced.
5625define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
5626;CHECK-LABEL: test_v4f32_post_reg_st4lane:
5627;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5628  call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5629  %tmp = getelementptr float, float* %A, i64 %inc
5630  ret float* %tmp
5631}
5632
5633declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
5634
5635
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 floats. The test expects the writeback on the
; store itself as `[x0], #16`. %ptr is not referenced.
5636define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
5637;CHECK-LABEL: test_v2f32_post_imm_st4lane:
5638;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5639  call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5640  %tmp = getelementptr float, float* %A, i32 4
5641  ret float* %tmp
5642}
5643
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc floats. The test expects the writeback operand
; to be an x register. %ptr is not referenced.
5644define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
5645;CHECK-LABEL: test_v2f32_post_reg_st4lane:
5646;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5647  call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5648  %tmp = getelementptr float, float* %A, i64 %inc
5649  ret float* %tmp
5650}
5651
5652declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)
5653
5654
; Post-index (immediate) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by 4 doubles. The test expects the writeback on the
; store itself as `[x0], #32`. %ptr is not referenced.
5655define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
5656;CHECK-LABEL: test_v2f64_post_imm_st4lane:
5657;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5658  call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5659  %tmp = getelementptr double, double* %A, i64 4
5660  ret double* %tmp
5661}
5662
; Post-index (register) st4lane test: stores lane 0 of %B, %C, %D and %E at %A
; and returns %A advanced by %inc doubles. The test expects the writeback
; operand to be an x register. %ptr is not referenced.
5663define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
5664;CHECK-LABEL: test_v2f64_post_reg_st4lane:
5665;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5666  call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5667  %tmp = getelementptr double, double* %A, i64 %inc
5668  ret double* %tmp
5669}
5670
5671declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)
5672
5673
; Post-index (immediate) st4lane test on <1 x double> operands: stores lane 0 of
; %B, %C, %D and %E at %A and returns %A advanced by 4 doubles. The test expects
; the writeback on the store itself as `[x0], #32`. %ptr is not referenced.
5674define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
5675;CHECK-LABEL: test_v1f64_post_imm_st4lane:
5676;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5677  call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5678  %tmp = getelementptr double, double* %A, i64 4
5679  ret double* %tmp
5680}
5681
; Post-index (register) st4lane test on <1 x double> operands: stores lane 0 of
; %B, %C, %D and %E at %A and returns %A advanced by %inc doubles. The test
; expects the writeback operand to be an x register. %ptr is not referenced.
5682define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
5683;CHECK-LABEL: test_v1f64_post_reg_st4lane:
5684;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5685  call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5686  %tmp = getelementptr double, double* %A, i64 %inc
5687  ret double* %tmp
5688}
5689
5690declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
5691
; Post-index (immediate) ld1r test: loads one i8 from %bar, inserts that same
; scalar into all 16 lanes of a <16 x i8>, and stores %bar + 1 element to %ptr.
; The test expects a replicating load with writeback `[x0], #1`.
5692define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5693; CHECK-LABEL: test_v16i8_post_imm_ld1r:
5694; CHECK: ld1r.16b { v0 }, [x0], #1
5695  %tmp1 = load i8, i8* %bar
5696  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5697  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5698  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5699  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5700  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5701  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5702  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5703  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5704  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5705  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5706  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5707  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5708  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5709  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5710  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5711  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5712  %tmp18 = getelementptr i8, i8* %bar, i64 1
5713  store i8* %tmp18, i8** %ptr
5714  ret <16 x i8> %tmp17
5715}
5716
; Post-index (register) ld1r test: loads one i8 from %bar, inserts that same
; scalar into all 16 lanes of a <16 x i8>, and stores %bar + %inc elements to
; %ptr. The test expects a replicating load whose writeback operand is an x
; register.
5717define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5718; CHECK-LABEL: test_v16i8_post_reg_ld1r:
5719; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
5720  %tmp1 = load i8, i8* %bar
5721  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5722  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5723  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5724  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5725  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5726  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5727  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5728  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5729  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5730  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5731  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5732  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5733  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5734  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5735  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5736  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5737  %tmp18 = getelementptr i8, i8* %bar, i64 %inc
5738  store i8* %tmp18, i8** %ptr
5739  ret <16 x i8> %tmp17
5740}
5741
; Post-index (immediate) ld1r test: loads one i8 from %bar, inserts that same
; scalar into all 8 lanes of a <8 x i8>, and stores %bar + 1 element to %ptr.
; The test expects a replicating load with writeback `[x0], #1`.
5742define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5743; CHECK-LABEL: test_v8i8_post_imm_ld1r:
5744; CHECK: ld1r.8b { v0 }, [x0], #1
5745  %tmp1 = load i8, i8* %bar
5746  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5747  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5748  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5749  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5750  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5751  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5752  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5753  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5754  %tmp10 = getelementptr i8, i8* %bar, i64 1
5755  store i8* %tmp10, i8** %ptr
5756  ret <8 x i8> %tmp9
5757}
5758
; Post-index (register) ld1r test: loads one i8 from %bar, inserts that same
; scalar into all 8 lanes of a <8 x i8>, and stores %bar + %inc elements to
; %ptr. The test expects a replicating load whose writeback operand is an x
; register.
5759define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5760; CHECK-LABEL: test_v8i8_post_reg_ld1r:
5761; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
5762  %tmp1 = load i8, i8* %bar
5763  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5764  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5765  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5766  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5767  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5768  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5769  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5770  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5771  %tmp10 = getelementptr i8, i8* %bar, i64 %inc
5772  store i8* %tmp10, i8** %ptr
5773  ret <8 x i8> %tmp9
5774}
5775
; Post-index (immediate) ld1r test: loads one i16 from %bar, inserts that same
; scalar into all 8 lanes of a <8 x i16>, and stores %bar + 1 element to %ptr.
; The test expects a replicating load with writeback `[x0], #2`.
5776define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5777; CHECK-LABEL: test_v8i16_post_imm_ld1r:
5778; CHECK: ld1r.8h { v0 }, [x0], #2
5779  %tmp1 = load i16, i16* %bar
5780  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5781  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5782  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5783  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5784  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5785  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5786  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5787  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5788  %tmp10 = getelementptr i16, i16* %bar, i64 1
5789  store i16* %tmp10, i16** %ptr
5790  ret <8 x i16> %tmp9
5791}
5792
; Post-index (register) ld1r test: loads one i16 from %bar, inserts that same
; scalar into all 8 lanes of a <8 x i16>, and stores %bar + %inc elements to
; %ptr. The test expects a replicating load whose writeback operand is an x
; register.
5793define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5794; CHECK-LABEL: test_v8i16_post_reg_ld1r:
5795; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
5796  %tmp1 = load i16, i16* %bar
5797  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5798  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5799  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5800  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5801  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5802  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5803  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5804  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5805  %tmp10 = getelementptr i16, i16* %bar, i64 %inc
5806  store i16* %tmp10, i16** %ptr
5807  ret <8 x i16> %tmp9
5808}
5809
; Post-index (immediate) ld1r test: loads one i16 from %bar, inserts that same
; scalar into all 4 lanes of a <4 x i16>, and stores %bar + 1 element to %ptr.
; The test expects a replicating load with writeback `[x0], #2`.
5810define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5811; CHECK-LABEL: test_v4i16_post_imm_ld1r:
5812; CHECK: ld1r.4h { v0 }, [x0], #2
5813  %tmp1 = load i16, i16* %bar
5814  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5815  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5816  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5817  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5818  %tmp6 = getelementptr i16, i16* %bar, i64 1
5819  store i16* %tmp6, i16** %ptr
5820  ret <4 x i16> %tmp5
5821}
5822
; Post-index (register) ld1r test: loads one i16 from %bar, inserts that same
; scalar into all 4 lanes of a <4 x i16>, and stores %bar + %inc elements to
; %ptr. The test expects a replicating load whose writeback operand is an x
; register.
5823define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5824; CHECK-LABEL: test_v4i16_post_reg_ld1r:
5825; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
5826  %tmp1 = load i16, i16* %bar
5827  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5828  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5829  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5830  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5831  %tmp6 = getelementptr i16, i16* %bar, i64 %inc
5832  store i16* %tmp6, i16** %ptr
5833  ret <4 x i16> %tmp5
5834}
5835
; Loads one i32, replicates it into every lane of a <4 x i32>, and stores the
; pointer advanced by one element to %ptr. FileCheck expects a post-indexed
; ld1r.4s with immediate #4.
define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
  %elt = load i32, i32* %bar
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %splat = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <4 x i32> %splat
}
5848
; Loads one i32, replicates it into every lane of a <4 x i32>, and stores the
; pointer advanced by %inc elements to %ptr. FileCheck expects a post-indexed
; ld1r.4s whose increment is some x register.
define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %splat = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <4 x i32> %splat
}
5861
; Loads one i32, replicates it into both lanes of a <2 x i32>, and stores the
; pointer advanced by one element to %ptr. FileCheck expects a post-indexed
; ld1r.2s with immediate #4.
define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
  %elt = load i32, i32* %bar
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = insertelement <2 x i32> %lane0, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <2 x i32> %splat
}
5872
; Loads one i32, replicates it into both lanes of a <2 x i32>, and stores the
; pointer advanced by %inc elements to %ptr. FileCheck expects a post-indexed
; ld1r.2s whose increment is some x register.
define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = insertelement <2 x i32> %lane0, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <2 x i32> %splat
}
5883
; Loads one i64, replicates it into both lanes of a <2 x i64>, and stores the
; pointer advanced by one element to %ptr. FileCheck expects a post-indexed
; ld1r.2d with immediate #8.
define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
; CHECK-LABEL: test_v2i64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
  %elt = load i64, i64* %bar
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = insertelement <2 x i64> %lane0, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 1
  store i64* %next, i64** %ptr
  ret <2 x i64> %splat
}
5894
; Loads one i64, replicates it into both lanes of a <2 x i64>, and stores the
; pointer advanced by %inc elements to %ptr. FileCheck expects a post-indexed
; ld1r.2d whose increment is some x register.
define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
  %elt = load i64, i64* %bar
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = insertelement <2 x i64> %lane0, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 %inc
  store i64* %next, i64** %ptr
  ret <2 x i64> %splat
}
5905
; Loads one float, replicates it into every lane of a <4 x float>, and stores
; the pointer advanced by one element to %ptr. FileCheck expects a
; post-indexed ld1r.4s with immediate #4.
define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v4f32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
  %elt = load float, float* %bar
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %elt, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %elt, i32 2
  %splat = insertelement <4 x float> %lane2, float %elt, i32 3
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <4 x float> %splat
}
5918
; Loads one float, replicates it into every lane of a <4 x float>, and stores
; the pointer advanced by %inc elements to %ptr. FileCheck expects a
; post-indexed ld1r.4s whose increment is some x register.
define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4f32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %elt, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %elt, i32 2
  %splat = insertelement <4 x float> %lane2, float %elt, i32 3
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <4 x float> %splat
}
5931
; Loads one float, replicates it into both lanes of a <2 x float>, and stores
; the pointer advanced by one element to %ptr. FileCheck expects a
; post-indexed ld1r.2s with immediate #4.
define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v2f32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
  %elt = load float, float* %bar
  %lane0 = insertelement <2 x float> undef, float %elt, i32 0
  %splat = insertelement <2 x float> %lane0, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <2 x float> %splat
}
5942
; Loads one float, replicates it into both lanes of a <2 x float>, and stores
; the pointer advanced by %inc elements to %ptr. FileCheck expects a
; post-indexed ld1r.2s whose increment is some x register.
define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %lane0 = insertelement <2 x float> undef, float %elt, i32 0
  %splat = insertelement <2 x float> %lane0, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <2 x float> %splat
}
5953
; Loads one double, replicates it into both lanes of a <2 x double>, and
; stores the pointer advanced by one element to %ptr. FileCheck expects a
; post-indexed ld1r.2d with immediate #8.
define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
  %elt = load double, double* %bar
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = insertelement <2 x double> %lane0, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 1
  store double* %next, double** %ptr
  ret <2 x double> %splat
}
5964
; Loads one double, replicates it into both lanes of a <2 x double>, and
; stores the pointer advanced by %inc elements to %ptr. FileCheck expects a
; post-indexed ld1r.2d whose increment is some x register.
define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
  %elt = load double, double* %bar
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = insertelement <2 x double> %lane0, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 %inc
  store double* %next, double** %ptr
  ret <2 x double> %splat
}
5975
; Loads one i8 into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.b lane load with
; immediate #1.
define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
  %elt = load i8, i8* %bar
  %vec = insertelement <16 x i8> %A, i8 %elt, i32 1
  %next = getelementptr i8, i8* %bar, i64 1
  store i8* %next, i8** %ptr
  ret <16 x i8> %vec
}
5985
; Loads one i8 into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.b lane load whose
; increment is some x register.
define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i8, i8* %bar
  %vec = insertelement <16 x i8> %A, i8 %elt, i32 1
  %next = getelementptr i8, i8* %bar, i64 %inc
  store i8* %next, i8** %ptr
  ret <16 x i8> %vec
}
5995
; Loads one i8 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by one element to %ptr. FileCheck expects a post-indexed ld1.b
; lane load with immediate #1.
define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
  %elt = load i8, i8* %bar
  %vec = insertelement <8 x i8> %A, i8 %elt, i32 1
  %next = getelementptr i8, i8* %bar, i64 1
  store i8* %next, i8** %ptr
  ret <8 x i8> %vec
}
6005
; Loads one i8 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by %inc elements to %ptr. FileCheck expects a post-indexed ld1.b
; lane load whose increment is some x register.
define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i8, i8* %bar
  %vec = insertelement <8 x i8> %A, i8 %elt, i32 1
  %next = getelementptr i8, i8* %bar, i64 %inc
  store i8* %next, i8** %ptr
  ret <8 x i8> %vec
}
6015
; Loads one i16 into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.h lane load with
; immediate #2.
define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
  %elt = load i16, i16* %bar
  %vec = insertelement <8 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 1
  store i16* %next, i16** %ptr
  ret <8 x i16> %vec
}
6025
; Loads one i16 into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.h lane load whose
; increment is some x register.
define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i16, i16* %bar
  %vec = insertelement <8 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 %inc
  store i16* %next, i16** %ptr
  ret <8 x i16> %vec
}
6035
; Loads one i16 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by one element to %ptr. FileCheck expects a post-indexed ld1.h
; lane load with immediate #2.
define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
  %elt = load i16, i16* %bar
  %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 1
  store i16* %next, i16** %ptr
  ret <4 x i16> %vec
}
6045
; Loads one i16 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by %inc elements to %ptr. FileCheck expects a post-indexed ld1.h
; lane load whose increment is some x register.
define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i16, i16* %bar
  %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 %inc
  store i16* %next, i16** %ptr
  ret <4 x i16> %vec
}
6055
; Loads one i32 into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.s lane load with
; immediate #4.
define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load i32, i32* %bar
  %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <4 x i32> %vec
}
6065
; Loads one i32 into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.s lane load whose
; increment is some x register.
define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <4 x i32> %vec
}
6075
; Loads one i32 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by one element to %ptr. FileCheck expects a post-indexed ld1.s
; lane load with immediate #4.
define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load i32, i32* %bar
  %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <2 x i32> %vec
}
6085
; Loads one i32 into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by %inc elements to %ptr. FileCheck expects a post-indexed ld1.s
; lane load whose increment is some x register.
define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <2 x i32> %vec
}
6095
; Loads one i64 into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.d lane load with
; immediate #8.
define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
  %elt = load i64, i64* %bar
  %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 1
  store i64* %next, i64** %ptr
  ret <2 x i64> %vec
}
6105
; Loads one i64 into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.d lane load whose
; increment is some x register.
define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i64, i64* %bar
  %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 %inc
  store i64* %next, i64** %ptr
  ret <2 x i64> %vec
}
6115
; Loads one float into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.s lane load with
; immediate #4.
define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load float, float* %bar
  %vec = insertelement <4 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <4 x float> %vec
}
6125
; Loads one float into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.s lane load whose
; increment is some x register.
define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %vec = insertelement <4 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <4 x float> %vec
}
6135
; Loads one float into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by one element to %ptr. FileCheck expects a post-indexed ld1.s
; lane load with immediate #4.
define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load float, float* %bar
  %vec = insertelement <2 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <2 x float> %vec
}
6145
; Loads one float into lane 1 of the 64-bit vector %A and stores the pointer
; advanced by %inc elements to %ptr. FileCheck expects a post-indexed ld1.s
; lane load whose increment is some x register.
define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %vec = insertelement <2 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <2 x float> %vec
}
6155
; Loads one double into lane 1 of %A and stores the pointer advanced by one
; element to %ptr. FileCheck expects a post-indexed ld1.d lane load with
; immediate #8.
define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
  %elt = load double, double* %bar
  %vec = insertelement <2 x double> %A, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 1
  store double* %next, double** %ptr
  ret <2 x double> %vec
}
6165
; Loads one double into lane 1 of %A and stores the pointer advanced by %inc
; elements to %ptr. FileCheck expects a post-indexed ld1.d lane load whose
; increment is some x register.
define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load double, double* %bar
  %vec = insertelement <2 x double> %A, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 %inc
  store double* %next, double** %ptr
  ret <2 x double> %vec
}
6175
; Dependency test between the scalar load and the vector it is inserted into.
; The scalar is loaded first, then %vec is stored and the insert destination
; is loaded from memory. The expected output keeps a separate scalar ldr,
; a mov.s lane insert and an explicit add for the pointer update, instead of
; a post-indexed ld1 lane load.
define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
; CHECK: %bb.0:
; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
; CHECK-NEXT: str q0, [x3]
; CHECK-NEXT: ldr q0, [x4]
; CHECK-NEXT: mov.s v0[1], v[[LD]][0]
; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
; CHECK-NEXT: str [[POST]], [x1]
; CHECK-NEXT: ret
  %elt = load float, float* %bar
  store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16
  %dst = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
  %res = insertelement <4 x float> %dst, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <4 x float> %res
}
6195
; Exercise the narrow (V64) code path of the LD1LANEpost combine.
; The tests above do not reach it: their 64-bit insert_vector_elt nodes are
; widened to 128 bits before the combine gets a chance to run, so it never
; sees narrow vector types.
; Forcing the vector-ops legalizer to run makes the combine fire early; the
; ctpop call below is what triggers that legalizer.
; PR23265
define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
; CHECK: ld1.h  { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i16, i16* %bar
  %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 %inc
  store i16* %next, i16** %ptr
  ; Unrelated ctpop round-trip through %d, present only to force legalization.
  %pop.in = load <2 x i32>, <2 x i32>* %d
  %pop.out = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %pop.in)
  store <2 x i32> %pop.out, <2 x i32>* %d
  ret <4 x i16> %vec
}

declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
6217
; Builds two <2 x i32> vectors, each from two independent scalar loads, and
; stores their difference to %out. Lane 0 of each vector is expected to come
; from a scalar ldr and lane 1 from an ld1.s lane load into that same register.
; The ld1.s patterns reuse the register number captured by the matching ldr
; rather than capturing a new one, so the pairing is actually verified.
; CHECK-LABEL: test_ld1lane_build:
; CHECK-DAG: ldr s[[REGNUM0:[0-9]+]], [x0]
; CHECK-DAG: ld1.s { v[[REGNUM0]] }[1], [x1]
; CHECK-DAG: ldr s[[REGNUM1:[0-9]+]], [x2]
; CHECK-DAG: ld1.s { v[[REGNUM1]] }[1], [x3]
; CHECK: sub.2s v[[REGNUM2:[0-9]+]], v[[REGNUM0]], v[[REGNUM1]]
; CHECK-NEXT: str d[[REGNUM2]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
  ; First operand: { *ptr0, *ptr1 }.
  %load0 = load i32, i32* %ptr0, align 4
  %load1 = load i32, i32* %ptr1, align 4
  %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
  %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1

  ; Second operand: { *ptr2, *ptr3 }.
  %load2 = load i32, i32* %ptr2, align 4
  %load3 = load i32, i32* %ptr3, align 4
  %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
  %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1

  %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
  store <2 x i32> %sub, <2 x i32>* %out, align 16
  ret void
}
6241
; Builds a <4 x i16> from four independent scalar loads, subtracts %e from it
; and stores the result to %p. Lane 0 is expected to come from a scalar ldr
; and lanes 1-3 from ld1.h lane loads into the same register.
; CHECK-LABEL: test_ld1lane_build_i16:
; CHECK-DAG:  ldr h[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[3], [x3]
; CHECK:      sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
; CHECK-NEXT: str d[[REGNUM2]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) {
  %elt.a = load i16, i16* %a
  %elt.b = load i16, i16* %b
  %elt.c = load i16, i16* %c
  %elt.d = load i16, i16* %d
  %ins.a = insertelement <4 x i16> undef, i16 %elt.a, i64 0
  %ins.b = insertelement <4 x i16> %ins.a, i16 %elt.b, i64 1
  %ins.c = insertelement <4 x i16> %ins.b, i16 %elt.c, i64 2
  %built = insertelement <4 x i16> %ins.c, i16 %elt.d, i64 3
  %diff = sub nsw <4 x i16> %built, %e
  store <4 x i16> %diff, <4 x i16>* %p
  ret void
}
6263
; Builds a <4 x half> from four independent scalar loads, subtracts %e from it
; and stores the result to %p. Lane 0 is expected to come from a scalar ldr
; and lanes 1-3 from ld1.h lane loads into the same register. The expected
; output widens both operands with fcvtl, subtracts as .4s, and narrows the
; result with fcvtn.
; The stored register must be the fcvtn result (REGNUM3), not the fsub
; result (REGNUM2), so the final str pattern uses REGNUM3.
; CHECK-LABEL: test_ld1lane_build_half:
; CHECK-DAG:  ldr h[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG:  ld1.h { v[[REGNUM1]] }[3], [x3]
; CHECK-DAG:  fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h
; CHECK-DAG:  fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h
; CHECK:      fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]]
; CHECK-DAG:  fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s
; CHECK-NEXT: str d[[REGNUM3]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) {
  %ld.a = load half, half* %a
  %ld.b = load half, half* %b
  %ld.c = load half, half* %c
  %ld.d = load half, half* %d
  %v.a = insertelement <4 x half> undef, half %ld.a, i64 0
  %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1
  %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2
  %v = insertelement <4 x half> %v.c, half %ld.d, i64 3
  %sub = fsub <4 x half> %v, %e
  store <4 x half> %sub, <4 x half>* %p
  ret void
}
6288
; Builds an <8 x i8> from eight independent scalar loads, subtracts %v from it
; and stores the result to %p. Lane 0 is expected to come from a scalar ldr
; and lanes 1-7 from ld1.b lane loads into the same register.
; The eight element pointers occupy x0-x7, so the register holding %p is not
; fixed; the str pattern matches any x register as a complete [xN] operand.
; CHECK-LABEL: test_ld1lane_build_i8:
; CHECK-DAG:  ldr b[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[3], [x3]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[4], [x4]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[5], [x5]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[6], [x6]
; CHECK-DAG:  ld1.b { v[[REGNUM1]] }[7], [x7]
; CHECK:      sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
; CHECK-NEXT: str d[[REGNUM2]], [x{{[0-9]+}}]
; CHECK-NEXT: ret
define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) {
  %ld.a = load i8, i8* %a
  %ld.b = load i8, i8* %b
  %ld.c = load i8, i8* %c
  %ld.d = load i8, i8* %d
  %ld.e = load i8, i8* %e
  %ld.f = load i8, i8* %f
  %ld.g = load i8, i8* %g
  %ld.h = load i8, i8* %h
  %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0
  %v.b = insertelement <8 x i8> %v.a,  i8 %ld.b, i64 1
  %v.c = insertelement <8 x i8> %v.b,  i8 %ld.c, i64 2
  %v.d = insertelement <8 x i8> %v.c,  i8 %ld.d, i64 3
  %v.e = insertelement <8 x i8> %v.d,  i8 %ld.e, i64 4
  %v.f = insertelement <8 x i8> %v.e,  i8 %ld.f, i64 5
  %v.g = insertelement <8 x i8> %v.f,  i8 %ld.g, i64 6
  %v1 = insertelement <8 x i8> %v.g,  i8 %ld.h, i64 7
  %sub = sub nsw <8 x i8> %v1, %v
  store <8 x i8> %sub, <8 x i8>* %p
  ret void
}
6322