; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
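;
; This file checks that the AArch64 backend selects the expected NEON
; structured-load instructions (the ldN, ldN lane, ldNr replicate, ld1 lane,
; ld1r, and ld1xN families) for the corresponding @llvm.aarch64.neon.*
; intrinsics and IR idioms, and that the operands follow the calling
; convention: multi-vector results come back in v0-v3 and the pointer
; argument arrives in x0.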

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2_8b
; Make sure we are loading into the result registers defined by the ABI (i.e., v0, v1)
; and from the function argument, which is also defined by the ABI (i.e., x0).
; CHECK: ld2.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly

%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly

%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly

%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly

%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly

%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly

%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }

define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly

%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

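; The A64 ISA has no ld2/ld3/ld4 multiple-structure encoding with a .1d
; arrangement, so for the <1 x i64> (and, below, <1 x double>) cases the
; ldN intrinsics are expected to lower to the multi-register ld1.1d form.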
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}

declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly

%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }

define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x2_t %tmp2
}

define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x3_t %tmp2
}

define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x4_t %tmp2
}

declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly

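; The ldNlane intrinsics take the existing whole vectors plus a constant
; lane index and load one structure into that lane of each register, so the
; tests below expect the single-lane ldN.{b,h,s,d} forms with the tied
; input vectors already in v0-v3.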
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_16b
; CHECK: ld2.b { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_16b
; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_16b
; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_8h
; CHECK: ld2.h { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_8h
; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_8h
; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_4s
; CHECK: ld2.s { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_4s
; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_4s
; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly

define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_2d
; CHECK: ld2.d { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_2d
; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_2d
; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly

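; A scalar load splatted to every lane (a chain of insertelements of the
; same loaded value here, a zero-index shufflevector further below) should
; be matched to the ld1r load-and-replicate instruction rather than a
; separate scalar load and duplicate.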
define <8 x i8> @ld1r_8b(i8* %bar) {
; CHECK: ld1r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  ret <8 x i8> %tmp9
}

define <16 x i8> @ld1r_16b(i8* %bar) {
; CHECK: ld1r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.16b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  ret <16 x i8> %tmp17
}

define <4 x i16> @ld1r_4h(i16* %bar) {
; CHECK: ld1r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  ret <4 x i16> %tmp5
}

define <8 x i16> @ld1r_8h(i16* %bar) {
; CHECK: ld1r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  ret <8 x i16> %tmp9
}

define <2 x i32> @ld1r_2s(i32* %bar) {
; CHECK: ld1r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  ret <2 x i32> %tmp3
}

define <4 x i32> @ld1r_4s(i32* %bar) {
; CHECK: ld1r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  ret <4 x i32> %tmp5
}

define <2 x i64> @ld1r_2d(i64* %bar) {
; CHECK: ld1r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  ret <2 x i64> %tmp3
}

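; The ldNr intrinsics load a single structure and replicate it into every
; lane of each result register. Unlike the plain ldN forms, the replicate
; forms do have a .1d arrangement, which the ld2r_1d, ld3r_1d, and ld4r_1d
; tests below rely on.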
define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
; CHECK: ld2r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
; CHECK: ld3r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
; CHECK: ld4r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
; CHECK: ld2r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
; CHECK: ld3r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
; CHECK: ld4r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
; CHECK: ld2r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
; CHECK: ld3r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
; CHECK: ld4r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
; CHECK: ld2r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
; CHECK: ld3r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
; CHECK: ld4r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
; CHECK: ld2r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
; CHECK: ld3r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
; CHECK: ld4r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
; CHECK: ld2r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
; CHECK: ld3r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
; CHECK: ld4r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly

define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
; CHECK: ld2r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
; CHECK: ld3r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
; CHECK: ld4r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}

declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly

define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
; CHECK: ld2r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
; CHECK: ld3r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
; CHECK: ld4r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly

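; Loading a scalar and inserting it into lane 0 of an existing vector should
; select the single-lane ld1.{b,h,s,d} form, avoiding a separate scalar load
; followed by an insert.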
define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
  ret <16 x i8> %tmp2
}

define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
  ret <8 x i16> %tmp2
}

define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
  ret <4 x i32> %tmp2
}

define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_4s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load float, float* %bar
  %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
  ret <4 x float> %tmp2
}

define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
  ret <2 x i64> %tmp2
}

define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
; CHECK-LABEL: ld1_2d_double:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load double, double* %bar
  %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
  ret <2 x double> %tmp2
}

define <1 x i64> @ld1_1d(<1 x i64>* %p) {
; CHECK-LABEL: ld1_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
  %tmp = load <1 x i64>, <1 x i64>* %p, align 8
  ret <1 x i64> %tmp
}

define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
  ret <8 x i8> %tmp2
}

define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
  ret <4 x i16> %tmp2
}

define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_2s:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
  ret <2 x i32> %tmp2
}

define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_2s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load float, float* %bar
  %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
  ret <2 x float> %tmp2
}

; Test case for rdar://13098923: vld1_dup_u32 doesn't generate ld1r.2s
define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
entry:
; CHECK: ld1r_2s_from_dup
; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
; CHECK-NEXT: str d[[RESREGNUM]], [x2]
; CHECK-NEXT: ret
  %tmp = bitcast i8* %a to i32*
  %tmp1 = load i32, i32* %tmp, align 4
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
  %tmp4 = bitcast i8* %b to i32*
  %tmp5 = load i32, i32* %tmp4, align 4
  %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
  %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
  %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
  %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
  %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
  %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
  %tmp10 = bitcast i16* %diff to <4 x i16>*
  store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
  ret void
}

; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
define <4 x float> @ld1r_4s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
  ret <4 x float> %tmp4
}

define <2 x float> @ld1r_2s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
  ret <2 x float> %tmp2
}

define <2 x double> @ld1r_2d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
  ret <2 x double> %tmp2
}

define <1 x double> @ld1r_1d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  ret <1 x double> %tmp1
}

define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %lane
}

define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %lane
}

define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %lane
}

define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %lane
}

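; The ld1xN intrinsics load N whole vectors from consecutive memory into
; consecutive registers with no de-interleaving, unlike the ldN forms above.
; Any consecutive register tuple is acceptable here, so these checks match
; the registers with a regex instead of pinning v0-v3.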
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x2_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x2_t %val
}

define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x2_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x2_t %val
}

define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x2_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x2_t %val
}

define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x2_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x2_t %val
}

define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x2_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x2_t %val
}

define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x2_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x2_t %val
}

%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

%struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> }
%struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> }
%struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> }

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x2_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x2_t %val
}

define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x2_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x2_t %val
}

define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x2_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x2_t %val
}

define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x2_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x2_t %val
}

define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x2_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x2_t %val
}

define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x2_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x2_t %val
}

declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x3_t %val
}

define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x3_t %val
}

define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x3_t %val
}

define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x3_t %val
}

define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x3_t %val
}

define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x3_t %val
}

declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x3_t %val
}

define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x3_t %val
}

define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x3_t %val
}

define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x3_t %val
}

define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x3_t %val
}

define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x3_t %val
}

declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x4_t %val
}

define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x4_t %val
}

define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x4_t %val
}

define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x4_t %val
}

define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x4_t %val
}

define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x4_t %val
}

declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x4_t %val
}

define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x4_t %val
}

define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x4_t %val
}

define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x4_t %val
}

define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x4_t %val
}

define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x4_t %val
}