; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Full lane reversal of a <4 x i32>. The expected code builds the result in q1
; with four s-register moves and then copies it back into q0.
define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle1_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %out
}

; Identity mask on a <4 x i32>: no instructions are expected besides the return.
define arm_aapcs_vfpcc <4 x i32> @shuffle2_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle2_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %out
}

; Irregular <4 x i32> permutation (3, 1, 2, 0). Expected to be lowered lane by
; lane through s-register moves into q1, then copied back into q0.
define arm_aapcs_vfpcc <4 x i32> @shuffle3_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle3_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i32> %out
}

; Swap of the 32-bit lanes inside each 64-bit half (1, 0, 3, 2); expected to
; select vrev64.32.
define arm_aapcs_vfpcc <4 x i32> @shuffle5_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle5_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %out
}

; Mask with three undef lanes and lane 3 kept in place; expected to need no
; instructions besides the return.
define arm_aapcs_vfpcc <4 x i32> @shuffle6_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle6_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
  ret <4 x i32> %out
}

; Full lane reversal of an <8 x i16>. Expected to be expanded into one
; extract-to-r0 / insert-from-r0 pair per lane, reading from a copy in q1.
define arm_aapcs_vfpcc <8 x i16> @shuffle1_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle1_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u16 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %out
}

; Identity mask on an <8 x i16>: no instructions are expected besides the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle2_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle2_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %out
}

; Irregular <8 x i16> permutation. Result lanes 2-7 are expected to be filled
; with per-lane extract/insert pairs, while result lanes 0-1 (source lanes 4-5)
; are expected to be moved together by a single 32-bit s-register copy.
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle3_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %out
}

; Reversal of the 16-bit lanes inside each 64-bit half; expected to select
; vrev64.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle5_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle5_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %out
}

; Swap of adjacent 16-bit lanes inside each 32-bit word; expected to select an
; in-place vrev32.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle6_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle6_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i16> %out
}

; Full lane reversal of a <16 x i8>. Expected to be expanded into one
; extract-to-r0 / insert-from-r0 pair per byte lane, reading from a copy in q1.
define arm_aapcs_vfpcc <16 x i8> @shuffle1_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle1_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[15]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %out
}

; Identity mask on a <16 x i8>: no instructions are expected besides the return.
define arm_aapcs_vfpcc <16 x i8> @shuffle2_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle2_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %out
}

; Irregular <16 x i8> permutation. Expected to be expanded into one
; extract-to-r0 / insert-from-r0 pair per byte lane, reading from a copy in q1.
define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle3_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[4]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
  ret <16 x i8> %out
}

; Reversal of the byte lanes inside each 64-bit half; expected to select
; vrev64.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle5_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle5_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.8 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i8> %out
}

; Reversal of the byte lanes inside each 32-bit word; expected to select an
; in-place vrev32.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle6_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle6_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %out
}

; Swap of adjacent byte lanes inside each 16-bit halfword; expected to select
; an in-place vrev16.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle7_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle7_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i8> %out
}

; Identity mask on a <2 x i64>: no instructions are expected besides the return.
define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle1_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %out
}

; Swap of the two 64-bit lanes of a <2 x i64>. Expected to be done with four
; 32-bit s-register moves into q1 followed by a copy back into q0.
define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle2_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %out
}

; <2 x i64> mask with lane 0 undef and lane 1 kept in place; expected to need
; no instructions besides the return.
define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle3_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
  ret <2 x i64> %out
}

; Full lane reversal of a <4 x float>. The expected code builds the result in
; q1 with four s-register moves and then copies it back into q0.
define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle1_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %out
}

; Identity mask on a <4 x float>: no instructions are expected besides the
; return.
define arm_aapcs_vfpcc <4 x float> @shuffle2_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle2_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %out
}

; Irregular <4 x float> permutation (3, 1, 2, 0). Expected to be lowered lane
; by lane through s-register moves into q1, then copied back into q0.
define arm_aapcs_vfpcc <4 x float> @shuffle3_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle3_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x float> %out
}

; Swap of the 32-bit lanes inside each 64-bit half of a <4 x float>; expected
; to select vrev64.32.
define arm_aapcs_vfpcc <4 x float> @shuffle5_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle5_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x float> %out
}

; Full lane reversal of an <8 x half>. The expected code uses vmovx.f16 and
; s-register-to-GPR moves to obtain each source half, inserts them one lane at
; a time into q1, and finally copies q1 into q0.
define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle1_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s8, s2
; CHECK-NEXT:    vmov.16 q1[0], r1
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[2], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %out
}

; Identity mask on an <8 x half>: no instructions are expected besides the
; return.
define arm_aapcs_vfpcc <8 x half> @shuffle2_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle2_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %out
}

; Irregular <8 x half> permutation. Result lanes 2-7 are expected to be
; inserted one at a time into q1, result lanes 0-1 (source lanes 4-5) are
; expected to be moved together by one 32-bit s-register copy, and q1 is then
; copied into q0.
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle3_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[2], r1
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s0
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
  ret <8 x half> %out
}

; Reversal of the 16-bit lanes inside each 64-bit half of an <8 x half>;
; expected to select vrev64.16.
define arm_aapcs_vfpcc <8 x half> @shuffle5_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle5_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x half> %out
}

; Swap of adjacent 16-bit lanes inside each 32-bit word of an <8 x half>;
; expected to select an in-place vrev32.16.
define arm_aapcs_vfpcc <8 x half> @shuffle6_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle6_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x half> %out
}

; Identity mask on a <2 x double>: no instructions are expected besides the
; return.
define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle1_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %out
}

; Swap of the two 64-bit lanes of a <2 x double>. Expected to be done with
; four 32-bit s-register moves into q1 followed by a copy back into q0.
define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle2_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %out
}

; <2 x double> mask with lane 0 undef and lane 1 kept in place; expected to
; need no instructions besides the return.
define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle3_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
  ret <2 x double> %out
}


; Insert an i32 argument into lane 0 of an undef <4 x i32>; expected to be a
; single GPR-to-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
; CHECK-LABEL: insert_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %res
}

; Insert an i16 argument into lane 0 of an undef <8 x i16>; expected to be a
; single GPR-to-lane vmov.16.
define arm_aapcs_vfpcc <8 x i16> @insert_i16(i16 %a) {
; CHECK-LABEL: insert_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %res
}

; Insert an i8 argument into lane 0 of an undef <16 x i8>; expected to be a
; single GPR-to-lane vmov.8.
define arm_aapcs_vfpcc <16 x i8> @insert_i8(i8 %a) {
; CHECK-LABEL: insert_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %res
}

; Insert an i64 argument into lane 0 of an undef <2 x i64>; expected to be two
; 32-bit lane inserts, from r0 and r1.
define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
; CHECK-LABEL: insert_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %res
}

; Insert a float argument into lane 0 of an undef <4 x float>. No real
; instruction is expected: the value already sits in s0, so only a register
; kill annotation appears before the return.
define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
; CHECK-LABEL: insert_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    bx lr
entry:
  %res = insertelement <4 x float> undef, float %a, i32 0
  ret <4 x float> %res
}

; Insert a half, loaded through a pointer argument, into lane 0 of an undef
; <8 x half>; expected to be a single vldr.16 into s0.
; TODO: Calling convention needs fixing to pass half types directly to functions
define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
; CHECK-LABEL: insert_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %a = load half, half* %aa
  %res = insertelement <8 x half> undef, half %a, i32 0
  ret <8 x half> %res
}

; Insert a double argument into lane 0 of an undef <2 x double>. The expected
; code goes through memory: it sets up a frame with a 16-byte realigned stack
; pointer, stores d0 to the stack and reloads the slot as a full q register.
define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
; CHECK-LABEL: insert_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r6, r7, lr}
; CHECK-NEXT:    push {r4, r6, r7, lr}
; CHECK-NEXT:    .setfp r7, sp, #8
; CHECK-NEXT:    add r7, sp, #8
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    bfc r4, #0, #4
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    sub.w r4, r7, #8
; CHECK-NEXT:    vstr d0, [sp]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    pop {r4, r6, r7, pc}
entry:
  %res = insertelement <2 x double> undef, double %a, i32 0
  ret <2 x double> %res
}

; Two-input shuffle producing a <4 x i16> from lane 0 of %v and three lanes of
; a constant vector, then bitcast to i64. The expected code loads the constant
; lanes from a literal pool, inserts the extracted lane 0, narrows the four
; 32-bit lanes to 16 bits with a store to the stack, and reloads the result
; into r0/r1.
define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-LABEL: scalar_to_vector_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    adr r1, .LCPI38_0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    vstrh.32 q0, [r2]
; CHECK-NEXT:    ldrd r0, r1, [sp], #8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI38_0:
; CHECK-NEXT:    .zero 4
; CHECK-NEXT:    .long 7 @ 0x7
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 9 @ 0x9
entry:
  %f = shufflevector <8 x i16> %v, <8 x i16> <i16 undef, i16 7, i16 1, i16 9, i16 undef, i16 undef, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 9, i32 10, i32 11>
  %0 = bitcast <4 x i16> %f to i64
  ret i64 %0
}


; Extract lane 0 of a <4 x i32>; expected to be a single move from s0 to r0.
define arm_aapcs_vfpcc i32 @extract_i32_0(<4 x i32> %a) {
; CHECK-LABEL: extract_i32_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x i32> %a, i32 0
  ret i32 %res
}

; Extract lane 3 of a <4 x i32>; expected to be a single move from s3 to r0.
define arm_aapcs_vfpcc i32 @extract_i32_3(<4 x i32> %a) {
; CHECK-LABEL: extract_i32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x i32> %a, i32 3
  ret i32 %res
}

; Extract lane 0 of an <8 x i16>; expected to be a single vmov.u16 lane read
; into r0.
define arm_aapcs_vfpcc i16 @extract_i16_0(<8 x i16> %a) {
; CHECK-LABEL: extract_i16_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x i16> %a, i32 0
  ret i16 %res
}

; Extract lane 3 of an <8 x i16>; expected to be a single vmov.u16 lane read
; into r0.
define arm_aapcs_vfpcc i16 @extract_i16_3(<8 x i16> %a) {
; CHECK-LABEL: extract_i16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x i16> %a, i32 3
  ret i16 %res
}

; Extract lane 0 of a <16 x i8>; expected to be a single vmov.u8 lane read
; into r0.
define arm_aapcs_vfpcc i8 @extract_i8_0(<16 x i8> %a) {
; CHECK-LABEL: extract_i8_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <16 x i8> %a, i32 0
  ret i8 %res
}

; Extract lane 3 of a <16 x i8>; expected to be a single vmov.u8 lane read
; into r0.
define arm_aapcs_vfpcc i8 @extract_i8_3(<16 x i8> %a) {
; CHECK-LABEL: extract_i8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[3]
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <16 x i8> %a, i32 3
  ret i8 %res
}

; Extract lane 0 of a <2 x i64>; expected to be two 32-bit moves, s0 to r0 and
; s1 to r1.
define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x i64> %a, i32 0
  ret i64 %res
}

; Extract lane 1 of a <2 x i64>; expected to be two 32-bit moves, s2 to r0 and
; s3 to r1.
define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x i64> %a, i32 1
  ret i64 %res
}

; Extract lane 0 of a <4 x float>. No real instruction is expected: the result
; is already in s0, so only a register kill annotation appears before the
; return.
define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
; CHECK-LABEL: extract_f32_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x float> %a, i32 0
  ret float %res
}

; Extract lane 3 of a <4 x float>; expected to be a single s-register move
; from s3 to s0.
define arm_aapcs_vfpcc float @extract_f32_3(<4 x float> %a) {
; CHECK-LABEL: extract_f32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <4 x float> %a, i32 3
  ret float %res
}

; Extract lane 0 of an <8 x half>. No real instruction is expected: only a
; register kill annotation on s0 appears before the return.
define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
; CHECK-LABEL: extract_f16_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x half> %a, i32 0
  ret half %res
}

; Extract lane 3 of an <8 x half>; expected to be a single vmovx.f16 from s1
; into s0.
define arm_aapcs_vfpcc half @extract_f16_3(<8 x half> %a) {
; CHECK-LABEL: extract_f16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s0, s1
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <8 x half> %a, i32 3
  ret half %res
}

; Extract lane 0 of a <2 x double>. No real instruction is expected: the
; result is already in d0, so only a register kill annotation appears before
; the return.
define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
; CHECK-LABEL: extract_f64_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x double> %a, i32 0
  ret double %res
}

; Extract lane 1 of a <2 x double>; expected to be two 32-bit s-register
; moves, s2 to s0 and s3 to s1.
define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
; CHECK-LABEL: extract_f64_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    bx lr
entry:
  %res = extractelement <2 x double> %a, i32 1
  ret double %res
}