• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
4
5; i16
6
; Single-source shuffle of %s1 with mask <4,5,6,7,0,1,2,3>: the low four and
; high four i16 lanes trade places. %s2 is never selected by the mask.
; The autogenerated assertions expect four vmov.f32 copies into s4-s7
; followed by vmov q0, q1.
7define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
8; CHECK-LABEL: shuffle_i16_45670123:
9; CHECK:       @ %bb.0: @ %entry
10; CHECK-NEXT:    vmov.f32 s4, s2
11; CHECK-NEXT:    vmov.f32 s5, s3
12; CHECK-NEXT:    vmov.f32 s6, s0
13; CHECK-NEXT:    vmov.f32 s7, s1
14; CHECK-NEXT:    vmov q0, q1
15; CHECK-NEXT:    bx lr
16entry:
17  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
18  ret <8 x i16> %out
19}
20
; Single-source shuffle of %s1 with mask <6,7,4,5,2,3,0,1>: the four adjacent
; i16 pairs appear in reverse order, each pair keeping its internal order.
; The autogenerated assertions expect four vmov.f32 copies (s4<-s3, s5<-s2,
; s6<-s1, s7<-s0) followed by vmov q0, q1.
21define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_67452301(<8 x i16> %s1, <8 x i16> %s2) {
22; CHECK-LABEL: shuffle_i16_67452301:
23; CHECK:       @ %bb.0: @ %entry
24; CHECK-NEXT:    vmov.f32 s4, s3
25; CHECK-NEXT:    vmov.f32 s5, s2
26; CHECK-NEXT:    vmov.f32 s6, s1
27; CHECK-NEXT:    vmov.f32 s7, s0
28; CHECK-NEXT:    vmov q0, q1
29; CHECK-NEXT:    bx lr
30entry:
31  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
32  ret <8 x i16> %out
33}
34
; Single-source shuffle of %s1 with mask <7,6,5,4,3,2,1,0>: full reversal of
; the eight i16 lanes. %s2 is never selected by the mask.
; The autogenerated assertions expect q0 to be copied to q1 and then eight
; per-lane vmov.u16 / vmov.16 extract-insert pairs going through r0.
35define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) {
36; CHECK-LABEL: shuffle_i16_76543210:
37; CHECK:       @ %bb.0: @ %entry
38; CHECK-NEXT:    vmov q1, q0
39; CHECK-NEXT:    vmov.u16 r0, q0[7]
40; CHECK-NEXT:    vmov.16 q0[0], r0
41; CHECK-NEXT:    vmov.u16 r0, q1[6]
42; CHECK-NEXT:    vmov.16 q0[1], r0
43; CHECK-NEXT:    vmov.u16 r0, q1[5]
44; CHECK-NEXT:    vmov.16 q0[2], r0
45; CHECK-NEXT:    vmov.u16 r0, q1[4]
46; CHECK-NEXT:    vmov.16 q0[3], r0
47; CHECK-NEXT:    vmov.u16 r0, q1[3]
48; CHECK-NEXT:    vmov.16 q0[4], r0
49; CHECK-NEXT:    vmov.u16 r0, q1[2]
50; CHECK-NEXT:    vmov.16 q0[5], r0
51; CHECK-NEXT:    vmov.u16 r0, q1[1]
52; CHECK-NEXT:    vmov.16 q0[6], r0
53; CHECK-NEXT:    vmov.u16 r0, q1[0]
54; CHECK-NEXT:    vmov.16 q0[7], r0
55; CHECK-NEXT:    bx lr
56entry:
57  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
58  ret <8 x i16> %out
59}
60
; Identity shuffle of %s1 (mask <0,1,2,3,4,5,6,7>).
; The autogenerated assertions expect no instructions other than bx lr.
61define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_01234567(<8 x i16> %s1, <8 x i16> %s2) {
62; CHECK-LABEL: shuffle_i16_01234567:
63; CHECK:       @ %bb.0: @ %entry
64; CHECK-NEXT:    bx lr
65entry:
66  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
67  ret <8 x i16> %out
68}
69
; Two-source shuffle with mask <0,1,2,3,12,13,14,15>: result lanes 0-3 come
; from lanes 0-3 of %s1, result lanes 4-7 use mask indices 12-15, i.e. lanes
; 4-7 of %s2 (indices >= 8 select from the second operand).
; The autogenerated assertions expect two vmov.f32 copies (s2<-s6, s3<-s7).
70define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
71; CHECK-LABEL: shuffle_i16_0123cdef:
72; CHECK:       @ %bb.0: @ %entry
73; CHECK-NEXT:    vmov.f32 s2, s6
74; CHECK-NEXT:    vmov.f32 s3, s7
75; CHECK-NEXT:    bx lr
76entry:
77  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
78  ret <8 x i16> %out
79}
80
; Partially-undef shuffle of %s1 with mask <u,7,u,5,u,3,u,1>: only the odd
; result lanes are defined, the even ones are undef.
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 that are expected for the fully defined <6,7,4,5,2,3,0,1> mask.
81define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_u7u5u3u1(<8 x i16> %s1, <8 x i16> %s2) {
82; CHECK-LABEL: shuffle_i16_u7u5u3u1:
83; CHECK:       @ %bb.0: @ %entry
84; CHECK-NEXT:    vmov.f32 s4, s3
85; CHECK-NEXT:    vmov.f32 s5, s2
86; CHECK-NEXT:    vmov.f32 s6, s1
87; CHECK-NEXT:    vmov.f32 s7, s0
88; CHECK-NEXT:    vmov q0, q1
89; CHECK-NEXT:    bx lr
90entry:
91  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
92  ret <8 x i16> %out
93}
94
; Partially-undef shuffle of %s1 with mask <6,u,4,u,2,u,0,u>: only the even
; result lanes are defined, the odd ones are undef.
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 that are expected for the fully defined <6,7,4,5,2,3,0,1> mask.
95define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_6u4u2u0u(<8 x i16> %s1, <8 x i16> %s2) {
96; CHECK-LABEL: shuffle_i16_6u4u2u0u:
97; CHECK:       @ %bb.0: @ %entry
98; CHECK-NEXT:    vmov.f32 s4, s3
99; CHECK-NEXT:    vmov.f32 s5, s2
100; CHECK-NEXT:    vmov.f32 s6, s1
101; CHECK-NEXT:    vmov.f32 s7, s0
102; CHECK-NEXT:    vmov q0, q1
103; CHECK-NEXT:    bx lr
104entry:
105  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
106  ret <8 x i16> %out
107}
108
; Shuffle of %s1 where only result lane 0 is defined (it takes lane 0); the
; other seven mask entries are undef.
; The autogenerated assertions expect no instructions other than bx lr.
109define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0uuuuuuu(<8 x i16> %s1, <8 x i16> %s2) {
110; CHECK-LABEL: shuffle_i16_0uuuuuuu:
111; CHECK:       @ %bb.0: @ %entry
112; CHECK-NEXT:    bx lr
113entry:
114  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
115  ret <8 x i16> %out
116}
117
; Shuffle of %s1 where only result lane 4 is defined (it takes lane 0); the
; other seven mask entries are undef.
; The autogenerated assertions expect lane 0 to be extracted into r0 and
; broadcast to every lane with vdup.16.
118define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_uuuu0uuu(<8 x i16> %s1, <8 x i16> %s2) {
119; CHECK-LABEL: shuffle_i16_uuuu0uuu:
120; CHECK:       @ %bb.0: @ %entry
121; CHECK-NEXT:    vmov.u16 r0, q0[0]
122; CHECK-NEXT:    vdup.16 q0, r0
123; CHECK-NEXT:    bx lr
124entry:
125  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
126  ret <8 x i16> %out
127}
128
129
130; i8
131
; Single-source shuffle of %s1 with mask <12..15, 8..11, 4..7, 0..3>: the four
; 4-byte groups appear in reverse order, each group keeping its byte order.
; The autogenerated assertions expect four vmov.f32 copies (s4<-s3, s5<-s2,
; s6<-s1, s7<-s0) followed by vmov q0, q1.
132define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45670123(<16 x i8> %s1, <16 x i8> %s2) {
133; CHECK-LABEL: shuffle_i8_cdef89ab45670123:
134; CHECK:       @ %bb.0: @ %entry
135; CHECK-NEXT:    vmov.f32 s4, s3
136; CHECK-NEXT:    vmov.f32 s5, s2
137; CHECK-NEXT:    vmov.f32 s6, s1
138; CHECK-NEXT:    vmov.f32 s7, s0
139; CHECK-NEXT:    vmov q0, q1
140; CHECK-NEXT:    bx lr
141entry:
142  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
143  ret <16 x i8> %out
144}
145
; Single-source shuffle of %s1 with mask <14,15,12,13,...,2,3,0,1>: the eight
; adjacent byte pairs appear in reverse order, each pair keeping its order.
; The autogenerated assertions expect q0 to be copied to q1 and then sixteen
; per-lane vmov.u8 / vmov.8 extract-insert pairs going through r0.
146define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_efcdab8967452301(<16 x i8> %s1, <16 x i8> %s2) {
147; CHECK-LABEL: shuffle_i8_efcdab8967452301:
148; CHECK:       @ %bb.0: @ %entry
149; CHECK-NEXT:    vmov q1, q0
150; CHECK-NEXT:    vmov.u8 r0, q0[14]
151; CHECK-NEXT:    vmov.8 q0[0], r0
152; CHECK-NEXT:    vmov.u8 r0, q1[15]
153; CHECK-NEXT:    vmov.8 q0[1], r0
154; CHECK-NEXT:    vmov.u8 r0, q1[12]
155; CHECK-NEXT:    vmov.8 q0[2], r0
156; CHECK-NEXT:    vmov.u8 r0, q1[13]
157; CHECK-NEXT:    vmov.8 q0[3], r0
158; CHECK-NEXT:    vmov.u8 r0, q1[10]
159; CHECK-NEXT:    vmov.8 q0[4], r0
160; CHECK-NEXT:    vmov.u8 r0, q1[11]
161; CHECK-NEXT:    vmov.8 q0[5], r0
162; CHECK-NEXT:    vmov.u8 r0, q1[8]
163; CHECK-NEXT:    vmov.8 q0[6], r0
164; CHECK-NEXT:    vmov.u8 r0, q1[9]
165; CHECK-NEXT:    vmov.8 q0[7], r0
166; CHECK-NEXT:    vmov.u8 r0, q1[6]
167; CHECK-NEXT:    vmov.8 q0[8], r0
168; CHECK-NEXT:    vmov.u8 r0, q1[7]
169; CHECK-NEXT:    vmov.8 q0[9], r0
170; CHECK-NEXT:    vmov.u8 r0, q1[4]
171; CHECK-NEXT:    vmov.8 q0[10], r0
172; CHECK-NEXT:    vmov.u8 r0, q1[5]
173; CHECK-NEXT:    vmov.8 q0[11], r0
174; CHECK-NEXT:    vmov.u8 r0, q1[2]
175; CHECK-NEXT:    vmov.8 q0[12], r0
176; CHECK-NEXT:    vmov.u8 r0, q1[3]
177; CHECK-NEXT:    vmov.8 q0[13], r0
178; CHECK-NEXT:    vmov.u8 r0, q1[0]
179; CHECK-NEXT:    vmov.8 q0[14], r0
180; CHECK-NEXT:    vmov.u8 r0, q1[1]
181; CHECK-NEXT:    vmov.8 q0[15], r0
182; CHECK-NEXT:    bx lr
183entry:
184  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
185  ret <16 x i8> %out
186}
187
; Single-source shuffle of %s1 with mask <15,14,...,1,0>: full reversal of the
; sixteen i8 lanes. %s2 is never selected by the mask.
; The autogenerated assertions expect q0 to be copied to q1 and then sixteen
; per-lane vmov.u8 / vmov.8 extract-insert pairs going through r0.
188define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_fedcba9876543210(<16 x i8> %s1, <16 x i8> %s2) {
189; CHECK-LABEL: shuffle_i8_fedcba9876543210:
190; CHECK:       @ %bb.0: @ %entry
191; CHECK-NEXT:    vmov q1, q0
192; CHECK-NEXT:    vmov.u8 r0, q0[15]
193; CHECK-NEXT:    vmov.8 q0[0], r0
194; CHECK-NEXT:    vmov.u8 r0, q1[14]
195; CHECK-NEXT:    vmov.8 q0[1], r0
196; CHECK-NEXT:    vmov.u8 r0, q1[13]
197; CHECK-NEXT:    vmov.8 q0[2], r0
198; CHECK-NEXT:    vmov.u8 r0, q1[12]
199; CHECK-NEXT:    vmov.8 q0[3], r0
200; CHECK-NEXT:    vmov.u8 r0, q1[11]
201; CHECK-NEXT:    vmov.8 q0[4], r0
202; CHECK-NEXT:    vmov.u8 r0, q1[10]
203; CHECK-NEXT:    vmov.8 q0[5], r0
204; CHECK-NEXT:    vmov.u8 r0, q1[9]
205; CHECK-NEXT:    vmov.8 q0[6], r0
206; CHECK-NEXT:    vmov.u8 r0, q1[8]
207; CHECK-NEXT:    vmov.8 q0[7], r0
208; CHECK-NEXT:    vmov.u8 r0, q1[7]
209; CHECK-NEXT:    vmov.8 q0[8], r0
210; CHECK-NEXT:    vmov.u8 r0, q1[6]
211; CHECK-NEXT:    vmov.8 q0[9], r0
212; CHECK-NEXT:    vmov.u8 r0, q1[5]
213; CHECK-NEXT:    vmov.8 q0[10], r0
214; CHECK-NEXT:    vmov.u8 r0, q1[4]
215; CHECK-NEXT:    vmov.8 q0[11], r0
216; CHECK-NEXT:    vmov.u8 r0, q1[3]
217; CHECK-NEXT:    vmov.8 q0[12], r0
218; CHECK-NEXT:    vmov.u8 r0, q1[2]
219; CHECK-NEXT:    vmov.8 q0[13], r0
220; CHECK-NEXT:    vmov.u8 r0, q1[1]
221; CHECK-NEXT:    vmov.8 q0[14], r0
222; CHECK-NEXT:    vmov.u8 r0, q1[0]
223; CHECK-NEXT:    vmov.8 q0[15], r0
224; CHECK-NEXT:    bx lr
225entry:
226  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
227  ret <16 x i8> %out
228}
229
; Identity shuffle of %s1 (mask <0,1,...,15>).
; The autogenerated assertions expect no instructions other than bx lr.
230define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123456789abcdef(<16 x i8> %s1, <16 x i8> %s2) {
231; CHECK-LABEL: shuffle_i8_0123456789abcdef:
232; CHECK:       @ %bb.0: @ %entry
233; CHECK-NEXT:    bx lr
234entry:
235  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
236  ret <16 x i8> %out
237}
238
; Two-source shuffle with mask <0..3, 16..19, 4..7, 20..23>: 4-byte groups are
; interleaved as lanes 0-3 of %s1, lanes 0-3 of %s2, lanes 4-7 of %s1, lanes
; 4-7 of %s2 (mask indices >= 16 select from the second operand; the letters
; g-n in the name stand for indices 16-23).
; The autogenerated assertions expect four vmov.f32 copies into s8-s11
; (from s0, s4, s1, s5) followed by vmov q0, q2.
239define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123ghij4567klmn(<16 x i8> %s1, <16 x i8> %s2) {
240; CHECK-LABEL: shuffle_i8_0123ghij4567klmn:
241; CHECK:       @ %bb.0: @ %entry
242; CHECK-NEXT:    vmov.f32 s8, s0
243; CHECK-NEXT:    vmov.f32 s9, s4
244; CHECK-NEXT:    vmov.f32 s10, s1
245; CHECK-NEXT:    vmov.f32 s11, s5
246; CHECK-NEXT:    vmov q0, q2
247; CHECK-NEXT:    bx lr
248entry:
249  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
250  ret <16 x i8> %out
251}
252
; Same 4-byte-group reversal of %s1 as mask <12..15, 8..11, 4..7, 0..3>, but
; with one undef lane in each group (result lanes 3, 6, 9 and 12).
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 as for the fully defined mask.
253define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdeu89ub4u67u123(<16 x i8> %s1, <16 x i8> %s2) {
254; CHECK-LABEL: shuffle_i8_cdeu89ub4u67u123:
255; CHECK:       @ %bb.0: @ %entry
256; CHECK-NEXT:    vmov.f32 s4, s3
257; CHECK-NEXT:    vmov.f32 s5, s2
258; CHECK-NEXT:    vmov.f32 s6, s1
259; CHECK-NEXT:    vmov.f32 s7, s0
260; CHECK-NEXT:    vmov q0, q1
261; CHECK-NEXT:    bx lr
262entry:
263  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 undef, i32 8, i32 9, i32 undef, i32 11, i32 4, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 3>
264  ret <16 x i8> %out
265}
266
; Same 4-byte-group reversal of %s1 as mask <12..15, 8..11, 4..7, 0..3>, but
; with two undef lanes in each group; every defined lane still agrees with
; the group-reversal pattern.
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 as for the fully defined mask.
267define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cduu8uubuu67u12u(<16 x i8> %s1, <16 x i8> %s2) {
268; CHECK-LABEL: shuffle_i8_cduu8uubuu67u12u:
269; CHECK:       @ %bb.0: @ %entry
270; CHECK-NEXT:    vmov.f32 s4, s3
271; CHECK-NEXT:    vmov.f32 s5, s2
272; CHECK-NEXT:    vmov.f32 s6, s1
273; CHECK-NEXT:    vmov.f32 s7, s0
274; CHECK-NEXT:    vmov q0, q1
275; CHECK-NEXT:    bx lr
276entry:
277  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 undef>
278  ret <16 x i8> %out
279}
280
; Same 4-byte-group reversal of %s1 as mask <12..15, 8..11, 4..7, 0..3>, but
; with only one defined lane per group (result lanes 0, 7, 10 and 14).
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 as for the fully defined mask.
281define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cuuuuuubuu6uuu2u(<16 x i8> %s1, <16 x i8> %s2) {
282; CHECK-LABEL: shuffle_i8_cuuuuuubuu6uuu2u:
283; CHECK:       @ %bb.0: @ %entry
284; CHECK-NEXT:    vmov.f32 s4, s3
285; CHECK-NEXT:    vmov.f32 s5, s2
286; CHECK-NEXT:    vmov.f32 s6, s1
287; CHECK-NEXT:    vmov.f32 s7, s0
288; CHECK-NEXT:    vmov q0, q1
289; CHECK-NEXT:    bx lr
290entry:
291  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 2, i32 undef>
292  ret <16 x i8> %out
293}
294
; Near-miss of the 4-byte-group reversal of %s1: the mask is
; <12..15, 8..11, 4,5,undef,0, 0..3>, so result lane 11 takes element 0 and
; the third group is not a plain copy of a source group.
; The autogenerated assertions expect result lanes 8, 9 and 11 to be inserted
; individually through r0 (from source lanes 4, 5 and 0), with the other three
; groups moved by vmov.f32 (s4<-s3, s5<-s2, s7<-s0) and a final vmov q0, q1.
; NOTE(review): the function name spells 17 lane characters ("...45u700123")
; for a 16-lane mask, and lane 11 of the mask is 0 rather than 7; the name
; looks like a typo, but it is also the label the assertions match on, so
; confirm before renaming.
295define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45u700123(<16 x i8> %s1, <16 x i8> %s2) {
296; CHECK-LABEL: shuffle_i8_cdef89ab45u700123:
297; CHECK:       @ %bb.0: @ %entry
298; CHECK-NEXT:    vmov.u8 r0, q0[4]
299; CHECK-NEXT:    vmov.8 q1[8], r0
300; CHECK-NEXT:    vmov.u8 r0, q0[5]
301; CHECK-NEXT:    vmov.8 q1[9], r0
302; CHECK-NEXT:    vmov.u8 r0, q0[0]
303; CHECK-NEXT:    vmov.8 q1[11], r0
304; CHECK-NEXT:    vmov.f32 s4, s3
305; CHECK-NEXT:    vmov.f32 s5, s2
306; CHECK-NEXT:    vmov.f32 s7, s0
307; CHECK-NEXT:    vmov q0, q1
308; CHECK-NEXT:    bx lr
309entry:
310  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 undef, i32 0, i32 0, i32 1, i32 2, i32 3>
311  ret <16 x i8> %out
312}
313
314
315
316; f16
317
; Half-precision variant of the <4,5,6,7,0,1,2,3> shuffle: the low four and
; high four half lanes of %s1 trade places. %s2 is never selected by the mask.
; The autogenerated assertions expect four vmov.f32 copies into s4-s7
; followed by vmov q0, q1.
318define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
319; CHECK-LABEL: shuffle_f16_45670123:
320; CHECK:       @ %bb.0: @ %entry
321; CHECK-NEXT:    vmov.f32 s4, s2
322; CHECK-NEXT:    vmov.f32 s5, s3
323; CHECK-NEXT:    vmov.f32 s6, s0
324; CHECK-NEXT:    vmov.f32 s7, s1
325; CHECK-NEXT:    vmov q0, q1
326; CHECK-NEXT:    bx lr
327entry:
328  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
329  ret <8 x half> %out
330}
331
; Half-precision variant of the <6,7,4,5,2,3,0,1> shuffle: the four adjacent
; half pairs of %s1 appear in reverse order, each pair keeping its order.
; The autogenerated assertions expect four vmov.f32 copies (s4<-s3, s5<-s2,
; s6<-s1, s7<-s0) followed by vmov q0, q1.
332define arm_aapcs_vfpcc <8 x half> @shuffle_f16_67452301(<8 x half> %s1, <8 x half> %s2) {
333; CHECK-LABEL: shuffle_f16_67452301:
334; CHECK:       @ %bb.0: @ %entry
335; CHECK-NEXT:    vmov.f32 s4, s3
336; CHECK-NEXT:    vmov.f32 s5, s2
337; CHECK-NEXT:    vmov.f32 s6, s1
338; CHECK-NEXT:    vmov.f32 s7, s0
339; CHECK-NEXT:    vmov q0, q1
340; CHECK-NEXT:    bx lr
341entry:
342  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
343  ret <8 x half> %out
344}
345
; Half-precision variant of the full lane reversal (mask <7,6,5,4,3,2,1,0>)
; of %s1. %s2 is never selected by the mask.
; The autogenerated assertions expect vmovx.f16 and vmov transfers through
; r0/r1 feeding eight vmov.16 lane inserts into q1, then vmov q0, q1. This
; differs from the sequence expected for the i16 reversal test, which uses
; vmov.u16 lane extracts.
346define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) {
347; CHECK-LABEL: shuffle_f16_76543210:
348; CHECK:       @ %bb.0: @ %entry
349; CHECK-NEXT:    vmovx.f16 s4, s3
350; CHECK-NEXT:    vmov r0, s3
351; CHECK-NEXT:    vmov r1, s4
352; CHECK-NEXT:    vmovx.f16 s8, s2
353; CHECK-NEXT:    vmov.16 q1[0], r1
354; CHECK-NEXT:    vmov.16 q1[1], r0
355; CHECK-NEXT:    vmov r0, s8
356; CHECK-NEXT:    vmov.16 q1[2], r0
357; CHECK-NEXT:    vmov r0, s2
358; CHECK-NEXT:    vmovx.f16 s8, s1
359; CHECK-NEXT:    vmov.16 q1[3], r0
360; CHECK-NEXT:    vmov r0, s8
361; CHECK-NEXT:    vmovx.f16 s8, s0
362; CHECK-NEXT:    vmov.16 q1[4], r0
363; CHECK-NEXT:    vmov r0, s1
364; CHECK-NEXT:    vmov.16 q1[5], r0
365; CHECK-NEXT:    vmov r0, s8
366; CHECK-NEXT:    vmov.16 q1[6], r0
367; CHECK-NEXT:    vmov r0, s0
368; CHECK-NEXT:    vmov.16 q1[7], r0
369; CHECK-NEXT:    vmov q0, q1
370; CHECK-NEXT:    bx lr
371entry:
372  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
373  ret <8 x half> %out
374}
375
; Half-precision identity shuffle of %s1 (mask <0,1,2,3,4,5,6,7>).
; The autogenerated assertions expect no instructions other than bx lr.
376define arm_aapcs_vfpcc <8 x half> @shuffle_f16_01234567(<8 x half> %s1, <8 x half> %s2) {
377; CHECK-LABEL: shuffle_f16_01234567:
378; CHECK:       @ %bb.0: @ %entry
379; CHECK-NEXT:    bx lr
380entry:
381  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
382  ret <8 x half> %out
383}
384
; Half-precision two-source shuffle with mask <0,1,2,3,12,13,14,15>: result
; lanes 0-3 come from lanes 0-3 of %s1, result lanes 4-7 use mask indices
; 12-15, i.e. lanes 4-7 of %s2 (indices >= 8 select from the second operand).
; The autogenerated assertions expect two vmov.f32 copies (s2<-s6, s3<-s7).
385define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
386; CHECK-LABEL: shuffle_f16_0123cdef:
387; CHECK:       @ %bb.0: @ %entry
388; CHECK-NEXT:    vmov.f32 s2, s6
389; CHECK-NEXT:    vmov.f32 s3, s7
390; CHECK-NEXT:    bx lr
391entry:
392  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
393  ret <8 x half> %out
394}
395
; Half-precision partially-undef shuffle of %s1 with mask <u,7,u,5,u,3,u,1>:
; only the odd result lanes are defined, the even ones are undef.
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 that are expected for the fully defined <6,7,4,5,2,3,0,1> mask.
396define arm_aapcs_vfpcc <8 x half> @shuffle_f16_u7u5u3u1(<8 x half> %s1, <8 x half> %s2) {
397; CHECK-LABEL: shuffle_f16_u7u5u3u1:
398; CHECK:       @ %bb.0: @ %entry
399; CHECK-NEXT:    vmov.f32 s4, s3
400; CHECK-NEXT:    vmov.f32 s5, s2
401; CHECK-NEXT:    vmov.f32 s6, s1
402; CHECK-NEXT:    vmov.f32 s7, s0
403; CHECK-NEXT:    vmov q0, q1
404; CHECK-NEXT:    bx lr
405entry:
406  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
407  ret <8 x half> %out
408}
409
; Half-precision partially-undef shuffle of %s1 with mask <6,u,4,u,2,u,0,u>:
; only the even result lanes are defined, the odd ones are undef.
; The autogenerated assertions expect the same four vmov.f32 copies plus
; vmov q0, q1 that are expected for the fully defined <6,7,4,5,2,3,0,1> mask.
410define arm_aapcs_vfpcc <8 x half> @shuffle_f16_6u4u2u0u(<8 x half> %s1, <8 x half> %s2) {
411; CHECK-LABEL: shuffle_f16_6u4u2u0u:
412; CHECK:       @ %bb.0: @ %entry
413; CHECK-NEXT:    vmov.f32 s4, s3
414; CHECK-NEXT:    vmov.f32 s5, s2
415; CHECK-NEXT:    vmov.f32 s6, s1
416; CHECK-NEXT:    vmov.f32 s7, s0
417; CHECK-NEXT:    vmov q0, q1
418; CHECK-NEXT:    bx lr
419entry:
420  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
421  ret <8 x half> %out
422}
423
; Half-precision shuffle of %s1 where only result lane 0 is defined (it takes
; lane 0); the other seven mask entries are undef.
; The autogenerated assertions expect no instructions other than bx lr.
424define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0uuuuuuu(<8 x half> %s1, <8 x half> %s2) {
425; CHECK-LABEL: shuffle_f16_0uuuuuuu:
426; CHECK:       @ %bb.0: @ %entry
427; CHECK-NEXT:    bx lr
428entry:
429  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
430  ret <8 x half> %out
431}
432
; Half-precision shuffle of %s1 where only result lane 4 is defined (it takes
; lane 0); the other seven mask entries are undef.
; The autogenerated assertions expect lane 0 to be extracted into r0 with
; vmov.u16 and broadcast to every lane with vdup.16.
433define arm_aapcs_vfpcc <8 x half> @shuffle_f16_uuuu0uuu(<8 x half> %s1, <8 x half> %s2) {
434; CHECK-LABEL: shuffle_f16_uuuu0uuu:
435; CHECK:       @ %bb.0: @ %entry
436; CHECK-NEXT:    vmov.u16 r0, q0[0]
437; CHECK-NEXT:    vdup.16 q0, r0
438; CHECK-NEXT:    bx lr
439entry:
440  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
441  ret <8 x half> %out
442}
443