; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; shuffle1 (4 x i32): lane-reversing shuffle of a compare predicate.
; %src is compared against zero, the <4 x i1> result is reversed with the
; mask <3, 2, 1, 0>, and the reversed predicate selects between %a and %b.
; The expected assembly moves the predicate to a GPR (vmrs), reverses it with
; rbit followed by lsrs #16, and writes it back (vmsr) ahead of the vpsel.
define <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle1_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
28
; shuffle1 (8 x i16): lane-reversing shuffle of a compare predicate.
; The <8 x i1> result of comparing %src with zero is reversed with the mask
; <7, 6, ..., 0> and then selects between %a and %b.
; The expected assembly uses the same vmrs / rbit / lsrs #16 / vmsr sequence
; as the 4 x i32 variant, with a 16-bit lane compare.
define <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle1_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
53
; shuffle1 (16 x i8): lane-reversing shuffle of a compare predicate.
; The <16 x i1> result of comparing %src with zero is reversed with the mask
; <15, 14, ..., 0> and then selects between %a and %b.
; The expected assembly uses the same vmrs / rbit / lsrs #16 / vmsr sequence
; as the wider-lane variants, with an 8-bit lane compare.
define <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle1_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
78
; shuffle2 (4 x i32): identity shuffle of a compare predicate.
; The mask <0, 1, 2, 3> leaves the <4 x i1> compare result unchanged before
; it selects between %a and %b.
; The expected assembly contains no shuffle code: the vcmp result feeds the
; vpsel directly.
define <4 x i32> @shuffle2_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle2_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
99
; shuffle2 (8 x i16): identity shuffle of a compare predicate.
; The mask <0, 1, ..., 7> leaves the <8 x i1> compare result unchanged before
; it selects between %a and %b.
; The expected assembly contains no shuffle code: the vcmp result feeds the
; vpsel directly.
define <8 x i16> @shuffle2_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle2_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
120
; shuffle2 (16 x i8): identity shuffle of a compare predicate.
; The mask <0, 1, ..., 15> leaves the <16 x i1> compare result unchanged
; before it selects between %a and %b.
; The expected assembly contains no shuffle code: the vcmp result feeds the
; vpsel directly.
define <16 x i8> @shuffle2_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle2_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
141
; shuffle3 (4 x i32): splat of predicate lane 0.
; The all-zero mask broadcasts lane 0 of the <4 x i1> compare result to every
; lane before it selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector with
; vpsel, reads lane 0 into r0, splats it with vdup.32 and recreates the
; predicate with a "ne zero" compare.
define <4 x i32> @shuffle3_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle3_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
168
; shuffle3 (8 x i16): splat of predicate lane 0.
; The all-zero mask broadcasts lane 0 of the <8 x i1> compare result to every
; lane before it selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector with
; vpsel, reads lane 0 with vmov.u16, splats it with vdup.16 and recreates the
; predicate with a "ne zero" compare.
define <8 x i16> @shuffle3_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle3_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
195
; shuffle3 (16 x i8): splat of predicate lane 0.
; The all-zero mask broadcasts lane 0 of the <16 x i1> compare result to
; every lane before it selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector with
; vpsel, reads lane 0 with vmov.u8, splats it with vdup.8 and recreates the
; predicate with a "ne zero" compare.
define <16 x i8> @shuffle3_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle3_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
222
; shuffle4 (4 x i32): near-splat shuffle of a compare predicate.
; The mask <0, 0, 0, 1> fills lanes 0-2 with predicate lane 0 and lane 3 with
; predicate lane 1 before the result selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; builds the shuffled vector with four scalar vmov.f32 copies (s4-s6 from s0,
; s7 from s1) and recreates the predicate with a "ne zero" compare.
define <4 x i32> @shuffle4_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle4_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.f32 s4, s0
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
251
; shuffle4 (8 x i16): near-splat shuffle of a compare predicate.
; The mask <0, 0, 0, 0, 0, 0, 0, 1> fills lanes 0-6 with predicate lane 0 and
; lane 7 with predicate lane 1 before the result selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; splats lane 0 with vdup.16, patches the top word with a single
; vmov.f32 s7, s0 and recreates the predicate with a "ne zero" compare.
define <8 x i16> @shuffle4_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle4_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i16 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
279
; shuffle4 (16 x i8): near-splat shuffle of a compare predicate.
; The mask fills lanes 0-14 with predicate lane 0 and lane 15 with predicate
; lane 1 before the result selects between %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; splats lane 0 with vdup.8, inserts lane 1 into lane 15 with vmov.8 and
; recreates the predicate with a "ne zero" compare.
define <16 x i8> @shuffle4_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle4_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov.8 q1[15], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i8 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
308
; shuffle5_b (8 x i1 -> 4 x i1): extract the bottom half of a predicate.
; %src is <8 x i16>; lanes 0-3 of its <8 x i1> compare result are kept as a
; <4 x i1> that selects between the <4 x i32> values %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; copies 16-bit lanes 0-3 into 32-bit lanes 0-3 one at a time
; (vmov.u16 / vmov.32) and recreates the predicate with vcmp.i32 ne.
define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_b_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    vmov.u16 r0, q0[1]
; CHECK-NEXT:    vmov.32 q1[1], r0
; CHECK-NEXT:    vmov.u16 r0, q0[2]
; CHECK-NEXT:    vmov.32 q1[2], r0
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vmov.32 q1[3], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
341
; shuffle5_t (8 x i1 -> 4 x i1): extract the top half of a predicate.
; %src is <8 x i16>; lanes 4-7 of its <8 x i1> compare result are kept as a
; <4 x i1> that selects between the <4 x i32> values %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; copies 16-bit lanes 4-7 into 32-bit lanes 0-3 one at a time
; (vmov.u16 / vmov.32) and recreates the predicate with vcmp.i32 ne.
define <4 x i32> @shuffle5_t_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_t_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[4]
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    vmov.32 q1[1], r0
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.32 q1[2], r0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.32 q1[3], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
374
; shuffle5_b (16 x i1 -> 8 x i1): extract the bottom half of a predicate.
; %src is <16 x i8>; lanes 0-7 of its <16 x i1> compare result are kept as an
; <8 x i1> that selects between the <8 x i16> values %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; copies 8-bit lanes 0-7 into 16-bit lanes 0-7 one at a time
; (vmov.u8 / vmov.16) and recreates the predicate with vcmp.i16 ne.
define <8 x i16> @shuffle5_b_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_b_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
415
; shuffle5_t (16 x i1 -> 8 x i1): extract the top half of a predicate.
; %src is <16 x i8>; lanes 8-15 of its <16 x i1> compare result are kept as
; an <8 x i1> that selects between the <8 x i16> values %a and %b.
; The expected assembly materialises the predicate as a 0xff/0x0 vector,
; copies 8-bit lanes 8-15 into 16-bit lanes 0-7 one at a time
; (vmov.u8 / vmov.16) and recreates the predicate with vcmp.i16 ne.
define <8 x i16> @shuffle5_t_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_t_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
456
; shuffle6 (2 x <4 x i1> -> 8 x i1): concatenate two predicates.
; %src1 and %src2 are each compared against zero; the mask <0, 1, ..., 7>
; joins the two <4 x i1> results into one <8 x i1> (lanes 0-3 from %c1,
; lanes 4-7 from %c2) that selects between the <8 x i16> values %a and %b.
; The expected assembly materialises each compare as a 0xff/0x0 vector,
; inserts its four 32-bit lanes into a 16-bit-lane vector with vmov.16 and
; recreates the predicate with vcmp.i16 ne.
define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle6_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vpsel q3, q2, q1
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q3, [r0]
; CHECK-NEXT:    vcmp.i32 eq, q3, zr
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %src1, zeroinitializer
  %c2 = icmp eq <4 x i32> %src2, zeroinitializer
  %sh = shufflevector <4 x i1> %c1, <4 x i1> %c2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
502
; shuffle6 (2 x <8 x i1> -> 16 x i1): concatenate two predicates.
; %src1 and %src2 are each compared against zero; the mask <0, 1, ..., 15>
; joins the two <8 x i1> results into one <16 x i1> (lanes 0-7 from %c1,
; lanes 8-15 from %c2) that selects between the <16 x i8> values %a and %b.
; The expected assembly materialises each compare as a 0xff/0x0 vector,
; inserts its eight 16-bit lanes into an 8-bit-lane vector with vmov.8 and
; recreates the predicate with vcmp.i8 ne.
define <16 x i8> @shuffle6_v8i16(<8 x i16> %src1, <8 x i16> %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle6_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vpsel q3, q2, q1
; CHECK-NEXT:    vmov.u16 r0, q3[0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u16 r0, q3[1]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u16 r0, q3[2]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u16 r0, q3[3]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u16 r0, q3[4]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u16 r0, q3[5]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u16 r0, q3[6]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u16 r0, q3[7]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q3, [r0]
; CHECK-NEXT:    vcmp.i16 eq, q3, zr
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u16 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u16 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u16 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u16 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vcmp.i8 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %src1, zeroinitializer
  %c2 = icmp eq <8 x i16> %src2, zeroinitializer
  %sh = shufflevector <8 x i1> %c1, <8 x i1> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
564