; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -o - | FileCheck %s

; v4i32: lane-wise select between %V1 and %V2 under the <4 x i1> mask returned
; by llvm.get.active.lane.mask(%index, %TC).
; The expected code loads the lane offsets {0,1,2,3} from a constant pool, adds
; a scalar to them, forms the predicate with vcmp.u32 / vpnot / predicated
; vcmpt.u32, and applies it with one vpsel.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) {
; CHECK-LABEL: v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    adr.w r12, .LCPI0_0
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vldrw.u32 q0, [r12]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    add r0, sp, #8
; CHECK-NEXT:    vcmp.u32 hi, q1, q0
; CHECK-NEXT:    vdup.32 q1, r1
; CHECK-NEXT:    vpnot
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmpt.u32 hi, q1, q0
; CHECK-NEXT:    vmov d0, r2, r3
; CHECK-NEXT:    vldr d1, [sp]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI0_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 3 @ 0x3
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
  %select = select <4 x i1> %active.lane.mask, <4 x i32> %V1, <4 x i32> %V2
  ret <4 x i32> %select
}

; v7i32: same select-under-active-lane-mask pattern as a 4-lane vector would
; use, but with a non-power-of-two <7 x i32> / <7 x i1> type.
; The expected code handles the value in two parts: lanes 0-3 use the constant
; pool {0,1,2,3} and are stored with vstrw.32 to [r0]; lanes 4-6 use the pool
; {4,5,6,<4 zero bytes>} and are stored with strd/str at [r0, #16] and
; [r0, #24].
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
; CHECK-LABEL: v7i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    adr r3, .LCPI1_0
; CHECK-NEXT:    vdup.32 q1, r1
; CHECK-NEXT:    vldrw.u32 q0, [r3]
; CHECK-NEXT:    vadd.i32 q2, q0, r1
; CHECK-NEXT:    vdup.32 q0, r2
; CHECK-NEXT:    vcmp.u32 hi, q1, q2
; CHECK-NEXT:    ldr r2, [sp, #32]
; CHECK-NEXT:    vpnot
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmpt.u32 hi, q0, q2
; CHECK-NEXT:    vmov.32 q2[0], r2
; CHECK-NEXT:    ldr r2, [sp, #36]
; CHECK-NEXT:    vmov.32 q2[1], r2
; CHECK-NEXT:    ldr r2, [sp, #40]
; CHECK-NEXT:    vmov.32 q2[2], r2
; CHECK-NEXT:    ldr r2, [sp, #44]
; CHECK-NEXT:    vmov.32 q2[3], r2
; CHECK-NEXT:    ldr r2, [sp]
; CHECK-NEXT:    vmov.32 q3[0], r2
; CHECK-NEXT:    ldr r2, [sp, #4]
; CHECK-NEXT:    vmov.32 q3[1], r2
; CHECK-NEXT:    ldr r2, [sp, #8]
; CHECK-NEXT:    vmov.32 q3[2], r2
; CHECK-NEXT:    ldr r2, [sp, #12]
; CHECK-NEXT:    vmov.32 q3[3], r2
; CHECK-NEXT:    adr r2, .LCPI1_1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vstrw.32 q2, [r0]
; CHECK-NEXT:    vldrw.u32 q2, [r2]
; CHECK-NEXT:    movw r2, #4095
; CHECK-NEXT:    vadd.i32 q2, q2, r1
; CHECK-NEXT:    vcmp.u32 hi, q1, q2
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    ldr r1, [sp, #48]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmpt.u32 hi, q0, q2
; CHECK-NEXT:    vmov.32 q0[0], r1
; CHECK-NEXT:    ldr r1, [sp, #52]
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    ldr r1, [sp, #56]
; CHECK-NEXT:    vmov.32 q0[2], r1
; CHECK-NEXT:    ldr r1, [sp, #16]
; CHECK-NEXT:    vmov.32 q1[0], r1
; CHECK-NEXT:    ldr r1, [sp, #20]
; CHECK-NEXT:    vmov.32 q1[1], r1
; CHECK-NEXT:    ldr r1, [sp, #24]
; CHECK-NEXT:    vmov.32 q1[2], r1
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov.f32 s2, s1
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    strd r3, r2, [r0, #16]
; CHECK-NEXT:    str r1, [r0, #24]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI1_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:  .LCPI1_1:
; CHECK-NEXT:    .long 4 @ 0x4
; CHECK-NEXT:    .long 5 @ 0x5
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .zero 4
  %active.lane.mask = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32 %index, i32 %TC)
  %select = select <7 x i1> %active.lane.mask, <7 x i32> %V1, <7 x i32> %V2
  ret <7 x i32> %select
}

; v8i16: lane-wise select between two <8 x i16> values under the <8 x i1> mask
; returned by llvm.get.active.lane.mask(%index, %TC).
; The expected code performs the comparisons on 32-bit lanes in two groups
; (constant pools {0,1,2,3} and {4,5,6,7}), materialises each result as
; 0x00/0xff lanes with vpsel, repacks them into 16-bit lanes with vmov.16, and
; rebuilds the final predicate with vcmp.i16 ne ..., zr before the last vpsel.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-LABEL: v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    adr.w r12, .LCPI2_0
; CHECK-NEXT:    vdup.32 q5, r1
; CHECK-NEXT:    vldrw.u32 q0, [r12]
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vcmp.u32 hi, q5, q3
; CHECK-NEXT:    vpsel q4, q2, q1
; CHECK-NEXT:    vmov r1, s16
; CHECK-NEXT:    vmov.16 q0[0], r1
; CHECK-NEXT:    vmov r1, s17
; CHECK-NEXT:    vmov.16 q0[1], r1
; CHECK-NEXT:    vmov r1, s18
; CHECK-NEXT:    vmov.16 q0[2], r1
; CHECK-NEXT:    vmov r1, s19
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    adr r1, .LCPI2_1
; CHECK-NEXT:    vldrw.u32 q4, [r1]
; CHECK-NEXT:    vadd.i32 q4, q4, r0
; CHECK-NEXT:    vcmp.u32 hi, q5, q4
; CHECK-NEXT:    vpsel q5, q2, q1
; CHECK-NEXT:    vmov r1, s20
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov r1, s21
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov r1, s22
; CHECK-NEXT:    vmov.16 q0[6], r1
; CHECK-NEXT:    vmov r1, s23
; CHECK-NEXT:    vdup.32 q5, r0
; CHECK-NEXT:    vmov.16 q0[7], r1
; CHECK-NEXT:    vcmp.u32 hi, q5, q3
; CHECK-NEXT:    vpsel q6, q2, q1
; CHECK-NEXT:    vcmp.u32 hi, q5, q4
; CHECK-NEXT:    vmov r0, s24
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov.16 q3[0], r0
; CHECK-NEXT:    vmov r0, s25
; CHECK-NEXT:    vmov.16 q3[1], r0
; CHECK-NEXT:    vmov r0, s26
; CHECK-NEXT:    vmov.16 q3[2], r0
; CHECK-NEXT:    vmov r0, s27
; CHECK-NEXT:    vmov.16 q3[3], r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q3[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q3[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q3[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.16 q3[7], r0
; CHECK-NEXT:    add r0, sp, #56
; CHECK-NEXT:    vcmp.i16 ne, q3, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpnot
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmpt.i16 ne, q0, zr
; CHECK-NEXT:    vmov d0, r2, r3
; CHECK-NEXT:    vldr d1, [sp, #48]
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI2_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:  .LCPI2_1:
; CHECK-NEXT:    .long 4 @ 0x4
; CHECK-NEXT:    .long 5 @ 0x5
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 7 @ 0x7
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
  %select = select <8 x i1> %active.lane.mask, <8 x i16> %V1, <8 x i16> %V2
  ret <8 x i16> %select
}

; v16i8: lane-wise select between two <16 x i8> values under the <16 x i1>
; mask returned by llvm.get.active.lane.mask(%index, %TC).
; The expected code performs the comparisons on 32-bit lanes in four groups
; (constant pools covering offsets 0..15), repacks the 0x00/0xff results first
; into 16-bit lanes (vmov.16) and then into 8-bit lanes (vmov.8), and rebuilds
; the final predicate with vcmp.i8 ne ..., zr. One q register is spilled to a
; 16-byte stack slot and reloaded along the way.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-LABEL: v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    adr.w r12, .LCPI3_0
; CHECK-NEXT:    vdup.32 q7, r1
; CHECK-NEXT:    vldrw.u32 q0, [r12]
; CHECK-NEXT:    vmov.i8 q5, #0x0
; CHECK-NEXT:    vmov.i8 q4, #0xff
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vcmp.u32 hi, q7, q1
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov.16 q2[0], r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    vmov.16 q2[1], r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov.16 q2[2], r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov.16 q2[3], r1
; CHECK-NEXT:    adr r1, .LCPI3_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vcmp.u32 hi, q7, q3
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov.16 q2[4], r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    vmov.16 q2[5], r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov.16 q2[6], r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov.16 q2[7], r1
; CHECK-NEXT:    vcmp.i16 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vmov.8 q2[0], r1
; CHECK-NEXT:    vmov.u16 r1, q0[1]
; CHECK-NEXT:    vmov.8 q2[1], r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.8 q2[2], r1
; CHECK-NEXT:    vmov.u16 r1, q0[3]
; CHECK-NEXT:    vmov.8 q2[3], r1
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    vmov.8 q2[4], r1
; CHECK-NEXT:    vmov.u16 r1, q0[5]
; CHECK-NEXT:    vmov.8 q2[5], r1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.8 q2[6], r1
; CHECK-NEXT:    vmov.u16 r1, q0[7]
; CHECK-NEXT:    vmov.8 q2[7], r1
; CHECK-NEXT:    adr r1, .LCPI3_2
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vcmp.u32 hi, q7, q0
; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT:    vpsel q6, q4, q5
; CHECK-NEXT:    vmov r1, s24
; CHECK-NEXT:    vmov.16 q0[0], r1
; CHECK-NEXT:    vmov r1, s25
; CHECK-NEXT:    vmov.16 q0[1], r1
; CHECK-NEXT:    vmov r1, s26
; CHECK-NEXT:    vmov.16 q0[2], r1
; CHECK-NEXT:    vmov r1, s27
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    adr r1, .LCPI3_3
; CHECK-NEXT:    vldrw.u32 q6, [r1]
; CHECK-NEXT:    vadd.i32 q6, q6, r0
; CHECK-NEXT:    vcmp.u32 hi, q7, q6
; CHECK-NEXT:    vpsel q7, q4, q5
; CHECK-NEXT:    vmov r1, s28
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov r1, s29
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov r1, s30
; CHECK-NEXT:    vmov.16 q0[6], r1
; CHECK-NEXT:    vmov r1, s31
; CHECK-NEXT:    vmov.16 q0[7], r1
; CHECK-NEXT:    vdup.32 q7, r0
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vcmp.u32 hi, q7, q1
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vpsel q1, q4, q5
; CHECK-NEXT:    vmov.8 q2[8], r1
; CHECK-NEXT:    vmov.u16 r1, q0[1]
; CHECK-NEXT:    vmov.8 q2[9], r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.8 q2[10], r1
; CHECK-NEXT:    vmov.u16 r1, q0[3]
; CHECK-NEXT:    vmov.8 q2[11], r1
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    vmov.8 q2[12], r1
; CHECK-NEXT:    vmov.u16 r1, q0[5]
; CHECK-NEXT:    vmov.8 q2[13], r1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.8 q2[14], r1
; CHECK-NEXT:    vmov.u16 r1, q0[7]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vcmp.u32 hi, q7, q3
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vpsel q1, q4, q5
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.8 q2[15], r1
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vmov.8 q3[0], r0
; CHECK-NEXT:    vmov.u16 r0, q0[1]
; CHECK-NEXT:    vmov.8 q3[1], r0
; CHECK-NEXT:    vmov.u16 r0, q0[2]
; CHECK-NEXT:    vmov.8 q3[2], r0
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vmov.8 q3[3], r0
; CHECK-NEXT:    vmov.u16 r0, q0[4]
; CHECK-NEXT:    vmov.8 q3[4], r0
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    vmov.8 q3[5], r0
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.8 q3[6], r0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vldrw.u32 q0, [sp] @ 16-byte Reload
; CHECK-NEXT:    vmov.8 q3[7], r0
; CHECK-NEXT:    vcmp.u32 hi, q7, q0
; CHECK-NEXT:    vpsel q1, q4, q5
; CHECK-NEXT:    vcmp.u32 hi, q7, q6
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vpsel q1, q4, q5
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vmov.8 q3[8], r0
; CHECK-NEXT:    vmov.u16 r0, q0[1]
; CHECK-NEXT:    vmov.8 q3[9], r0
; CHECK-NEXT:    vmov.u16 r0, q0[2]
; CHECK-NEXT:    vmov.8 q3[10], r0
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vmov.8 q3[11], r0
; CHECK-NEXT:    vmov.u16 r0, q0[4]
; CHECK-NEXT:    vmov.8 q3[12], r0
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    vmov.8 q3[13], r0
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.8 q3[14], r0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.8 q3[15], r0
; CHECK-NEXT:    vmov d0, r2, r3
; CHECK-NEXT:    add r0, sp, #88
; CHECK-NEXT:    vcmp.i8 ne, q3, zr
; CHECK-NEXT:    vldr d1, [sp, #80]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpnot
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmpt.i8 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI3_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:  .LCPI3_1:
; CHECK-NEXT:    .long 4 @ 0x4
; CHECK-NEXT:    .long 5 @ 0x5
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 7 @ 0x7
; CHECK-NEXT:  .LCPI3_2:
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 9 @ 0x9
; CHECK-NEXT:    .long 10 @ 0xa
; CHECK-NEXT:    .long 11 @ 0xb
; CHECK-NEXT:  .LCPI3_3:
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 13 @ 0xd
; CHECK-NEXT:    .long 14 @ 0xe
; CHECK-NEXT:    .long 15 @ 0xf
  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
  %select = select <16 x i1> %active.lane.mask, <16 x i8> %V1, <16 x i8> %V2
  ret <16 x i8> %select
}

; test_width2: a vectorised loop whose <2 x i1> active lane mask predicates a
; masked load and a masked store of <2 x i32>.
; Each iteration computes the mask from (%index, zext %m), loads two i32 from
; %y at element offset %index - 2, and stores them to %y at element offset
; %index; %index advances by 2 until it equals %n.vec.
; The expected code uses a dls/le low-overhead loop, computes the two mask
; bits with scalar instructions, and performs the masked load and store as
; IT-predicated ldr/str pairs.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroext %m) {
; CHECK-LABEL: test_width2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    beq.w .LBB4_3
; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
; CHECK-NEXT:    adds r0, r2, #1
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    vmov.32 q2[0], r2
; CHECK-NEXT:    subs r0, #2
; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
; CHECK-NEXT:    vmov.32 q2[2], r2
; CHECK-NEXT:    movs r6, #0
; CHECK-NEXT:    add.w lr, r3, r0, lsr #1
; CHECK-NEXT:    adr r3, .LCPI4_0
; CHECK-NEXT:    dls lr, lr
; CHECK-NEXT:    vldrw.u32 q1, [r3]
; CHECK-NEXT:    vand q2, q2, q0
; CHECK-NEXT:  .LBB4_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vmov.32 q3[0], r6
; CHECK-NEXT:    vmov r5, s8
; CHECK-NEXT:    vmov.32 q3[2], r6
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    vand q3, q3, q0
; CHECK-NEXT:    adds r6, #2
; CHECK-NEXT:    vmov r3, s14
; CHECK-NEXT:    vmov r2, s15
; CHECK-NEXT:    adds r3, #1
; CHECK-NEXT:    adc r12, r2, #0
; CHECK-NEXT:    vmov r2, s12
; CHECK-NEXT:    vmov.32 q3[0], r2
; CHECK-NEXT:    vmov.32 q3[2], r3
; CHECK-NEXT:    vand q3, q3, q0
; CHECK-NEXT:    vmov r4, s12
; CHECK-NEXT:    teq.w r4, r2
; CHECK-NEXT:    cset r2, ne
; CHECK-NEXT:    tst.w r2, #1
; CHECK-NEXT:    csetm r2, ne
; CHECK-NEXT:    vmov.32 q4[0], r2
; CHECK-NEXT:    vmov.32 q4[1], r2
; CHECK-NEXT:    vmov r2, s14
; CHECK-NEXT:    eors r3, r2
; CHECK-NEXT:    orrs.w r3, r3, r12
; CHECK-NEXT:    cset r3, ne
; CHECK-NEXT:    tst.w r3, #1
; CHECK-NEXT:    csetm r3, ne
; CHECK-NEXT:    subs r5, r4, r5
; CHECK-NEXT:    vmov.32 q4[2], r3
; CHECK-NEXT:    vmov r5, s10
; CHECK-NEXT:    vmov.32 q4[3], r3
; CHECK-NEXT:    vmov r3, s13
; CHECK-NEXT:    veor q4, q4, q1
; CHECK-NEXT:    sbcs.w r0, r3, r0
; CHECK-NEXT:    vmov r3, s11
; CHECK-NEXT:    mov.w r0, #0
; CHECK-NEXT:    it lo
; CHECK-NEXT:    movlo r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    subs r2, r2, r5
; CHECK-NEXT:    vmov.32 q5[0], r0
; CHECK-NEXT:    vmov.32 q5[1], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    @ implicit-def: $q3
; CHECK-NEXT:    sbcs r0, r3
; CHECK-NEXT:    mov.w r0, #0
; CHECK-NEXT:    it lo
; CHECK-NEXT:    movlo r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    vmov.32 q5[2], r0
; CHECK-NEXT:    vmov.32 q5[3], r0
; CHECK-NEXT:    vand q4, q4, q5
; CHECK-NEXT:    vmov r2, s16
; CHECK-NEXT:    vmov r0, s18
; CHECK-NEXT:    and r2, r2, #1
; CHECK-NEXT:    orr.w r3, r2, r0, lsl #1
; CHECK-NEXT:    sub.w r2, r1, #8
; CHECK-NEXT:    lsls r0, r3, #31
; CHECK-NEXT:    itt ne
; CHECK-NEXT:    ldrne r0, [r2]
; CHECK-NEXT:    vmovne.32 q3[0], r0
; CHECK-NEXT:    and r0, r3, #3
; CHECK-NEXT:    lsls r0, r0, #30
; CHECK-NEXT:    itt mi
; CHECK-NEXT:    ldrmi r0, [r2, #4]
; CHECK-NEXT:    vmovmi.32 q3[2], r0
; CHECK-NEXT:    vmov r2, s16
; CHECK-NEXT:    vmov r0, s18
; CHECK-NEXT:    and r2, r2, #1
; CHECK-NEXT:    orr.w r2, r2, r0, lsl #1
; CHECK-NEXT:    lsls r0, r2, #31
; CHECK-NEXT:    itt ne
; CHECK-NEXT:    vmovne r0, s12
; CHECK-NEXT:    strne r0, [r1]
; CHECK-NEXT:    and r0, r2, #3
; CHECK-NEXT:    lsls r0, r0, #30
; CHECK-NEXT:    itt mi
; CHECK-NEXT:    vmovmi r0, s14
; CHECK-NEXT:    strmi r0, [r1, #4]
; CHECK-NEXT:    adds r1, #8
; CHECK-NEXT:    le lr, .LBB4_2
; CHECK-NEXT:  .LBB4_3: @ %for.cond.cleanup
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.4:
; CHECK-NEXT:  .LCPI4_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
entry:
  ; Skip the loop entirely when the trip count %m is zero.
  %cmp9.not = icmp eq i8 %m, 0
  br i1 %cmp9.not, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  ; %n.vec = (%m + 1) & 510, i.e. the trip count rounded up to a multiple of 2.
  %wide.trip.count = zext i8 %m to i32
  %n.rnd.up = add nuw nsw i32 %wide.trip.count, 1
  %n.vec = and i32 %n.rnd.up, 510
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %for.body.preheader
  %index = phi i32 [ 0, %for.body.preheader ], [ %index.next, %vector.body ]
  %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %wide.trip.count)
  ; Masked load of <2 x i32> from %y + (%index - 2) elements.
  %0 = add nsw i32 %index, -2
  %1 = getelementptr inbounds i32, i32* %y, i32 %0
  %2 = bitcast i32* %1 to <2 x i32>*
  %wide.masked.load = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %2, i32 4, <2 x i1> %active.lane.mask, <2 x i32> undef)
  ; Masked store of the loaded value to %y + %index elements, same mask.
  %3 = getelementptr inbounds i32, i32* %y, i32 %index
  %4 = bitcast i32* %3 to <2 x i32>*
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %wide.masked.load, <2 x i32>* %4, i32 4, <2 x i1> %active.lane.mask)
  %index.next = add i32 %index, 2
  %5 = icmp eq i32 %index.next, %n.vec
  br i1 %5, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Intrinsics referenced by the test functions in this file.
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)