• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
4
; Masked <4 x i32> load from %src whose result is written to %dest with a
; masked store. Both operations share one predicate: the lanes where the
; <4 x i32> vector loaded from %mask is signed-greater-than zero.
; The expected output is a single VPT block (vptt.s32) that predicates a
; vldrwt.u32 load and a vstrwt.32 store.
5define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
6; CHECK-LABEL: foo_v4i32_v4i32:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vldrw.u32 q0, [r1]
9; CHECK-NEXT:    vptt.s32 gt, q0, zr
10; CHECK-NEXT:    vldrwt.u32 q0, [r2]
11; CHECK-NEXT:    vstrwt.32 q0, [r0]
12; CHECK-NEXT:    bx lr
13entry:
14  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
15  %1 = icmp sgt <4 x i32> %0, zeroinitializer
16  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
17  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1)
18  ret void
19}
20
; Masked <4 x i8> load from %src, sign-extended to <4 x i32> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the sign extension into the predicated load
; (vldrbt.s32) inside one VPT block, followed by a predicated vstrwt.32.
21define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
22; CHECK-LABEL: foo_sext_v4i32_v4i8:
23; CHECK:       @ %bb.0: @ %entry
24; CHECK-NEXT:    vldrw.u32 q0, [r1]
25; CHECK-NEXT:    vptt.s32 gt, q0, zr
26; CHECK-NEXT:    vldrbt.s32 q0, [r2]
27; CHECK-NEXT:    vstrwt.32 q0, [r0]
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
31  %1 = icmp sgt <4 x i32> %0, zeroinitializer
32  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
33  %3 = sext <4 x i8> %2 to <4 x i32>
34  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
35  ret void
36}
37
; Masked <4 x i16> load from %src, sign-extended to <4 x i32> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the sign extension into the predicated load
; (vldrht.s32) inside one VPT block, followed by a predicated vstrwt.32.
38define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
39; CHECK-LABEL: foo_sext_v4i32_v4i16:
40; CHECK:       @ %bb.0: @ %entry
41; CHECK-NEXT:    vldrw.u32 q0, [r1]
42; CHECK-NEXT:    vptt.s32 gt, q0, zr
43; CHECK-NEXT:    vldrht.s32 q0, [r2]
44; CHECK-NEXT:    vstrwt.32 q0, [r0]
45; CHECK-NEXT:    bx lr
46entry:
47  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
48  %1 = icmp sgt <4 x i32> %0, zeroinitializer
49  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
50  %3 = sext <4 x i16> %2 to <4 x i32>
51  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
52  ret void
53}
54
; Masked <4 x i8> load from %src, zero-extended to <4 x i32> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the zero extension into the predicated load
; (vldrbt.u32) inside one VPT block, followed by a predicated vstrwt.32.
55define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
56; CHECK-LABEL: foo_zext_v4i32_v4i8:
57; CHECK:       @ %bb.0: @ %entry
58; CHECK-NEXT:    vldrw.u32 q0, [r1]
59; CHECK-NEXT:    vptt.s32 gt, q0, zr
60; CHECK-NEXT:    vldrbt.u32 q0, [r2]
61; CHECK-NEXT:    vstrwt.32 q0, [r0]
62; CHECK-NEXT:    bx lr
63entry:
64  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
65  %1 = icmp sgt <4 x i32> %0, zeroinitializer
66  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
67  %3 = zext <4 x i8> %2 to <4 x i32>
68  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
69  ret void
70}
71
; Masked <4 x i16> load from %src, zero-extended to <4 x i32> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the zero extension into the predicated load
; (vldrht.u32) inside one VPT block, followed by a predicated vstrwt.32.
72define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
73; CHECK-LABEL: foo_zext_v4i32_v4i16:
74; CHECK:       @ %bb.0: @ %entry
75; CHECK-NEXT:    vldrw.u32 q0, [r1]
76; CHECK-NEXT:    vptt.s32 gt, q0, zr
77; CHECK-NEXT:    vldrht.u32 q0, [r2]
78; CHECK-NEXT:    vstrwt.32 q0, [r0]
79; CHECK-NEXT:    bx lr
80entry:
81  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
82  %1 = icmp sgt <4 x i32> %0, zeroinitializer
83  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
84  %3 = zext <4 x i16> %2 to <4 x i32>
85  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
86  ret void
87}
88
; Masked <2 x i32> load from %src (align 4), sign-extended to <2 x i64> and
; written to %dest with a masked store (align 8). A lane is active when the
; corresponding <2 x i32> element loaded from %mask is signed-greater-than 0.
; The expected output differs per endianness, hence the separate little- and
; big-endian check prefixes. Neither variant uses a VPT-predicated vector
; load or store: each lane is loaded with a scalar ldr that is conditional
; (IT blocks in the little-endian output, branches around %cond.load and
; %cond.load1 in the big-endian output) and stored with a conditional vstr of
; one d-register. The big-endian output additionally contains vrev64.32
; instructions around the lane inserts.
89define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
90; CHECK-LE-LABEL: foo_sext_v2i64_v2i32:
91; CHECK-LE:       @ %bb.0: @ %entry
92; CHECK-LE-NEXT:    .save {r4, r5, r7, lr}
93; CHECK-LE-NEXT:    push {r4, r5, r7, lr}
94; CHECK-LE-NEXT:    .pad #4
95; CHECK-LE-NEXT:    sub sp, #4
96; CHECK-LE-NEXT:    ldrd lr, r12, [r1]
97; CHECK-LE-NEXT:    movs r1, #0
98; CHECK-LE-NEXT:    @ implicit-def: $q1
99; CHECK-LE-NEXT:    movs r4, #0
100; CHECK-LE-NEXT:    rsbs.w r3, lr, #0
101; CHECK-LE-NEXT:    vmov.32 q0[0], lr
102; CHECK-LE-NEXT:    sbcs.w r3, r1, lr, asr #31
103; CHECK-LE-NEXT:    mov.w lr, #0
104; CHECK-LE-NEXT:    it lt
105; CHECK-LE-NEXT:    movlt.w lr, #1
106; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
107; CHECK-LE-NEXT:    sbcs.w r3, r1, r12, asr #31
108; CHECK-LE-NEXT:    it lt
109; CHECK-LE-NEXT:    movlt r1, #1
110; CHECK-LE-NEXT:    cmp r1, #0
111; CHECK-LE-NEXT:    it ne
112; CHECK-LE-NEXT:    mvnne r1, #1
113; CHECK-LE-NEXT:    bfi r1, lr, #0, #1
114; CHECK-LE-NEXT:    vmov.32 q0[2], r12
115; CHECK-LE-NEXT:    and r3, r1, #3
116; CHECK-LE-NEXT:    lsls r1, r1, #31
117; CHECK-LE-NEXT:    itt ne
118; CHECK-LE-NEXT:    ldrne r1, [r2]
119; CHECK-LE-NEXT:    vmovne.32 q1[0], r1
120; CHECK-LE-NEXT:    lsls r1, r3, #30
121; CHECK-LE-NEXT:    itt mi
122; CHECK-LE-NEXT:    ldrmi r1, [r2, #4]
123; CHECK-LE-NEXT:    vmovmi.32 q1[2], r1
124; CHECK-LE-NEXT:    vmov r2, s0
125; CHECK-LE-NEXT:    vmov r3, s4
126; CHECK-LE-NEXT:    vmov r1, s6
127; CHECK-LE-NEXT:    vmov.32 q1[0], r3
128; CHECK-LE-NEXT:    rsbs r5, r2, #0
129; CHECK-LE-NEXT:    sbcs.w r2, r4, r2, asr #31
130; CHECK-LE-NEXT:    vmov r2, s2
131; CHECK-LE-NEXT:    asr.w lr, r3, #31
132; CHECK-LE-NEXT:    vmov.32 q1[1], lr
133; CHECK-LE-NEXT:    asr.w r12, r1, #31
134; CHECK-LE-NEXT:    vmov.32 q1[2], r1
135; CHECK-LE-NEXT:    mov.w r1, #0
136; CHECK-LE-NEXT:    it lt
137; CHECK-LE-NEXT:    movlt r1, #1
138; CHECK-LE-NEXT:    vmov.32 q1[3], r12
139; CHECK-LE-NEXT:    rsbs r3, r2, #0
140; CHECK-LE-NEXT:    sbcs.w r2, r4, r2, asr #31
141; CHECK-LE-NEXT:    it lt
142; CHECK-LE-NEXT:    movlt r4, #1
143; CHECK-LE-NEXT:    cmp r4, #0
144; CHECK-LE-NEXT:    it ne
145; CHECK-LE-NEXT:    mvnne r4, #1
146; CHECK-LE-NEXT:    bfi r4, r1, #0, #1
147; CHECK-LE-NEXT:    and r1, r4, #3
148; CHECK-LE-NEXT:    lsls r2, r4, #31
149; CHECK-LE-NEXT:    it ne
150; CHECK-LE-NEXT:    vstrne d2, [r0]
151; CHECK-LE-NEXT:    lsls r1, r1, #30
152; CHECK-LE-NEXT:    it mi
153; CHECK-LE-NEXT:    vstrmi d3, [r0, #8]
154; CHECK-LE-NEXT:    add sp, #4
155; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
156;
157; CHECK-BE-LABEL: foo_sext_v2i64_v2i32:
158; CHECK-BE:       @ %bb.0: @ %entry
159; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
160; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
161; CHECK-BE-NEXT:    .pad #4
162; CHECK-BE-NEXT:    sub sp, #4
163; CHECK-BE-NEXT:    ldrd r12, lr, [r1]
164; CHECK-BE-NEXT:    rsbs.w r1, lr, #0
165; CHECK-BE-NEXT:    mov.w r3, #0
166; CHECK-BE-NEXT:    sbcs.w r1, r3, lr, asr #31
167; CHECK-BE-NEXT:    vmov.32 q0[1], r12
168; CHECK-BE-NEXT:    @ implicit-def: $q2
169; CHECK-BE-NEXT:    vmov.32 q0[3], lr
170; CHECK-BE-NEXT:    mov.w lr, #0
171; CHECK-BE-NEXT:    it lt
172; CHECK-BE-NEXT:    movlt.w lr, #1
173; CHECK-BE-NEXT:    rsbs.w r1, r12, #0
174; CHECK-BE-NEXT:    sbcs.w r1, r3, r12, asr #31
175; CHECK-BE-NEXT:    it lt
176; CHECK-BE-NEXT:    movlt r3, #1
177; CHECK-BE-NEXT:    cmp r3, #0
178; CHECK-BE-NEXT:    it ne
179; CHECK-BE-NEXT:    mvnne r3, #1
180; CHECK-BE-NEXT:    bfi r3, lr, #0, #1
181; CHECK-BE-NEXT:    and r1, r3, #3
182; CHECK-BE-NEXT:    lsls r3, r3, #31
183; CHECK-BE-NEXT:    beq .LBB5_2
184; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
185; CHECK-BE-NEXT:    ldr r3, [r2]
186; CHECK-BE-NEXT:    vmov.32 q1[1], r3
187; CHECK-BE-NEXT:    vrev64.32 q2, q1
188; CHECK-BE-NEXT:  .LBB5_2: @ %else
189; CHECK-BE-NEXT:    vrev64.32 q1, q0
190; CHECK-BE-NEXT:    lsls r1, r1, #30
191; CHECK-BE-NEXT:    bpl .LBB5_4
192; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
193; CHECK-BE-NEXT:    ldr r1, [r2, #4]
194; CHECK-BE-NEXT:    vrev64.32 q0, q2
195; CHECK-BE-NEXT:    vmov.32 q0[3], r1
196; CHECK-BE-NEXT:    vrev64.32 q2, q0
197; CHECK-BE-NEXT:  .LBB5_4: @ %else2
198; CHECK-BE-NEXT:    vrev64.32 q0, q2
199; CHECK-BE-NEXT:    vrev64.32 q2, q1
200; CHECK-BE-NEXT:    vmov r2, s11
201; CHECK-BE-NEXT:    movs r4, #0
202; CHECK-BE-NEXT:    vmov r3, s1
203; CHECK-BE-NEXT:    vmov r1, s3
204; CHECK-BE-NEXT:    rsbs r5, r2, #0
205; CHECK-BE-NEXT:    sbcs.w r2, r4, r2, asr #31
206; CHECK-BE-NEXT:    vmov r2, s9
207; CHECK-BE-NEXT:    asr.w lr, r3, #31
208; CHECK-BE-NEXT:    vmov.32 q1[0], lr
209; CHECK-BE-NEXT:    asr.w r12, r1, #31
210; CHECK-BE-NEXT:    vmov.32 q1[1], r3
211; CHECK-BE-NEXT:    vmov.32 q1[2], r12
212; CHECK-BE-NEXT:    vmov.32 q1[3], r1
213; CHECK-BE-NEXT:    mov.w r1, #0
214; CHECK-BE-NEXT:    it lt
215; CHECK-BE-NEXT:    movlt r1, #1
216; CHECK-BE-NEXT:    vrev64.32 q0, q1
217; CHECK-BE-NEXT:    rsbs r3, r2, #0
218; CHECK-BE-NEXT:    sbcs.w r2, r4, r2, asr #31
219; CHECK-BE-NEXT:    it lt
220; CHECK-BE-NEXT:    movlt r4, #1
221; CHECK-BE-NEXT:    cmp r4, #0
222; CHECK-BE-NEXT:    it ne
223; CHECK-BE-NEXT:    mvnne r4, #1
224; CHECK-BE-NEXT:    bfi r4, r1, #0, #1
225; CHECK-BE-NEXT:    and r1, r4, #3
226; CHECK-BE-NEXT:    lsls r2, r4, #31
227; CHECK-BE-NEXT:    it ne
228; CHECK-BE-NEXT:    vstrne d0, [r0]
229; CHECK-BE-NEXT:    lsls r1, r1, #30
230; CHECK-BE-NEXT:    it mi
231; CHECK-BE-NEXT:    vstrmi d1, [r0, #8]
232; CHECK-BE-NEXT:    add sp, #4
233; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
234entry:
235  %0 = load <2 x i32>, <2 x i32>* %mask, align 4
236  %1 = icmp sgt <2 x i32> %0, zeroinitializer
237  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
238  %3 = sext <2 x i32> %2 to <2 x i64>
239  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
240  ret void
241}
242
; Under-aligned variant of foo_sext_v2i64_v2i32: the masked <2 x i32> load
; passes alignment 2 and the masked <2 x i64> store passes alignment 4, while
; the mask computation (element > 0, signed) and the sext are unchanged.
; In the expected output the per-lane conditional scalar loads are the same
; as in the aligned test, but each 64-bit lane is stored by moving the
; d-register into a pair of core registers (vmov) and issuing a conditional
; strd, rather than a conditional vstr. In the big-endian output the two strd
; source registers appear in swapped order relative to the vmov destination.
243define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
244; CHECK-LE-LABEL: foo_sext_v2i64_v2i32_unaligned:
245; CHECK-LE:       @ %bb.0: @ %entry
246; CHECK-LE-NEXT:    .save {r4, r5, r7, lr}
247; CHECK-LE-NEXT:    push {r4, r5, r7, lr}
248; CHECK-LE-NEXT:    .pad #4
249; CHECK-LE-NEXT:    sub sp, #4
250; CHECK-LE-NEXT:    ldrd lr, r12, [r1]
251; CHECK-LE-NEXT:    movs r1, #0
252; CHECK-LE-NEXT:    @ implicit-def: $q1
253; CHECK-LE-NEXT:    movs r4, #0
254; CHECK-LE-NEXT:    rsbs.w r3, lr, #0
255; CHECK-LE-NEXT:    vmov.32 q0[0], lr
256; CHECK-LE-NEXT:    sbcs.w r3, r1, lr, asr #31
257; CHECK-LE-NEXT:    mov.w lr, #0
258; CHECK-LE-NEXT:    it lt
259; CHECK-LE-NEXT:    movlt.w lr, #1
260; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
261; CHECK-LE-NEXT:    sbcs.w r3, r1, r12, asr #31
262; CHECK-LE-NEXT:    it lt
263; CHECK-LE-NEXT:    movlt r1, #1
264; CHECK-LE-NEXT:    cmp r1, #0
265; CHECK-LE-NEXT:    it ne
266; CHECK-LE-NEXT:    mvnne r1, #1
267; CHECK-LE-NEXT:    bfi r1, lr, #0, #1
268; CHECK-LE-NEXT:    vmov.32 q0[2], r12
269; CHECK-LE-NEXT:    and r3, r1, #3
270; CHECK-LE-NEXT:    lsls r1, r1, #31
271; CHECK-LE-NEXT:    itt ne
272; CHECK-LE-NEXT:    ldrne r1, [r2]
273; CHECK-LE-NEXT:    vmovne.32 q1[0], r1
274; CHECK-LE-NEXT:    lsls r1, r3, #30
275; CHECK-LE-NEXT:    itt mi
276; CHECK-LE-NEXT:    ldrmi r1, [r2, #4]
277; CHECK-LE-NEXT:    vmovmi.32 q1[2], r1
278; CHECK-LE-NEXT:    vmov r2, s0
279; CHECK-LE-NEXT:    vmov r3, s4
280; CHECK-LE-NEXT:    vmov r1, s6
281; CHECK-LE-NEXT:    vmov.32 q1[0], r3
282; CHECK-LE-NEXT:    rsbs r5, r2, #0
283; CHECK-LE-NEXT:    sbcs.w r2, r4, r2, asr #31
284; CHECK-LE-NEXT:    vmov r2, s2
285; CHECK-LE-NEXT:    asr.w lr, r3, #31
286; CHECK-LE-NEXT:    vmov.32 q1[1], lr
287; CHECK-LE-NEXT:    asr.w r12, r1, #31
288; CHECK-LE-NEXT:    vmov.32 q1[2], r1
289; CHECK-LE-NEXT:    mov.w r1, #0
290; CHECK-LE-NEXT:    it lt
291; CHECK-LE-NEXT:    movlt r1, #1
292; CHECK-LE-NEXT:    vmov.32 q1[3], r12
293; CHECK-LE-NEXT:    rsbs r3, r2, #0
294; CHECK-LE-NEXT:    sbcs.w r2, r4, r2, asr #31
295; CHECK-LE-NEXT:    it lt
296; CHECK-LE-NEXT:    movlt r4, #1
297; CHECK-LE-NEXT:    cmp r4, #0
298; CHECK-LE-NEXT:    it ne
299; CHECK-LE-NEXT:    mvnne r4, #1
300; CHECK-LE-NEXT:    bfi r4, r1, #0, #1
301; CHECK-LE-NEXT:    and r1, r4, #3
302; CHECK-LE-NEXT:    lsls r2, r4, #31
303; CHECK-LE-NEXT:    itt ne
304; CHECK-LE-NEXT:    vmovne r2, r3, d2
305; CHECK-LE-NEXT:    strdne r2, r3, [r0]
306; CHECK-LE-NEXT:    lsls r1, r1, #30
307; CHECK-LE-NEXT:    itt mi
308; CHECK-LE-NEXT:    vmovmi r1, r2, d3
309; CHECK-LE-NEXT:    strdmi r1, r2, [r0, #8]
310; CHECK-LE-NEXT:    add sp, #4
311; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
312;
313; CHECK-BE-LABEL: foo_sext_v2i64_v2i32_unaligned:
314; CHECK-BE:       @ %bb.0: @ %entry
315; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
316; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
317; CHECK-BE-NEXT:    .pad #4
318; CHECK-BE-NEXT:    sub sp, #4
319; CHECK-BE-NEXT:    ldrd r12, lr, [r1]
320; CHECK-BE-NEXT:    rsbs.w r1, lr, #0
321; CHECK-BE-NEXT:    mov.w r3, #0
322; CHECK-BE-NEXT:    sbcs.w r1, r3, lr, asr #31
323; CHECK-BE-NEXT:    vmov.32 q0[1], r12
324; CHECK-BE-NEXT:    @ implicit-def: $q2
325; CHECK-BE-NEXT:    vmov.32 q0[3], lr
326; CHECK-BE-NEXT:    mov.w lr, #0
327; CHECK-BE-NEXT:    it lt
328; CHECK-BE-NEXT:    movlt.w lr, #1
329; CHECK-BE-NEXT:    rsbs.w r1, r12, #0
330; CHECK-BE-NEXT:    sbcs.w r1, r3, r12, asr #31
331; CHECK-BE-NEXT:    it lt
332; CHECK-BE-NEXT:    movlt r3, #1
333; CHECK-BE-NEXT:    cmp r3, #0
334; CHECK-BE-NEXT:    it ne
335; CHECK-BE-NEXT:    mvnne r3, #1
336; CHECK-BE-NEXT:    bfi r3, lr, #0, #1
337; CHECK-BE-NEXT:    and r1, r3, #3
338; CHECK-BE-NEXT:    lsls r3, r3, #31
339; CHECK-BE-NEXT:    beq .LBB6_2
340; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
341; CHECK-BE-NEXT:    ldr r3, [r2]
342; CHECK-BE-NEXT:    vmov.32 q1[1], r3
343; CHECK-BE-NEXT:    vrev64.32 q2, q1
344; CHECK-BE-NEXT:  .LBB6_2: @ %else
345; CHECK-BE-NEXT:    vrev64.32 q1, q0
346; CHECK-BE-NEXT:    lsls r1, r1, #30
347; CHECK-BE-NEXT:    bpl .LBB6_4
348; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
349; CHECK-BE-NEXT:    ldr r1, [r2, #4]
350; CHECK-BE-NEXT:    vrev64.32 q0, q2
351; CHECK-BE-NEXT:    vmov.32 q0[3], r1
352; CHECK-BE-NEXT:    vrev64.32 q2, q0
353; CHECK-BE-NEXT:  .LBB6_4: @ %else2
354; CHECK-BE-NEXT:    vrev64.32 q0, q2
355; CHECK-BE-NEXT:    vrev64.32 q2, q1
356; CHECK-BE-NEXT:    vmov r2, s11
357; CHECK-BE-NEXT:    movs r4, #0
358; CHECK-BE-NEXT:    vmov r3, s1
359; CHECK-BE-NEXT:    vmov r1, s3
360; CHECK-BE-NEXT:    rsbs r5, r2, #0
361; CHECK-BE-NEXT:    sbcs.w r2, r4, r2, asr #31
362; CHECK-BE-NEXT:    vmov r2, s9
363; CHECK-BE-NEXT:    asr.w lr, r3, #31
364; CHECK-BE-NEXT:    vmov.32 q1[0], lr
365; CHECK-BE-NEXT:    asr.w r12, r1, #31
366; CHECK-BE-NEXT:    vmov.32 q1[1], r3
367; CHECK-BE-NEXT:    vmov.32 q1[2], r12
368; CHECK-BE-NEXT:    vmov.32 q1[3], r1
369; CHECK-BE-NEXT:    mov.w r1, #0
370; CHECK-BE-NEXT:    it lt
371; CHECK-BE-NEXT:    movlt r1, #1
372; CHECK-BE-NEXT:    vrev64.32 q0, q1
373; CHECK-BE-NEXT:    rsbs r3, r2, #0
374; CHECK-BE-NEXT:    sbcs.w r2, r4, r2, asr #31
375; CHECK-BE-NEXT:    it lt
376; CHECK-BE-NEXT:    movlt r4, #1
377; CHECK-BE-NEXT:    cmp r4, #0
378; CHECK-BE-NEXT:    it ne
379; CHECK-BE-NEXT:    mvnne r4, #1
380; CHECK-BE-NEXT:    bfi r4, r1, #0, #1
381; CHECK-BE-NEXT:    and r1, r4, #3
382; CHECK-BE-NEXT:    lsls r2, r4, #31
383; CHECK-BE-NEXT:    itt ne
384; CHECK-BE-NEXT:    vmovne r2, r3, d0
385; CHECK-BE-NEXT:    strdne r3, r2, [r0]
386; CHECK-BE-NEXT:    lsls r1, r1, #30
387; CHECK-BE-NEXT:    itt mi
388; CHECK-BE-NEXT:    vmovmi r1, r2, d1
389; CHECK-BE-NEXT:    strdmi r2, r1, [r0, #8]
390; CHECK-BE-NEXT:    add sp, #4
391; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
392entry:
393  %0 = load <2 x i32>, <2 x i32>* %mask, align 4
394  %1 = icmp sgt <2 x i32> %0, zeroinitializer
395  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
396  %3 = sext <2 x i32> %2 to <2 x i64>
397  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
398  ret void
399}
400
; Masked <2 x i32> load from %src (align 4), zero-extended to <2 x i64> and
; written to %dest with a masked store (align 8). A lane is active when the
; corresponding <2 x i32> element loaded from %mask is signed-greater-than 0.
; As in the sext tests, the expected output loads each lane with a
; conditional scalar ldr (IT blocks for little-endian, branches for
; big-endian) and stores each 64-bit lane with a conditional vstr. The zero
; extension itself appears as a vand with a vmov.i64 #0xffffffff constant.
401define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
402; CHECK-LE-LABEL: foo_zext_v2i64_v2i32:
403; CHECK-LE:       @ %bb.0: @ %entry
404; CHECK-LE-NEXT:    .save {r7, lr}
405; CHECK-LE-NEXT:    push {r7, lr}
406; CHECK-LE-NEXT:    .pad #4
407; CHECK-LE-NEXT:    sub sp, #4
408; CHECK-LE-NEXT:    ldrd lr, r12, [r1]
409; CHECK-LE-NEXT:    movs r1, #0
410; CHECK-LE-NEXT:    @ implicit-def: $q1
411; CHECK-LE-NEXT:    vmov.i64 q2, #0xffffffff
412; CHECK-LE-NEXT:    rsbs.w r3, lr, #0
413; CHECK-LE-NEXT:    vmov.32 q0[0], lr
414; CHECK-LE-NEXT:    sbcs.w r3, r1, lr, asr #31
415; CHECK-LE-NEXT:    mov.w lr, #0
416; CHECK-LE-NEXT:    it lt
417; CHECK-LE-NEXT:    movlt.w lr, #1
418; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
419; CHECK-LE-NEXT:    sbcs.w r3, r1, r12, asr #31
420; CHECK-LE-NEXT:    it lt
421; CHECK-LE-NEXT:    movlt r1, #1
422; CHECK-LE-NEXT:    cmp r1, #0
423; CHECK-LE-NEXT:    it ne
424; CHECK-LE-NEXT:    mvnne r1, #1
425; CHECK-LE-NEXT:    bfi r1, lr, #0, #1
426; CHECK-LE-NEXT:    vmov.32 q0[2], r12
427; CHECK-LE-NEXT:    and r3, r1, #3
428; CHECK-LE-NEXT:    mov.w r12, #0
429; CHECK-LE-NEXT:    lsls r1, r1, #31
430; CHECK-LE-NEXT:    itt ne
431; CHECK-LE-NEXT:    ldrne r1, [r2]
432; CHECK-LE-NEXT:    vmovne.32 q1[0], r1
433; CHECK-LE-NEXT:    lsls r1, r3, #30
434; CHECK-LE-NEXT:    itt mi
435; CHECK-LE-NEXT:    ldrmi r1, [r2, #4]
436; CHECK-LE-NEXT:    vmovmi.32 q1[2], r1
437; CHECK-LE-NEXT:    vmov r1, s0
438; CHECK-LE-NEXT:    movs r2, #0
439; CHECK-LE-NEXT:    vand q1, q1, q2
440; CHECK-LE-NEXT:    rsbs r3, r1, #0
441; CHECK-LE-NEXT:    vmov r3, s2
442; CHECK-LE-NEXT:    sbcs.w r1, r2, r1, asr #31
443; CHECK-LE-NEXT:    it lt
444; CHECK-LE-NEXT:    movlt.w r12, #1
445; CHECK-LE-NEXT:    rsbs r1, r3, #0
446; CHECK-LE-NEXT:    sbcs.w r1, r2, r3, asr #31
447; CHECK-LE-NEXT:    it lt
448; CHECK-LE-NEXT:    movlt r2, #1
449; CHECK-LE-NEXT:    cmp r2, #0
450; CHECK-LE-NEXT:    it ne
451; CHECK-LE-NEXT:    mvnne r2, #1
452; CHECK-LE-NEXT:    bfi r2, r12, #0, #1
453; CHECK-LE-NEXT:    and r1, r2, #3
454; CHECK-LE-NEXT:    lsls r2, r2, #31
455; CHECK-LE-NEXT:    it ne
456; CHECK-LE-NEXT:    vstrne d2, [r0]
457; CHECK-LE-NEXT:    lsls r1, r1, #30
458; CHECK-LE-NEXT:    it mi
459; CHECK-LE-NEXT:    vstrmi d3, [r0, #8]
460; CHECK-LE-NEXT:    add sp, #4
461; CHECK-LE-NEXT:    pop {r7, pc}
462;
463; CHECK-BE-LABEL: foo_zext_v2i64_v2i32:
464; CHECK-BE:       @ %bb.0: @ %entry
465; CHECK-BE-NEXT:    .save {r7, lr}
466; CHECK-BE-NEXT:    push {r7, lr}
467; CHECK-BE-NEXT:    .pad #4
468; CHECK-BE-NEXT:    sub sp, #4
469; CHECK-BE-NEXT:    ldrd r12, lr, [r1]
470; CHECK-BE-NEXT:    rsbs.w r1, lr, #0
471; CHECK-BE-NEXT:    mov.w r3, #0
472; CHECK-BE-NEXT:    sbcs.w r1, r3, lr, asr #31
473; CHECK-BE-NEXT:    vmov.32 q0[1], r12
474; CHECK-BE-NEXT:    @ implicit-def: $q1
475; CHECK-BE-NEXT:    vmov.32 q0[3], lr
476; CHECK-BE-NEXT:    mov.w lr, #0
477; CHECK-BE-NEXT:    it lt
478; CHECK-BE-NEXT:    movlt.w lr, #1
479; CHECK-BE-NEXT:    rsbs.w r1, r12, #0
480; CHECK-BE-NEXT:    sbcs.w r1, r3, r12, asr #31
481; CHECK-BE-NEXT:    it lt
482; CHECK-BE-NEXT:    movlt r3, #1
483; CHECK-BE-NEXT:    cmp r3, #0
484; CHECK-BE-NEXT:    it ne
485; CHECK-BE-NEXT:    mvnne r3, #1
486; CHECK-BE-NEXT:    bfi r3, lr, #0, #1
487; CHECK-BE-NEXT:    and r1, r3, #3
488; CHECK-BE-NEXT:    lsls r3, r3, #31
489; CHECK-BE-NEXT:    beq .LBB7_2
490; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
491; CHECK-BE-NEXT:    ldr r3, [r2]
492; CHECK-BE-NEXT:    vmov.32 q2[1], r3
493; CHECK-BE-NEXT:    vrev64.32 q1, q2
494; CHECK-BE-NEXT:  .LBB7_2: @ %else
495; CHECK-BE-NEXT:    vrev64.32 q2, q0
496; CHECK-BE-NEXT:    lsls r1, r1, #30
497; CHECK-BE-NEXT:    bpl .LBB7_4
498; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
499; CHECK-BE-NEXT:    ldr r1, [r2, #4]
500; CHECK-BE-NEXT:    vrev64.32 q0, q1
501; CHECK-BE-NEXT:    vmov.32 q0[3], r1
502; CHECK-BE-NEXT:    vrev64.32 q1, q0
503; CHECK-BE-NEXT:  .LBB7_4: @ %else2
504; CHECK-BE-NEXT:    vrev64.32 q3, q2
505; CHECK-BE-NEXT:    movs r2, #0
506; CHECK-BE-NEXT:    vmov r1, s15
507; CHECK-BE-NEXT:    mov.w r12, #0
508; CHECK-BE-NEXT:    vmov.i64 q0, #0xffffffff
509; CHECK-BE-NEXT:    vand q0, q1, q0
510; CHECK-BE-NEXT:    rsbs r3, r1, #0
511; CHECK-BE-NEXT:    vmov r3, s13
512; CHECK-BE-NEXT:    sbcs.w r1, r2, r1, asr #31
513; CHECK-BE-NEXT:    it lt
514; CHECK-BE-NEXT:    movlt.w r12, #1
515; CHECK-BE-NEXT:    rsbs r1, r3, #0
516; CHECK-BE-NEXT:    sbcs.w r1, r2, r3, asr #31
517; CHECK-BE-NEXT:    it lt
518; CHECK-BE-NEXT:    movlt r2, #1
519; CHECK-BE-NEXT:    cmp r2, #0
520; CHECK-BE-NEXT:    it ne
521; CHECK-BE-NEXT:    mvnne r2, #1
522; CHECK-BE-NEXT:    bfi r2, r12, #0, #1
523; CHECK-BE-NEXT:    and r1, r2, #3
524; CHECK-BE-NEXT:    lsls r2, r2, #31
525; CHECK-BE-NEXT:    it ne
526; CHECK-BE-NEXT:    vstrne d0, [r0]
527; CHECK-BE-NEXT:    lsls r1, r1, #30
528; CHECK-BE-NEXT:    it mi
529; CHECK-BE-NEXT:    vstrmi d1, [r0, #8]
530; CHECK-BE-NEXT:    add sp, #4
531; CHECK-BE-NEXT:    pop {r7, pc}
532entry:
533  %0 = load <2 x i32>, <2 x i32>* %mask, align 4
534  %1 = icmp sgt <2 x i32> %0, zeroinitializer
535  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
536  %3 = zext <2 x i32> %2 to <2 x i64>
537  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
538  ret void
539}
540
; Under-aligned variant of foo_zext_v2i64_v2i32: the masked <2 x i32> load
; passes alignment 2 and the masked <2 x i64> store passes alignment 4, while
; the mask computation (element > 0, signed) and the zext are unchanged.
; The expected output keeps the vand with a vmov.i64 #0xffffffff constant for
; the zero extension, but stores each 64-bit lane through a vmov into a pair
; of core registers followed by a conditional strd, rather than a conditional
; vstr. In the big-endian output the two strd source registers appear in
; swapped order relative to the vmov destination.
541define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> *%src) {
542; CHECK-LE-LABEL: foo_zext_v2i64_v2i32_unaligned:
543; CHECK-LE:       @ %bb.0: @ %entry
544; CHECK-LE-NEXT:    .save {r7, lr}
545; CHECK-LE-NEXT:    push {r7, lr}
546; CHECK-LE-NEXT:    .pad #4
547; CHECK-LE-NEXT:    sub sp, #4
548; CHECK-LE-NEXT:    ldrd lr, r12, [r1]
549; CHECK-LE-NEXT:    movs r1, #0
550; CHECK-LE-NEXT:    @ implicit-def: $q1
551; CHECK-LE-NEXT:    vmov.i64 q2, #0xffffffff
552; CHECK-LE-NEXT:    rsbs.w r3, lr, #0
553; CHECK-LE-NEXT:    vmov.32 q0[0], lr
554; CHECK-LE-NEXT:    sbcs.w r3, r1, lr, asr #31
555; CHECK-LE-NEXT:    mov.w lr, #0
556; CHECK-LE-NEXT:    it lt
557; CHECK-LE-NEXT:    movlt.w lr, #1
558; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
559; CHECK-LE-NEXT:    sbcs.w r3, r1, r12, asr #31
560; CHECK-LE-NEXT:    it lt
561; CHECK-LE-NEXT:    movlt r1, #1
562; CHECK-LE-NEXT:    cmp r1, #0
563; CHECK-LE-NEXT:    it ne
564; CHECK-LE-NEXT:    mvnne r1, #1
565; CHECK-LE-NEXT:    bfi r1, lr, #0, #1
566; CHECK-LE-NEXT:    vmov.32 q0[2], r12
567; CHECK-LE-NEXT:    and r3, r1, #3
568; CHECK-LE-NEXT:    mov.w r12, #0
569; CHECK-LE-NEXT:    lsls r1, r1, #31
570; CHECK-LE-NEXT:    itt ne
571; CHECK-LE-NEXT:    ldrne r1, [r2]
572; CHECK-LE-NEXT:    vmovne.32 q1[0], r1
573; CHECK-LE-NEXT:    lsls r1, r3, #30
574; CHECK-LE-NEXT:    itt mi
575; CHECK-LE-NEXT:    ldrmi r1, [r2, #4]
576; CHECK-LE-NEXT:    vmovmi.32 q1[2], r1
577; CHECK-LE-NEXT:    vmov r1, s0
578; CHECK-LE-NEXT:    movs r2, #0
579; CHECK-LE-NEXT:    vand q1, q1, q2
580; CHECK-LE-NEXT:    rsbs r3, r1, #0
581; CHECK-LE-NEXT:    vmov r3, s2
582; CHECK-LE-NEXT:    sbcs.w r1, r2, r1, asr #31
583; CHECK-LE-NEXT:    it lt
584; CHECK-LE-NEXT:    movlt.w r12, #1
585; CHECK-LE-NEXT:    rsbs r1, r3, #0
586; CHECK-LE-NEXT:    sbcs.w r1, r2, r3, asr #31
587; CHECK-LE-NEXT:    it lt
588; CHECK-LE-NEXT:    movlt r2, #1
589; CHECK-LE-NEXT:    cmp r2, #0
590; CHECK-LE-NEXT:    it ne
591; CHECK-LE-NEXT:    mvnne r2, #1
592; CHECK-LE-NEXT:    bfi r2, r12, #0, #1
593; CHECK-LE-NEXT:    and r1, r2, #3
594; CHECK-LE-NEXT:    lsls r2, r2, #31
595; CHECK-LE-NEXT:    itt ne
596; CHECK-LE-NEXT:    vmovne r2, r3, d2
597; CHECK-LE-NEXT:    strdne r2, r3, [r0]
598; CHECK-LE-NEXT:    lsls r1, r1, #30
599; CHECK-LE-NEXT:    itt mi
600; CHECK-LE-NEXT:    vmovmi r1, r2, d3
601; CHECK-LE-NEXT:    strdmi r1, r2, [r0, #8]
602; CHECK-LE-NEXT:    add sp, #4
603; CHECK-LE-NEXT:    pop {r7, pc}
604;
605; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned:
606; CHECK-BE:       @ %bb.0: @ %entry
607; CHECK-BE-NEXT:    .save {r7, lr}
608; CHECK-BE-NEXT:    push {r7, lr}
609; CHECK-BE-NEXT:    .pad #4
610; CHECK-BE-NEXT:    sub sp, #4
611; CHECK-BE-NEXT:    ldrd r12, lr, [r1]
612; CHECK-BE-NEXT:    rsbs.w r1, lr, #0
613; CHECK-BE-NEXT:    mov.w r3, #0
614; CHECK-BE-NEXT:    sbcs.w r1, r3, lr, asr #31
615; CHECK-BE-NEXT:    vmov.32 q0[1], r12
616; CHECK-BE-NEXT:    @ implicit-def: $q1
617; CHECK-BE-NEXT:    vmov.32 q0[3], lr
618; CHECK-BE-NEXT:    mov.w lr, #0
619; CHECK-BE-NEXT:    it lt
620; CHECK-BE-NEXT:    movlt.w lr, #1
621; CHECK-BE-NEXT:    rsbs.w r1, r12, #0
622; CHECK-BE-NEXT:    sbcs.w r1, r3, r12, asr #31
623; CHECK-BE-NEXT:    it lt
624; CHECK-BE-NEXT:    movlt r3, #1
625; CHECK-BE-NEXT:    cmp r3, #0
626; CHECK-BE-NEXT:    it ne
627; CHECK-BE-NEXT:    mvnne r3, #1
628; CHECK-BE-NEXT:    bfi r3, lr, #0, #1
629; CHECK-BE-NEXT:    and r1, r3, #3
630; CHECK-BE-NEXT:    lsls r3, r3, #31
631; CHECK-BE-NEXT:    beq .LBB8_2
632; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
633; CHECK-BE-NEXT:    ldr r3, [r2]
634; CHECK-BE-NEXT:    vmov.32 q2[1], r3
635; CHECK-BE-NEXT:    vrev64.32 q1, q2
636; CHECK-BE-NEXT:  .LBB8_2: @ %else
637; CHECK-BE-NEXT:    vrev64.32 q2, q0
638; CHECK-BE-NEXT:    lsls r1, r1, #30
639; CHECK-BE-NEXT:    bpl .LBB8_4
640; CHECK-BE-NEXT:  @ %bb.3: @ %cond.load1
641; CHECK-BE-NEXT:    ldr r1, [r2, #4]
642; CHECK-BE-NEXT:    vrev64.32 q0, q1
643; CHECK-BE-NEXT:    vmov.32 q0[3], r1
644; CHECK-BE-NEXT:    vrev64.32 q1, q0
645; CHECK-BE-NEXT:  .LBB8_4: @ %else2
646; CHECK-BE-NEXT:    vrev64.32 q3, q2
647; CHECK-BE-NEXT:    movs r2, #0
648; CHECK-BE-NEXT:    vmov r1, s15
649; CHECK-BE-NEXT:    mov.w r12, #0
650; CHECK-BE-NEXT:    vmov.i64 q0, #0xffffffff
651; CHECK-BE-NEXT:    vand q0, q1, q0
652; CHECK-BE-NEXT:    rsbs r3, r1, #0
653; CHECK-BE-NEXT:    vmov r3, s13
654; CHECK-BE-NEXT:    sbcs.w r1, r2, r1, asr #31
655; CHECK-BE-NEXT:    it lt
656; CHECK-BE-NEXT:    movlt.w r12, #1
657; CHECK-BE-NEXT:    rsbs r1, r3, #0
658; CHECK-BE-NEXT:    sbcs.w r1, r2, r3, asr #31
659; CHECK-BE-NEXT:    it lt
660; CHECK-BE-NEXT:    movlt r2, #1
661; CHECK-BE-NEXT:    cmp r2, #0
662; CHECK-BE-NEXT:    it ne
663; CHECK-BE-NEXT:    mvnne r2, #1
664; CHECK-BE-NEXT:    bfi r2, r12, #0, #1
665; CHECK-BE-NEXT:    and r1, r2, #3
666; CHECK-BE-NEXT:    lsls r2, r2, #31
667; CHECK-BE-NEXT:    itt ne
668; CHECK-BE-NEXT:    vmovne r2, r3, d0
669; CHECK-BE-NEXT:    strdne r3, r2, [r0]
670; CHECK-BE-NEXT:    lsls r1, r1, #30
671; CHECK-BE-NEXT:    itt mi
672; CHECK-BE-NEXT:    vmovmi r1, r2, d1
673; CHECK-BE-NEXT:    strdmi r2, r1, [r0, #8]
674; CHECK-BE-NEXT:    add sp, #4
675; CHECK-BE-NEXT:    pop {r7, pc}
676entry:
677  %0 = load <2 x i32>, <2 x i32>* %mask, align 4
678  %1 = icmp sgt <2 x i32> %0, zeroinitializer
679  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
680  %3 = zext <2 x i32> %2 to <2 x i64>
681  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
682  ret void
683}
684
; Masked <8 x i16> load from %src whose result is written to %dest with a
; masked store. Both operations share one predicate: the lanes where the
; <8 x i16> vector loaded from %mask is signed-greater-than zero.
; The expected output is a single VPT block (vptt.s16) that predicates a
; vldrht.u16 load and a vstrht.16 store.
685define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src) {
686; CHECK-LABEL: foo_v8i16_v8i16:
687; CHECK:       @ %bb.0: @ %entry
688; CHECK-NEXT:    vldrh.u16 q0, [r1]
689; CHECK-NEXT:    vptt.s16 gt, q0, zr
690; CHECK-NEXT:    vldrht.u16 q0, [r2]
691; CHECK-NEXT:    vstrht.16 q0, [r0]
692; CHECK-NEXT:    bx lr
693entry:
694  %0 = load <8 x i16>, <8 x i16>* %mask, align 2
695  %1 = icmp sgt <8 x i16> %0, zeroinitializer
696  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
697  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1)
698  ret void
699}
700
; Masked <8 x i8> load from %src, sign-extended to <8 x i16> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the sign extension into the predicated load
; (vldrbt.s16) inside one VPT block, followed by a predicated vstrht.16.
701define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
702; CHECK-LABEL: foo_sext_v8i16_v8i8:
703; CHECK:       @ %bb.0: @ %entry
704; CHECK-NEXT:    vldrh.u16 q0, [r1]
705; CHECK-NEXT:    vptt.s16 gt, q0, zr
706; CHECK-NEXT:    vldrbt.s16 q0, [r2]
707; CHECK-NEXT:    vstrht.16 q0, [r0]
708; CHECK-NEXT:    bx lr
709entry:
710  %0 = load <8 x i16>, <8 x i16>* %mask, align 2
711  %1 = icmp sgt <8 x i16> %0, zeroinitializer
712  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
713  %3 = sext <8 x i8> %2 to <8 x i16>
714  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
715  ret void
716}
717
; Masked <8 x i8> load from %src, zero-extended to <8 x i16> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the zero extension into the predicated load
; (vldrbt.u16) inside one VPT block, followed by a predicated vstrht.16.
718define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
719; CHECK-LABEL: foo_zext_v8i16_v8i8:
720; CHECK:       @ %bb.0: @ %entry
721; CHECK-NEXT:    vldrh.u16 q0, [r1]
722; CHECK-NEXT:    vptt.s16 gt, q0, zr
723; CHECK-NEXT:    vldrbt.u16 q0, [r2]
724; CHECK-NEXT:    vstrht.16 q0, [r0]
725; CHECK-NEXT:    bx lr
726entry:
727  %0 = load <8 x i16>, <8 x i16>* %mask, align 2
728  %1 = icmp sgt <8 x i16> %0, zeroinitializer
729  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
730  %3 = zext <8 x i8> %2 to <8 x i16>
731  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
732  ret void
733}
734
; Masked <16 x i8> load from %src whose result is written to %dest with a
; masked store. Both operations share one predicate: the lanes where the
; <16 x i8> vector loaded from %mask is signed-greater-than zero.
; The expected output is a single VPT block (vptt.s8) that predicates a
; vldrbt.u8 load and a vstrbt.8 store.
735define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src) {
736; CHECK-LABEL: foo_v16i8_v16i8:
737; CHECK:       @ %bb.0: @ %entry
738; CHECK-NEXT:    vldrb.u8 q0, [r1]
739; CHECK-NEXT:    vptt.s8 gt, q0, zr
740; CHECK-NEXT:    vldrbt.u8 q0, [r2]
741; CHECK-NEXT:    vstrbt.8 q0, [r0]
742; CHECK-NEXT:    bx lr
743entry:
744  %0 = load <16 x i8>, <16 x i8>* %mask, align 1
745  %1 = icmp sgt <16 x i8> %0, zeroinitializer
746  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef)
747  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1)
748  ret void
749}
750
; Masked <8 x i16> load from %src, truncated to <8 x i8> and written to %dest
; with a masked store. The predicate is (mask element > 0), signed, and is
; used for both the load and the store.
; The expected output folds the truncation into the predicated store
; (vstrbt.16) inside one VPT block, after a predicated vldrht.u16 load.
751define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *%src) {
752; CHECK-LABEL: foo_trunc_v8i8_v8i16:
753; CHECK:       @ %bb.0: @ %entry
754; CHECK-NEXT:    vldrh.u16 q0, [r1]
755; CHECK-NEXT:    vptt.s16 gt, q0, zr
756; CHECK-NEXT:    vldrht.u16 q0, [r2]
757; CHECK-NEXT:    vstrbt.16 q0, [r0]
758; CHECK-NEXT:    bx lr
759entry:
760  %0 = load <8 x i16>, <8 x i16>* %mask, align 2
761  %1 = icmp sgt <8 x i16> %0, zeroinitializer
762  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
763  %3 = trunc <8 x i16> %2 to <8 x i8>
764  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1)
765  ret void
766}
767
; Masked <4 x i32> load from %src, truncated to <4 x i8> and written to %dest
; with a masked store. The predicate is (mask element > 0), signed, and is
; used for both the load and the store.
; The expected output folds the truncation into the predicated store
; (vstrbt.32) inside one VPT block, after a predicated vldrwt.u32 load.
768define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
769; CHECK-LABEL: foo_trunc_v4i8_v4i32:
770; CHECK:       @ %bb.0: @ %entry
771; CHECK-NEXT:    vldrw.u32 q0, [r1]
772; CHECK-NEXT:    vptt.s32 gt, q0, zr
773; CHECK-NEXT:    vldrwt.u32 q0, [r2]
774; CHECK-NEXT:    vstrbt.32 q0, [r0]
775; CHECK-NEXT:    bx lr
776entry:
777  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
778  %1 = icmp sgt <4 x i32> %0, zeroinitializer
779  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
780  %3 = trunc <4 x i32> %2 to <4 x i8>
781  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1)
782  ret void
783}
784
; Masked <4 x i32> load from %src, truncated to <4 x i16> and written to
; %dest with a masked store. The predicate is (mask element > 0), signed, and
; is used for both the load and the store.
; The expected output folds the truncation into the predicated store
; (vstrht.32) inside one VPT block, after a predicated vldrwt.u32 load.
785define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
786; CHECK-LABEL: foo_trunc_v4i16_v4i32:
787; CHECK:       @ %bb.0: @ %entry
788; CHECK-NEXT:    vldrw.u32 q0, [r1]
789; CHECK-NEXT:    vptt.s32 gt, q0, zr
790; CHECK-NEXT:    vldrwt.u32 q0, [r2]
791; CHECK-NEXT:    vstrht.32 q0, [r0]
792; CHECK-NEXT:    bx lr
793entry:
794  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
795  %1 = icmp sgt <4 x i32> %0, zeroinitializer
796  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
797  %3 = trunc <4 x i32> %2 to <4 x i16>
798  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1)
799  ret void
800}
801
; Masked <4 x float> load copied straight to a masked <4 x float> store, with
; no conversion in between. The predicate comes from an integer mask vector
; (element > 0, signed). The expected assembly is the same VPT-predicated
; word load/store pair (vldrwt.u32 / vstrwt.32) that the file expects for the
; <4 x i32> copy.
802define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *%src) {
803; CHECK-LABEL: foo_v4f32_v4f32:
804; CHECK:       @ %bb.0: @ %entry
805; CHECK-NEXT:    vldrw.u32 q0, [r1]
806; CHECK-NEXT:    vptt.s32 gt, q0, zr
807; CHECK-NEXT:    vldrwt.u32 q0, [r2]
808; CHECK-NEXT:    vstrwt.32 q0, [r0]
809; CHECK-NEXT:    bx lr
810entry:
; %0 = raw mask vector, %1 = per-lane predicate (mask > 0).
811  %0 = load <4 x i32>, <4 x i32>* %mask, align 4
812  %1 = icmp sgt <4 x i32> %0, zeroinitializer
; Predicated load and store, both with alignment 4 and the same predicate.
813  %2 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef)
814  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1)
815  ret void
816}
817
; Masked <8 x half> load copied straight to a masked <8 x half> store, with
; no conversion in between. The predicate comes from an <8 x i16> mask vector
; (element > 0, signed). The expected assembly is a two-slot VPT block
; predicating a halfword vector load and a halfword vector store.
818define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%src) {
819; CHECK-LABEL: foo_v8f16_v8f16:
820; CHECK:       @ %bb.0: @ %entry
821; CHECK-NEXT:    vldrh.u16 q0, [r1]
822; CHECK-NEXT:    vptt.s16 gt, q0, zr
823; CHECK-NEXT:    vldrht.u16 q0, [r2]
824; CHECK-NEXT:    vstrht.16 q0, [r0]
825; CHECK-NEXT:    bx lr
826entry:
; %0 = raw mask vector, %1 = per-lane predicate (mask > 0).
827  %0 = load <8 x i16>, <8 x i16>* %mask, align 2
828  %1 = icmp sgt <8 x i16> %0, zeroinitializer
; Predicated load and store, both with alignment 2 and the same predicate.
829  %2 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef)
830  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1)
831  ret void
832}
833
; Masked <4 x half> load, extended to <4 x float>, then masked-stored.
; The predicate is "mask element > 0 (signed)" on a <4 x i16> mask vector.
;
; Unlike the other tests in this part of the file, the expected assembly is
; fully scalarised rather than VPT-predicated:
;   * the predicate is read out of p0 with vmrs and one bit per lane is
;     packed into a scalar register (ubfx / rsbs / bfi);
;   * each lane is loaded conditionally with vldr.16 in its own branch block
;     (%cond.load, %cond.load1, %cond.load4, %cond.load7);
;   * the halves are converted with vcvtb/vcvtt.f32.f16;
;   * each lane is stored conditionally with a scalar str inside an IT block.
;
; NOTE(review): the masked store of <4 x float> is given alignment 2 here, and
; the expected assembly appears identical (apart from label numbers) to the
; _unaligned variant of this test, which uses alignment 1. Confirm whether the
; alignment value is intentional before changing it; the assertions would
; need regenerating if it changes.
834define void @foo_v4f32_v4f16(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *%src) {
835; CHECK-LABEL: foo_v4f32_v4f16:
836; CHECK:       @ %bb.0: @ %entry
837; CHECK-NEXT:    .save {r7, lr}
838; CHECK-NEXT:    push {r7, lr}
839; CHECK-NEXT:    .pad #8
840; CHECK-NEXT:    sub sp, #8
841; CHECK-NEXT:    vldrh.s32 q0, [r1]
842; CHECK-NEXT:    mov.w lr, #0
843; CHECK-NEXT:    @ implicit-def: $q1
844; CHECK-NEXT:    vcmp.s32 gt, q0, zr
845; CHECK-NEXT:    vmrs r3, p0
846; CHECK-NEXT:    and r1, r3, #1
847; CHECK-NEXT:    rsb.w r12, r1, #0
848; CHECK-NEXT:    ubfx r1, r3, #4, #1
849; CHECK-NEXT:    bfi lr, r12, #0, #1
850; CHECK-NEXT:    rsbs r1, r1, #0
851; CHECK-NEXT:    bfi lr, r1, #1, #1
852; CHECK-NEXT:    ubfx r1, r3, #8, #1
853; CHECK-NEXT:    rsbs r1, r1, #0
854; CHECK-NEXT:    bfi lr, r1, #2, #1
855; CHECK-NEXT:    ubfx r1, r3, #12, #1
856; CHECK-NEXT:    rsbs r1, r1, #0
857; CHECK-NEXT:    bfi lr, r1, #3, #1
858; CHECK-NEXT:    lsls.w r1, lr, #31
859; CHECK-NEXT:    beq .LBB18_2
860; CHECK-NEXT:  @ %bb.1: @ %cond.load
861; CHECK-NEXT:    vldr.16 s4, [r2]
862; CHECK-NEXT:  .LBB18_2: @ %else
863; CHECK-NEXT:    lsls.w r1, lr, #30
864; CHECK-NEXT:    bpl .LBB18_6
865; CHECK-NEXT:  @ %bb.3: @ %cond.load1
866; CHECK-NEXT:    vldr.16 s0, [r2, #2]
867; CHECK-NEXT:    vmov r3, s4
868; CHECK-NEXT:    vmovx.f16 s4, s5
869; CHECK-NEXT:    vmov r1, s0
870; CHECK-NEXT:    vmov.16 q0[0], r3
871; CHECK-NEXT:    vmov.16 q0[1], r1
872; CHECK-NEXT:    vmov r1, s5
873; CHECK-NEXT:    vmov.16 q0[2], r1
874; CHECK-NEXT:    vmov r1, s4
875; CHECK-NEXT:    vmov.16 q0[3], r1
876; CHECK-NEXT:    lsls.w r1, lr, #29
877; CHECK-NEXT:    bmi .LBB18_7
878; CHECK-NEXT:  .LBB18_4:
879; CHECK-NEXT:    vmov q1, q0
880; CHECK-NEXT:    lsls.w r1, lr, #28
881; CHECK-NEXT:    bmi .LBB18_8
882; CHECK-NEXT:  .LBB18_5:
883; CHECK-NEXT:    vmov q2, q1
884; CHECK-NEXT:    b .LBB18_9
885; CHECK-NEXT:  .LBB18_6:
886; CHECK-NEXT:    vmov q0, q1
887; CHECK-NEXT:    lsls.w r1, lr, #29
888; CHECK-NEXT:    bpl .LBB18_4
889; CHECK-NEXT:  .LBB18_7: @ %cond.load4
890; CHECK-NEXT:    vmovx.f16 s4, s0
891; CHECK-NEXT:    vmov r1, s0
892; CHECK-NEXT:    vldr.16 s8, [r2, #4]
893; CHECK-NEXT:    vmov r3, s4
894; CHECK-NEXT:    vmov.16 q1[0], r1
895; CHECK-NEXT:    vmovx.f16 s0, s1
896; CHECK-NEXT:    vmov.16 q1[1], r3
897; CHECK-NEXT:    vmov r1, s8
898; CHECK-NEXT:    vmov.16 q1[2], r1
899; CHECK-NEXT:    vmov r1, s0
900; CHECK-NEXT:    vmov.16 q1[3], r1
901; CHECK-NEXT:    lsls.w r1, lr, #28
902; CHECK-NEXT:    bpl .LBB18_5
903; CHECK-NEXT:  .LBB18_8: @ %cond.load7
904; CHECK-NEXT:    vmovx.f16 s0, s4
905; CHECK-NEXT:    vmov r3, s4
906; CHECK-NEXT:    vmov r1, s0
907; CHECK-NEXT:    vmov.16 q2[0], r3
908; CHECK-NEXT:    vldr.16 s0, [r2, #6]
909; CHECK-NEXT:    vmov.16 q2[1], r1
910; CHECK-NEXT:    vmov r1, s5
911; CHECK-NEXT:    vmov.16 q2[2], r1
912; CHECK-NEXT:    vmov r1, s0
913; CHECK-NEXT:    vmov.16 q2[3], r1
914; CHECK-NEXT:  .LBB18_9: @ %else8
915; CHECK-NEXT:    vmrs r2, p0
916; CHECK-NEXT:    movs r1, #0
917; CHECK-NEXT:    vcvtt.f32.f16 s3, s9
918; CHECK-NEXT:    vcvtb.f32.f16 s2, s9
919; CHECK-NEXT:    vcvtt.f32.f16 s1, s8
920; CHECK-NEXT:    vcvtb.f32.f16 s0, s8
921; CHECK-NEXT:    and r3, r2, #1
922; CHECK-NEXT:    rsbs r3, r3, #0
923; CHECK-NEXT:    bfi r1, r3, #0, #1
924; CHECK-NEXT:    ubfx r3, r2, #4, #1
925; CHECK-NEXT:    rsbs r3, r3, #0
926; CHECK-NEXT:    bfi r1, r3, #1, #1
927; CHECK-NEXT:    ubfx r3, r2, #8, #1
928; CHECK-NEXT:    ubfx r2, r2, #12, #1
929; CHECK-NEXT:    rsbs r3, r3, #0
930; CHECK-NEXT:    bfi r1, r3, #2, #1
931; CHECK-NEXT:    rsbs r2, r2, #0
932; CHECK-NEXT:    bfi r1, r2, #3, #1
933; CHECK-NEXT:    lsls r2, r1, #31
934; CHECK-NEXT:    itt ne
935; CHECK-NEXT:    vmovne r2, s0
936; CHECK-NEXT:    strne r2, [r0]
937; CHECK-NEXT:    lsls r2, r1, #30
938; CHECK-NEXT:    itt mi
939; CHECK-NEXT:    vmovmi r2, s1
940; CHECK-NEXT:    strmi r2, [r0, #4]
941; CHECK-NEXT:    lsls r2, r1, #29
942; CHECK-NEXT:    itt mi
943; CHECK-NEXT:    vmovmi r2, s2
944; CHECK-NEXT:    strmi r2, [r0, #8]
945; CHECK-NEXT:    lsls r1, r1, #28
946; CHECK-NEXT:    itt mi
947; CHECK-NEXT:    vmovmi r1, s3
948; CHECK-NEXT:    strmi r1, [r0, #12]
949; CHECK-NEXT:    add sp, #8
950; CHECK-NEXT:    pop {r7, pc}
951entry:
; %0 = raw mask vector, %1 = per-lane predicate (mask > 0).
952  %0 = load <4 x i16>, <4 x i16>* %mask, align 2
953  %1 = icmp sgt <4 x i16> %0, zeroinitializer
; Predicated half-precision load (align 2, undef passthru), widened to float.
954  %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
955  %3 = fpext <4 x half> %2 to <4 x float>
; Predicated store of the widened vector; note the alignment argument is 2.
956  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 2, <4 x i1> %1)
957  ret void
958}
959
; Same shape as foo_v4f32_v4f16 (masked <4 x half> load, fpext to
; <4 x float>, masked store), but the masked store is given alignment 1.
; The masked load keeps alignment 2.
;
; The expected assembly is fully scalarised:
;   * the predicate is read out of p0 with vmrs and one bit per lane is
;     packed into a scalar register (ubfx / rsbs / bfi);
;   * each lane is loaded conditionally with vldr.16 in its own branch block
;     (%cond.load, %cond.load1, %cond.load4, %cond.load7);
;   * the halves are converted with vcvtb/vcvtt.f32.f16;
;   * each lane is stored conditionally with a scalar str inside an IT block.
960define void @foo_v4f32_v4f16_unaligned(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *%src) {
961; CHECK-LABEL: foo_v4f32_v4f16_unaligned:
962; CHECK:       @ %bb.0: @ %entry
963; CHECK-NEXT:    .save {r7, lr}
964; CHECK-NEXT:    push {r7, lr}
965; CHECK-NEXT:    .pad #8
966; CHECK-NEXT:    sub sp, #8
967; CHECK-NEXT:    vldrh.s32 q0, [r1]
968; CHECK-NEXT:    mov.w lr, #0
969; CHECK-NEXT:    @ implicit-def: $q1
970; CHECK-NEXT:    vcmp.s32 gt, q0, zr
971; CHECK-NEXT:    vmrs r3, p0
972; CHECK-NEXT:    and r1, r3, #1
973; CHECK-NEXT:    rsb.w r12, r1, #0
974; CHECK-NEXT:    ubfx r1, r3, #4, #1
975; CHECK-NEXT:    bfi lr, r12, #0, #1
976; CHECK-NEXT:    rsbs r1, r1, #0
977; CHECK-NEXT:    bfi lr, r1, #1, #1
978; CHECK-NEXT:    ubfx r1, r3, #8, #1
979; CHECK-NEXT:    rsbs r1, r1, #0
980; CHECK-NEXT:    bfi lr, r1, #2, #1
981; CHECK-NEXT:    ubfx r1, r3, #12, #1
982; CHECK-NEXT:    rsbs r1, r1, #0
983; CHECK-NEXT:    bfi lr, r1, #3, #1
984; CHECK-NEXT:    lsls.w r1, lr, #31
985; CHECK-NEXT:    beq .LBB19_2
986; CHECK-NEXT:  @ %bb.1: @ %cond.load
987; CHECK-NEXT:    vldr.16 s4, [r2]
988; CHECK-NEXT:  .LBB19_2: @ %else
989; CHECK-NEXT:    lsls.w r1, lr, #30
990; CHECK-NEXT:    bpl .LBB19_6
991; CHECK-NEXT:  @ %bb.3: @ %cond.load1
992; CHECK-NEXT:    vldr.16 s0, [r2, #2]
993; CHECK-NEXT:    vmov r3, s4
994; CHECK-NEXT:    vmovx.f16 s4, s5
995; CHECK-NEXT:    vmov r1, s0
996; CHECK-NEXT:    vmov.16 q0[0], r3
997; CHECK-NEXT:    vmov.16 q0[1], r1
998; CHECK-NEXT:    vmov r1, s5
999; CHECK-NEXT:    vmov.16 q0[2], r1
1000; CHECK-NEXT:    vmov r1, s4
1001; CHECK-NEXT:    vmov.16 q0[3], r1
1002; CHECK-NEXT:    lsls.w r1, lr, #29
1003; CHECK-NEXT:    bmi .LBB19_7
1004; CHECK-NEXT:  .LBB19_4:
1005; CHECK-NEXT:    vmov q1, q0
1006; CHECK-NEXT:    lsls.w r1, lr, #28
1007; CHECK-NEXT:    bmi .LBB19_8
1008; CHECK-NEXT:  .LBB19_5:
1009; CHECK-NEXT:    vmov q2, q1
1010; CHECK-NEXT:    b .LBB19_9
1011; CHECK-NEXT:  .LBB19_6:
1012; CHECK-NEXT:    vmov q0, q1
1013; CHECK-NEXT:    lsls.w r1, lr, #29
1014; CHECK-NEXT:    bpl .LBB19_4
1015; CHECK-NEXT:  .LBB19_7: @ %cond.load4
1016; CHECK-NEXT:    vmovx.f16 s4, s0
1017; CHECK-NEXT:    vmov r1, s0
1018; CHECK-NEXT:    vldr.16 s8, [r2, #4]
1019; CHECK-NEXT:    vmov r3, s4
1020; CHECK-NEXT:    vmov.16 q1[0], r1
1021; CHECK-NEXT:    vmovx.f16 s0, s1
1022; CHECK-NEXT:    vmov.16 q1[1], r3
1023; CHECK-NEXT:    vmov r1, s8
1024; CHECK-NEXT:    vmov.16 q1[2], r1
1025; CHECK-NEXT:    vmov r1, s0
1026; CHECK-NEXT:    vmov.16 q1[3], r1
1027; CHECK-NEXT:    lsls.w r1, lr, #28
1028; CHECK-NEXT:    bpl .LBB19_5
1029; CHECK-NEXT:  .LBB19_8: @ %cond.load7
1030; CHECK-NEXT:    vmovx.f16 s0, s4
1031; CHECK-NEXT:    vmov r3, s4
1032; CHECK-NEXT:    vmov r1, s0
1033; CHECK-NEXT:    vmov.16 q2[0], r3
1034; CHECK-NEXT:    vldr.16 s0, [r2, #6]
1035; CHECK-NEXT:    vmov.16 q2[1], r1
1036; CHECK-NEXT:    vmov r1, s5
1037; CHECK-NEXT:    vmov.16 q2[2], r1
1038; CHECK-NEXT:    vmov r1, s0
1039; CHECK-NEXT:    vmov.16 q2[3], r1
1040; CHECK-NEXT:  .LBB19_9: @ %else8
1041; CHECK-NEXT:    vmrs r2, p0
1042; CHECK-NEXT:    movs r1, #0
1043; CHECK-NEXT:    vcvtt.f32.f16 s3, s9
1044; CHECK-NEXT:    vcvtb.f32.f16 s2, s9
1045; CHECK-NEXT:    vcvtt.f32.f16 s1, s8
1046; CHECK-NEXT:    vcvtb.f32.f16 s0, s8
1047; CHECK-NEXT:    and r3, r2, #1
1048; CHECK-NEXT:    rsbs r3, r3, #0
1049; CHECK-NEXT:    bfi r1, r3, #0, #1
1050; CHECK-NEXT:    ubfx r3, r2, #4, #1
1051; CHECK-NEXT:    rsbs r3, r3, #0
1052; CHECK-NEXT:    bfi r1, r3, #1, #1
1053; CHECK-NEXT:    ubfx r3, r2, #8, #1
1054; CHECK-NEXT:    ubfx r2, r2, #12, #1
1055; CHECK-NEXT:    rsbs r3, r3, #0
1056; CHECK-NEXT:    bfi r1, r3, #2, #1
1057; CHECK-NEXT:    rsbs r2, r2, #0
1058; CHECK-NEXT:    bfi r1, r2, #3, #1
1059; CHECK-NEXT:    lsls r2, r1, #31
1060; CHECK-NEXT:    itt ne
1061; CHECK-NEXT:    vmovne r2, s0
1062; CHECK-NEXT:    strne r2, [r0]
1063; CHECK-NEXT:    lsls r2, r1, #30
1064; CHECK-NEXT:    itt mi
1065; CHECK-NEXT:    vmovmi r2, s1
1066; CHECK-NEXT:    strmi r2, [r0, #4]
1067; CHECK-NEXT:    lsls r2, r1, #29
1068; CHECK-NEXT:    itt mi
1069; CHECK-NEXT:    vmovmi r2, s2
1070; CHECK-NEXT:    strmi r2, [r0, #8]
1071; CHECK-NEXT:    lsls r1, r1, #28
1072; CHECK-NEXT:    itt mi
1073; CHECK-NEXT:    vmovmi r1, s3
1074; CHECK-NEXT:    strmi r1, [r0, #12]
1075; CHECK-NEXT:    add sp, #8
1076; CHECK-NEXT:    pop {r7, pc}
1077entry:
; %0 = raw mask vector, %1 = per-lane predicate (mask > 0).
1078  %0 = load <4 x i16>, <4 x i16>* %mask, align 2
1079  %1 = icmp sgt <4 x i16> %0, zeroinitializer
; Predicated half-precision load (align 2, undef passthru), widened to float.
1080  %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
1081  %3 = fpext <4 x half> %2 to <4 x float>
; Predicated store of the widened vector with alignment 1 (byte-aligned).
1082  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 1, <4 x i1> %1)
1083  ret void
1084}
1085
; Declarations of the masked load/store intrinsic overloads called by the
; tests in this file.
;   store operands: (value, pointer, i32 alignment, <N x i1> mask)
;   load operands:  (pointer, i32 alignment, <N x i1> mask, passthru value)
1086declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
1087declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
1088declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
1089declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
1090declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
1091declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
1092declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
1093declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
1094declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
1095declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
1096declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
1097declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
1098
; Further masked load/store overloads, mostly for narrower element types
; (v8i8, v4i8, v4i16), plus a v2i64 store. The truncating-store tests in this
; file call the v8i8, v4i8 and v4i16 store overloads, and the sign-extending
; load test earlier in the file calls the v4i8 load overload.
1099declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
1100declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
1101declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
1102declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
1103declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
1104declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
1105declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
1106