• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3
; Gather of <16 x i8> from a scalar i8* base with i8 offsets that are loaded
; from %offptr and zero-extended to i32; the mask is all-true.
; The assertions below expect this to become a single vector-offset byte
; gather (vldrb.u8 q0, [r0, q1]).
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %offs.zext = zext <16 x i8> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.zext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
17
; Gather of <8 x i8> from a scalar i8* base with zero-extended i8 offsets and
; an all-true mask.
; The assertions below expect a scalarized sequence: the offsets are widened
; with vldrb.u32, the base is added with vadd.i32, and each lane is loaded
; with ldrb and inserted with vmov.16.
define arm_aapcs_vfpcc <8 x i8> @unscaled_v8i8_i8(i8* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v8i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vldrb.u32 q1, [r1, #4]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb.w lr, [r3]
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  ret <8 x i8> %gather
}
59
; Gather of <2 x i8> from a scalar i8* base with zero-extended i8 offsets and
; an all-true mask.
; The assertions below expect the two offsets to be loaded with scalar ldrb,
; masked with #0xff, and the two elements to be fetched with register-offset
; ldrb loads that are inserted into lanes 0 and 2 with vmov.32.
define arm_aapcs_vfpcc <2 x i8> @unscaled_v2i8_i8(i8* %base, <2 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v2i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrb r2, [r1]
; CHECK-NEXT:    vmov.i32 q0, #0xff
; CHECK-NEXT:    ldrb r1, [r1, #1]
; CHECK-NEXT:    vmov.32 q1[0], r2
; CHECK-NEXT:    vmov.32 q1[2], r1
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    ldrb r1, [r0, r1]
; CHECK-NEXT:    ldrb r0, [r0, r2]
; CHECK-NEXT:    vmov.32 q0[0], r1
; CHECK-NEXT:    vmov.32 q0[2], r0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x i8>, <2 x i8>* %offptr, align 1
  %offs.zext = zext <2 x i8> %offs to <2 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <2 x i32> %offs.zext
  %gather = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> %ptrs, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
  ret <2 x i8> %gather
}
83
; Gather of <16 x i8> from a scalar i8* base where the i8 offsets are
; sign-extended (rather than zero-extended) to i32; the mask is all-true.
; The assertions below expect a scalarized sequence: offsets are widened four
; at a time with vldrb.s32, the base is added with vadd.i32, and every lane is
; loaded with ldrb and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_sext(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #8]
; CHECK-NEXT:    vldrb.s32 q2, [r1, #4]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrb.s32 q0, [r1, #12]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %offs.sext = sext <16 x i8> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.sext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
153
; Gather of <16 x i8> from a scalar i8* base where the offsets are loaded as
; <16 x i16> and sign-extended to i32; the mask is all-true.
; The assertions below expect a scalarized sequence: offsets are widened four
; at a time with vldrh.s32, the base is added with vadd.i32, and every lane is
; loaded with ldrb and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i16(i8* %base, <16 x i16>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #16]
; CHECK-NEXT:    vldrh.s32 q2, [r1, #8]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrh.s32 q0, [r1, #24]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i16>, <16 x i16>* %offptr, align 2
  %offs.sext = sext <16 x i16> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.sext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
223
; Gather of <16 x i8> where the address computation is done on an i32* base,
; so the zero-extended i8 offsets are scaled by the i32 element size before
; the pointers are bitcast to i8*; the mask is all-true.
; The assertions below expect a scalarized sequence in which the scaling
; shows up as vshl.i32 #2, followed by vadd.i32 with the base and per-lane
; ldrb / vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_scaled(i32* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_scaled:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrb.u32 q0, [r1, #8]
; CHECK-NEXT:    vldrb.u32 q2, [r1, #4]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vshl.i32 q2, q2, #2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrb.u32 q0, [r1, #12]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 4
  %offs.zext = zext <16 x i8> %offs to <16 x i32>
  %ptrs32 = getelementptr inbounds i32, i32* %base, <16 x i32> %offs.zext
  %ptrs = bitcast <16 x i32*> %ptrs32 to <16 x i8*>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
298
; Gather of <16 x i8> from a scalar i8* base where the offsets are loaded
; directly as <16 x i32> (no extension step); the mask is all-true.
; The assertions below expect a scalarized sequence: offsets are loaded with
; vldrw.u32, the base is added with vadd.i32, and every lane is loaded with
; ldrb and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_next(i8* %base, <16 x i32>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_next:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r1, #32]
; CHECK-NEXT:    vldrw.u32 q2, [r1, #16]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r1, #48]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i32>, <16 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
367
; Gather of <16 x i8> through two chained GEPs: the first indexes the scalar
; base with <16 x i8> offsets loaded from %offptr (used directly as the GEP
; index, without an explicit extend), the second adds a constant 5 to every
; resulting pointer; the mask is all-true.
; The assertions below expect a scalarized sequence: offsets are widened with
; vldrb.s32, the base and a #0x5 splat are added with vadd.i32, and every lane
; is loaded with ldrb and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #12]
; CHECK-NEXT:    vmov.i32 q2, #0x5
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, q2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r3, [r2]
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.s32 q0, [r1, #8]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q0, q2
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, s6
; CHECK-NEXT:    vadd.i32 q3, q0, q2
; CHECK-NEXT:    vmov r5, s12
; CHECK-NEXT:    vmov r7, s15
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r7, [r7]
; CHECK-NEXT:    vmov.8 q0[0], r5
; CHECK-NEXT:    vmov r5, s13
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[1], r5
; CHECK-NEXT:    vmov r5, s14
; CHECK-NEXT:    vldrb.s32 q3, [r1, #4]
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:    vadd.i32 q2, q3, q2
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[2], r5
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    vmov.8 q0[3], r7
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r4
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], r3
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r12
; CHECK-NEXT:    vmov.8 q0[15], lr
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %offs
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i8 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
442
443
; Gather of <16 x i8> through two chained GEPs with constant indices: i8
; offsets 0, 3, ..., 45 from the scalar base, then a constant 5 added to every
; pointer; the mask is all-true. %offptr is not used by the body.
; The assertions below expect both GEPs to be folded into one constant-pool
; offset vector (5, 8, ..., 50) that feeds a single vector-offset byte gather
; (vldrb.u8 q0, [r0, q1]).
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep2(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI8_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI8_0:
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 14 @ 0xe
; CHECK-NEXT:    .byte 17 @ 0x11
; CHECK-NEXT:    .byte 20 @ 0x14
; CHECK-NEXT:    .byte 23 @ 0x17
; CHECK-NEXT:    .byte 26 @ 0x1a
; CHECK-NEXT:    .byte 29 @ 0x1d
; CHECK-NEXT:    .byte 32 @ 0x20
; CHECK-NEXT:    .byte 35 @ 0x23
; CHECK-NEXT:    .byte 38 @ 0x26
; CHECK-NEXT:    .byte 41 @ 0x29
; CHECK-NEXT:    .byte 44 @ 0x2c
; CHECK-NEXT:    .byte 47 @ 0x2f
; CHECK-NEXT:    .byte 50 @ 0x32
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i8 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
476
477
; Gather of <16 x i8> through two chained GEPs with constant i32 indices:
; offsets 0, 3, ..., 45 from the scalar base, then a constant 5 added to every
; pointer; the mask is all-true.
; The assertions below expect both GEPs to be folded into one constant-pool
; offset vector (5, 8, ..., 50) that feeds a single vector-offset byte gather
; (vldrb.u8 q0, [r0, q1]).
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI9_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 14 @ 0xe
; CHECK-NEXT:    .byte 17 @ 0x11
; CHECK-NEXT:    .byte 20 @ 0x14
; CHECK-NEXT:    .byte 23 @ 0x17
; CHECK-NEXT:    .byte 26 @ 0x1a
; CHECK-NEXT:    .byte 29 @ 0x1d
; CHECK-NEXT:    .byte 32 @ 0x20
; CHECK-NEXT:    .byte 35 @ 0x23
; CHECK-NEXT:    .byte 38 @ 0x26
; CHECK-NEXT:    .byte 41 @ 0x29
; CHECK-NEXT:    .byte 44 @ 0x2c
; CHECK-NEXT:    .byte 47 @ 0x2f
; CHECK-NEXT:    .byte 50 @ 0x32
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
510
511
; Gather of <16 x i8> through a single GEP with constant i32 indices
; 0, 3, ..., 45 from the scalar base; the mask is all-true.
; The assertions below expect the indices to be emitted as a constant-pool
; offset vector (0, 3, ..., 45) that feeds a single vector-offset byte gather
; (vldrb.u8 q0, [r0, q1]).
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep2(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI10_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI10_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 15 @ 0xf
; CHECK-NEXT:    .byte 18 @ 0x12
; CHECK-NEXT:    .byte 21 @ 0x15
; CHECK-NEXT:    .byte 24 @ 0x18
; CHECK-NEXT:    .byte 27 @ 0x1b
; CHECK-NEXT:    .byte 30 @ 0x1e
; CHECK-NEXT:    .byte 33 @ 0x21
; CHECK-NEXT:    .byte 36 @ 0x24
; CHECK-NEXT:    .byte 39 @ 0x27
; CHECK-NEXT:    .byte 42 @ 0x2a
; CHECK-NEXT:    .byte 45 @ 0x2d
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
543
544
; Gather of <16 x i8> through two chained GEPs with constant i32 indices:
; offsets 0, 3, ..., 45 from the scalar base, then a constant 256 added to
; every pointer; the mask is all-true.
; The assertions below expect the combined offsets (256, 259, ..., 301) to be
; emitted as four .long constant-pool vectors and the gather to be scalarized
; into vadd.i32 with the base plus per-lane ldrb / vmov.8.
; NOTE(review): presumably the vector-offset form is not used because the
; combined offsets no longer fit in a byte lane - confirm against the MVE
; gather lowering.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep3(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI11_0
; CHECK-NEXT:    adr r2, .LCPI11_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI11_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI11_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI11_0:
; CHECK-NEXT:    .long 280 @ 0x118
; CHECK-NEXT:    .long 283 @ 0x11b
; CHECK-NEXT:    .long 286 @ 0x11e
; CHECK-NEXT:    .long 289 @ 0x121
; CHECK-NEXT:  .LCPI11_1:
; CHECK-NEXT:    .long 292 @ 0x124
; CHECK-NEXT:    .long 295 @ 0x127
; CHECK-NEXT:    .long 298 @ 0x12a
; CHECK-NEXT:    .long 301 @ 0x12d
; CHECK-NEXT:  .LCPI11_2:
; CHECK-NEXT:    .long 256 @ 0x100
; CHECK-NEXT:    .long 259 @ 0x103
; CHECK-NEXT:    .long 262 @ 0x106
; CHECK-NEXT:    .long 265 @ 0x109
; CHECK-NEXT:  .LCPI11_3:
; CHECK-NEXT:    .long 268 @ 0x10c
; CHECK-NEXT:    .long 271 @ 0x10f
; CHECK-NEXT:    .long 274 @ 0x112
; CHECK-NEXT:    .long 277 @ 0x115
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 256
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
639
640
; Gather of <16 x i8> through a single GEP with constant i32 indices where
; one index (lane 8) is 256 and the others follow the 0, 3, ..., 45 pattern;
; the mask is all-true.
; The assertions below expect the indices to be emitted as four .long
; constant-pool vectors and the gather to be scalarized into vadd.i32 with the
; base plus per-lane ldrb / vmov.8.
; NOTE(review): presumably the single 256 index is what prevents the
; vector-offset byte gather from being used - confirm against the MVE gather
; lowering.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep4(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI12_0
; CHECK-NEXT:    adr r2, .LCPI12_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI12_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI12_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI12_0:
; CHECK-NEXT:    .long 256 @ 0x100
; CHECK-NEXT:    .long 27 @ 0x1b
; CHECK-NEXT:    .long 30 @ 0x1e
; CHECK-NEXT:    .long 33 @ 0x21
; CHECK-NEXT:  .LCPI12_1:
; CHECK-NEXT:    .long 36 @ 0x24
; CHECK-NEXT:    .long 39 @ 0x27
; CHECK-NEXT:    .long 42 @ 0x2a
; CHECK-NEXT:    .long 45 @ 0x2d
; CHECK-NEXT:  .LCPI12_2:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 9 @ 0x9
; CHECK-NEXT:  .LCPI12_3:
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 15 @ 0xf
; CHECK-NEXT:    .long 18 @ 0x12
; CHECK-NEXT:    .long 21 @ 0x15
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
734
735
; Gather of <16 x i8> where the base is already a vector of pointers and a
; single GEP adds a constant 256 to every pointer; the mask is all-true.
; The assertions below expect a scalarized sequence: a #0x100 splat is added
; to each of the four incoming pointer vectors with vadd.i32, and every lane
; is loaded with ldrb and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep5(<16 x i8*> %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov.i32 q4, #0x100
; CHECK-NEXT:    vadd.i32 q2, q2, q4
; CHECK-NEXT:    vadd.i32 q3, q3, q4
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    vadd.i32 q1, q1, q4
; CHECK-NEXT:    vmov r1, s15
; CHECK-NEXT:    vmov r4, s8
; CHECK-NEXT:    ldrb.w r12, [r0]
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r0]
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r0]
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r3, [r0]
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vadd.i32 q3, q0, q4
; CHECK-NEXT:    vmov r5, s12
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[0], r5
; CHECK-NEXT:    vmov r5, s13
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[1], r5
; CHECK-NEXT:    vmov r5, s14
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[2], r5
; CHECK-NEXT:    vmov r5, s15
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[3], r5
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[4], r5
; CHECK-NEXT:    vmov r5, s5
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[5], r5
; CHECK-NEXT:    vmov r5, s6
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[6], r5
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[7], r5
; CHECK-NEXT:    vmov r5, s9
; CHECK-NEXT:    vmov.8 q0[8], r4
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[9], r5
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], lr
; CHECK-NEXT:    vmov.8 q0[12], r2
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.8 q0[15], r1
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %base, i32 256
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}
803
804
; Gather of 16 bytes from %base plus constant offsets built by two chained
; GEPs: the first adds <0, 3, 6, ..., 45> with lane 8 replaced by 256, the
; second adds 1 to every lane. The autogenerated assertions below expect the
; summed offsets (257 in lane 8, first entry of .LCPI14_0) to be loaded from
; constant pools and the gather to be expanded into 16 scalar ldrb loads that
; are inserted lane by lane with vmov.8, rather than one vldrb.u8 gather.
; NOTE(review): presumably the expansion happens because 257 does not fit an
; unsigned 8-bit offset lane -- confirm against the MVE gather lowering.
805define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep6(i8* %base) {
806; CHECK-LABEL: unscaled_v16i8_i8_biggep6:
807; CHECK:       @ %bb.0: @ %entry
808; CHECK-NEXT:    .save {r4, r5, r6, lr}
809; CHECK-NEXT:    push {r4, r5, r6, lr}
810; CHECK-NEXT:    adr r1, .LCPI14_0
811; CHECK-NEXT:    adr r2, .LCPI14_1
812; CHECK-NEXT:    vldrw.u32 q0, [r1]
813; CHECK-NEXT:    adr r6, .LCPI14_2
814; CHECK-NEXT:    vadd.i32 q1, q0, r0
815; CHECK-NEXT:    vldrw.u32 q0, [r2]
816; CHECK-NEXT:    vmov r1, s6
817; CHECK-NEXT:    vadd.i32 q0, q0, r0
818; CHECK-NEXT:    vmov r5, s4
819; CHECK-NEXT:    vmov r2, s3
820; CHECK-NEXT:    vmov r4, s7
821; CHECK-NEXT:    ldrb.w r12, [r1]
822; CHECK-NEXT:    vmov r1, s0
823; CHECK-NEXT:    ldrb r5, [r5]
824; CHECK-NEXT:    ldrb r2, [r2]
825; CHECK-NEXT:    ldrb r4, [r4]
826; CHECK-NEXT:    ldrb.w lr, [r1]
827; CHECK-NEXT:    vmov r1, s1
828; CHECK-NEXT:    ldrb r3, [r1]
829; CHECK-NEXT:    vmov r1, s2
830; CHECK-NEXT:    vldrw.u32 q0, [r6]
831; CHECK-NEXT:    adr r6, .LCPI14_3
832; CHECK-NEXT:    vldrw.u32 q2, [r6]
833; CHECK-NEXT:    vadd.i32 q3, q0, r0
834; CHECK-NEXT:    vadd.i32 q2, q2, r0
835; CHECK-NEXT:    vmov r0, s12
836; CHECK-NEXT:    ldrb r1, [r1]
837; CHECK-NEXT:    ldrb r0, [r0]
838; CHECK-NEXT:    vmov.8 q0[0], r0
839; CHECK-NEXT:    vmov r0, s13
840; CHECK-NEXT:    ldrb r0, [r0]
841; CHECK-NEXT:    vmov.8 q0[1], r0
842; CHECK-NEXT:    vmov r0, s14
843; CHECK-NEXT:    ldrb r0, [r0]
844; CHECK-NEXT:    vmov.8 q0[2], r0
845; CHECK-NEXT:    vmov r0, s15
846; CHECK-NEXT:    ldrb r0, [r0]
847; CHECK-NEXT:    vmov.8 q0[3], r0
848; CHECK-NEXT:    vmov r0, s8
849; CHECK-NEXT:    ldrb r0, [r0]
850; CHECK-NEXT:    vmov.8 q0[4], r0
851; CHECK-NEXT:    vmov r0, s9
852; CHECK-NEXT:    ldrb r0, [r0]
853; CHECK-NEXT:    vmov.8 q0[5], r0
854; CHECK-NEXT:    vmov r0, s10
855; CHECK-NEXT:    ldrb r0, [r0]
856; CHECK-NEXT:    vmov.8 q0[6], r0
857; CHECK-NEXT:    vmov r0, s11
858; CHECK-NEXT:    ldrb r0, [r0]
859; CHECK-NEXT:    vmov.8 q0[7], r0
860; CHECK-NEXT:    vmov r0, s5
861; CHECK-NEXT:    vmov.8 q0[8], r5
862; CHECK-NEXT:    ldrb r0, [r0]
863; CHECK-NEXT:    vmov.8 q0[9], r0
864; CHECK-NEXT:    vmov.8 q0[10], r12
865; CHECK-NEXT:    vmov.8 q0[11], r4
866; CHECK-NEXT:    vmov.8 q0[12], lr
867; CHECK-NEXT:    vmov.8 q0[13], r3
868; CHECK-NEXT:    vmov.8 q0[14], r1
869; CHECK-NEXT:    vmov.8 q0[15], r2
870; CHECK-NEXT:    pop {r4, r5, r6, pc}
871; CHECK-NEXT:    .p2align 4
872; CHECK-NEXT:  @ %bb.1:
873; CHECK-NEXT:  .LCPI14_0:
874; CHECK-NEXT:    .long 257 @ 0x101
875; CHECK-NEXT:    .long 28 @ 0x1c
876; CHECK-NEXT:    .long 31 @ 0x1f
877; CHECK-NEXT:    .long 34 @ 0x22
878; CHECK-NEXT:  .LCPI14_1:
879; CHECK-NEXT:    .long 37 @ 0x25
880; CHECK-NEXT:    .long 40 @ 0x28
881; CHECK-NEXT:    .long 43 @ 0x2b
882; CHECK-NEXT:    .long 46 @ 0x2e
883; CHECK-NEXT:  .LCPI14_2:
884; CHECK-NEXT:    .long 1 @ 0x1
885; CHECK-NEXT:    .long 4 @ 0x4
886; CHECK-NEXT:    .long 7 @ 0x7
887; CHECK-NEXT:    .long 10 @ 0xa
888; CHECK-NEXT:  .LCPI14_3:
889; CHECK-NEXT:    .long 13 @ 0xd
890; CHECK-NEXT:    .long 16 @ 0x10
891; CHECK-NEXT:    .long 19 @ 0x13
892; CHECK-NEXT:    .long 22 @ 0x16
893entry:
; First GEP: scalar base with a constant offset vector; lane 8 is 256 instead
; of the 24 that the stride-3 pattern of the other lanes would give.
894	%ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
; Second GEP: uniform +1 on the vector of pointers (lane 8 becomes base+257).
895	%ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 1
; Gather with an all-true mask, alignment 1 and an undef passthru.
896	%gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
897	ret <16 x i8> %gather
898}
899
900
; Gather of 16 bytes from %base plus constant offsets built by two chained
; GEPs: the first adds <100, 3, 6, ..., 45> (lane 0 is 100, the other lanes
; step by 3), the second adds 200 to every lane. The autogenerated assertions
; below expect the summed offsets (300 in lane 0, first entry of .LCPI15_2;
; 203..245 elsewhere) to be loaded from constant pools and the gather to be
; expanded into 16 scalar ldrb loads inserted with vmov.8, rather than one
; vldrb.u8 gather.
; NOTE(review): presumably the expansion happens because 300 does not fit an
; unsigned 8-bit offset lane -- confirm against the MVE gather lowering.
901define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep7(i8* %base) {
902; CHECK-LABEL: unscaled_v16i8_i8_biggep7:
903; CHECK:       @ %bb.0: @ %entry
904; CHECK-NEXT:    .save {r4, r5, r6, lr}
905; CHECK-NEXT:    push {r4, r5, r6, lr}
906; CHECK-NEXT:    adr r1, .LCPI15_0
907; CHECK-NEXT:    adr r2, .LCPI15_1
908; CHECK-NEXT:    vldrw.u32 q0, [r1]
909; CHECK-NEXT:    adr r6, .LCPI15_2
910; CHECK-NEXT:    vadd.i32 q1, q0, r0
911; CHECK-NEXT:    vldrw.u32 q0, [r2]
912; CHECK-NEXT:    vmov r1, s6
913; CHECK-NEXT:    vadd.i32 q0, q0, r0
914; CHECK-NEXT:    vmov r5, s4
915; CHECK-NEXT:    vmov r2, s3
916; CHECK-NEXT:    vmov r4, s7
917; CHECK-NEXT:    ldrb.w r12, [r1]
918; CHECK-NEXT:    vmov r1, s0
919; CHECK-NEXT:    ldrb r5, [r5]
920; CHECK-NEXT:    ldrb r2, [r2]
921; CHECK-NEXT:    ldrb r4, [r4]
922; CHECK-NEXT:    ldrb.w lr, [r1]
923; CHECK-NEXT:    vmov r1, s1
924; CHECK-NEXT:    ldrb r3, [r1]
925; CHECK-NEXT:    vmov r1, s2
926; CHECK-NEXT:    vldrw.u32 q0, [r6]
927; CHECK-NEXT:    adr r6, .LCPI15_3
928; CHECK-NEXT:    vldrw.u32 q2, [r6]
929; CHECK-NEXT:    vadd.i32 q3, q0, r0
930; CHECK-NEXT:    vadd.i32 q2, q2, r0
931; CHECK-NEXT:    vmov r0, s12
932; CHECK-NEXT:    ldrb r1, [r1]
933; CHECK-NEXT:    ldrb r0, [r0]
934; CHECK-NEXT:    vmov.8 q0[0], r0
935; CHECK-NEXT:    vmov r0, s13
936; CHECK-NEXT:    ldrb r0, [r0]
937; CHECK-NEXT:    vmov.8 q0[1], r0
938; CHECK-NEXT:    vmov r0, s14
939; CHECK-NEXT:    ldrb r0, [r0]
940; CHECK-NEXT:    vmov.8 q0[2], r0
941; CHECK-NEXT:    vmov r0, s15
942; CHECK-NEXT:    ldrb r0, [r0]
943; CHECK-NEXT:    vmov.8 q0[3], r0
944; CHECK-NEXT:    vmov r0, s8
945; CHECK-NEXT:    ldrb r0, [r0]
946; CHECK-NEXT:    vmov.8 q0[4], r0
947; CHECK-NEXT:    vmov r0, s9
948; CHECK-NEXT:    ldrb r0, [r0]
949; CHECK-NEXT:    vmov.8 q0[5], r0
950; CHECK-NEXT:    vmov r0, s10
951; CHECK-NEXT:    ldrb r0, [r0]
952; CHECK-NEXT:    vmov.8 q0[6], r0
953; CHECK-NEXT:    vmov r0, s11
954; CHECK-NEXT:    ldrb r0, [r0]
955; CHECK-NEXT:    vmov.8 q0[7], r0
956; CHECK-NEXT:    vmov r0, s5
957; CHECK-NEXT:    vmov.8 q0[8], r5
958; CHECK-NEXT:    ldrb r0, [r0]
959; CHECK-NEXT:    vmov.8 q0[9], r0
960; CHECK-NEXT:    vmov.8 q0[10], r12
961; CHECK-NEXT:    vmov.8 q0[11], r4
962; CHECK-NEXT:    vmov.8 q0[12], lr
963; CHECK-NEXT:    vmov.8 q0[13], r3
964; CHECK-NEXT:    vmov.8 q0[14], r1
965; CHECK-NEXT:    vmov.8 q0[15], r2
966; CHECK-NEXT:    pop {r4, r5, r6, pc}
967; CHECK-NEXT:    .p2align 4
968; CHECK-NEXT:  @ %bb.1:
969; CHECK-NEXT:  .LCPI15_0:
970; CHECK-NEXT:    .long 224 @ 0xe0
971; CHECK-NEXT:    .long 227 @ 0xe3
972; CHECK-NEXT:    .long 230 @ 0xe6
973; CHECK-NEXT:    .long 233 @ 0xe9
974; CHECK-NEXT:  .LCPI15_1:
975; CHECK-NEXT:    .long 236 @ 0xec
976; CHECK-NEXT:    .long 239 @ 0xef
977; CHECK-NEXT:    .long 242 @ 0xf2
978; CHECK-NEXT:    .long 245 @ 0xf5
979; CHECK-NEXT:  .LCPI15_2:
980; CHECK-NEXT:    .long 300 @ 0x12c
981; CHECK-NEXT:    .long 203 @ 0xcb
982; CHECK-NEXT:    .long 206 @ 0xce
983; CHECK-NEXT:    .long 209 @ 0xd1
984; CHECK-NEXT:  .LCPI15_3:
985; CHECK-NEXT:    .long 212 @ 0xd4
986; CHECK-NEXT:    .long 215 @ 0xd7
987; CHECK-NEXT:    .long 218 @ 0xda
988; CHECK-NEXT:    .long 221 @ 0xdd
989entry:
; First GEP: scalar base with a constant offset vector whose lane 0 is 100.
990  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 100, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
; Second GEP: uniform +200 on the vector of pointers (lane 0 becomes base+300).
991	%ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 200
; Gather with an all-true mask, alignment 1 and an undef passthru.
992	%gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
993	ret <16 x i8> %gather
994}
995
996
; Gather of 16 bytes where the <16 x i8> offsets loaded from %offptr are used
; directly as GEP indices, with no explicit zext in the IR. The autogenerated
; assertions below expect the offsets to be widened with four vldrb.s32 loads
; (at [r1], [r1, #4], [r1, #8], [r1, #12]), added to the base in r0, and the
; gather to be expanded into 16 scalar ldrb loads inserted with vmov.8,
; rather than one vldrb.u8 gather.
997define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2(i8* %base, <16 x i8>* %offptr) {
998; CHECK-LABEL: unscaled_v16i8_i8_2:
999; CHECK:       @ %bb.0: @ %entry
1000; CHECK-NEXT:    .save {r4, r5, r6, lr}
1001; CHECK-NEXT:    push {r4, r5, r6, lr}
1002; CHECK-NEXT:    vldrb.s32 q0, [r1, #8]
1003; CHECK-NEXT:    vldrb.s32 q2, [r1, #4]
1004; CHECK-NEXT:    vadd.i32 q1, q0, r0
1005; CHECK-NEXT:    vldrb.s32 q0, [r1, #12]
1006; CHECK-NEXT:    vmov r2, s6
1007; CHECK-NEXT:    vadd.i32 q2, q2, r0
1008; CHECK-NEXT:    vadd.i32 q0, q0, r0
1009; CHECK-NEXT:    vmov r6, s4
1010; CHECK-NEXT:    vmov r3, s2
1011; CHECK-NEXT:    vmov r4, s3
1012; CHECK-NEXT:    vmov r5, s7
1013; CHECK-NEXT:    ldrb.w r12, [r2]
1014; CHECK-NEXT:    vmov r2, s0
1015; CHECK-NEXT:    ldrb r6, [r6]
1016; CHECK-NEXT:    ldrb r3, [r3]
1017; CHECK-NEXT:    ldrb r4, [r4]
1018; CHECK-NEXT:    ldrb r5, [r5]
1019; CHECK-NEXT:    ldrb.w lr, [r2]
1020; CHECK-NEXT:    vmov r2, s1
1021; CHECK-NEXT:    vldrb.s32 q0, [r1]
1022; CHECK-NEXT:    vadd.i32 q3, q0, r0
1023; CHECK-NEXT:    vmov r0, s12
1024; CHECK-NEXT:    ldrb r2, [r2]
1025; CHECK-NEXT:    ldrb r0, [r0]
1026; CHECK-NEXT:    vmov.8 q0[0], r0
1027; CHECK-NEXT:    vmov r0, s13
1028; CHECK-NEXT:    ldrb r0, [r0]
1029; CHECK-NEXT:    vmov.8 q0[1], r0
1030; CHECK-NEXT:    vmov r0, s14
1031; CHECK-NEXT:    ldrb r0, [r0]
1032; CHECK-NEXT:    vmov.8 q0[2], r0
1033; CHECK-NEXT:    vmov r0, s15
1034; CHECK-NEXT:    ldrb r0, [r0]
1035; CHECK-NEXT:    vmov.8 q0[3], r0
1036; CHECK-NEXT:    vmov r0, s8
1037; CHECK-NEXT:    ldrb r0, [r0]
1038; CHECK-NEXT:    vmov.8 q0[4], r0
1039; CHECK-NEXT:    vmov r0, s9
1040; CHECK-NEXT:    ldrb r0, [r0]
1041; CHECK-NEXT:    vmov.8 q0[5], r0
1042; CHECK-NEXT:    vmov r0, s10
1043; CHECK-NEXT:    ldrb r0, [r0]
1044; CHECK-NEXT:    vmov.8 q0[6], r0
1045; CHECK-NEXT:    vmov r0, s11
1046; CHECK-NEXT:    ldrb r0, [r0]
1047; CHECK-NEXT:    vmov.8 q0[7], r0
1048; CHECK-NEXT:    vmov r0, s5
1049; CHECK-NEXT:    vmov.8 q0[8], r6
1050; CHECK-NEXT:    ldrb r0, [r0]
1051; CHECK-NEXT:    vmov.8 q0[9], r0
1052; CHECK-NEXT:    vmov.8 q0[10], r12
1053; CHECK-NEXT:    vmov.8 q0[11], r5
1054; CHECK-NEXT:    vmov.8 q0[12], lr
1055; CHECK-NEXT:    vmov.8 q0[13], r2
1056; CHECK-NEXT:    vmov.8 q0[14], r3
1057; CHECK-NEXT:    vmov.8 q0[15], r4
1058; CHECK-NEXT:    pop {r4, r5, r6, pc}
1059entry:
1060  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
; The i8 offsets index the GEP directly (no zext to i32 beforehand).
1061  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %offs
; Gather with an all-true mask, alignment 1 and an undef passthru.
1062  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
1063	ret <16 x i8> %gather
1064}
1065
1066
; Gather of 16 bytes from %base plus a constant <16 x i8> offset vector
; <0, 3, 6, ..., 45>. The autogenerated assertions below expect a single
; vldrb.u8 q0, [r0, q1] gather whose offset register is loaded from the
; constant pool .LCPI17_0, emitted as sixteen .byte entries.
; The %offptr parameter is not referenced anywhere in the body.
1067define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_3(i8* %base, <16 x i8>* %offptr) {
1068; CHECK-LABEL: unscaled_v16i8_i8_3:
1069; CHECK:       @ %bb.0: @ %entry
1070; CHECK-NEXT:    adr r1, .LCPI17_0
1071; CHECK-NEXT:    vldrw.u32 q1, [r1]
1072; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
1073; CHECK-NEXT:    bx lr
1074; CHECK-NEXT:    .p2align 4
1075; CHECK-NEXT:  @ %bb.1:
1076; CHECK-NEXT:  .LCPI17_0:
1077; CHECK-NEXT:    .byte 0 @ 0x0
1078; CHECK-NEXT:    .byte 3 @ 0x3
1079; CHECK-NEXT:    .byte 6 @ 0x6
1080; CHECK-NEXT:    .byte 9 @ 0x9
1081; CHECK-NEXT:    .byte 12 @ 0xc
1082; CHECK-NEXT:    .byte 15 @ 0xf
1083; CHECK-NEXT:    .byte 18 @ 0x12
1084; CHECK-NEXT:    .byte 21 @ 0x15
1085; CHECK-NEXT:    .byte 24 @ 0x18
1086; CHECK-NEXT:    .byte 27 @ 0x1b
1087; CHECK-NEXT:    .byte 30 @ 0x1e
1088; CHECK-NEXT:    .byte 33 @ 0x21
1089; CHECK-NEXT:    .byte 36 @ 0x24
1090; CHECK-NEXT:    .byte 39 @ 0x27
1091; CHECK-NEXT:    .byte 42 @ 0x2a
1092; CHECK-NEXT:    .byte 45 @ 0x2d
1093entry:
; Constant i8 offsets with stride 3, applied directly to the scalar base.
1094  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>
; Gather with an all-true mask, alignment 1 and an undef passthru.
1095  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
1096	ret <16 x i8> %gather
1097}
1098
; Gather of 16 bytes through an i16* base: the <16 x i8> offsets loaded from
; %offptr are zero-extended to i32, used to index a GEP over i16 elements,
; and the resulting pointers are bitcast to i8* before the gather. The
; autogenerated assertions below expect the offsets to be widened with
; vldrb.u32, shifted left by 1 (vshl.i32 ..., #1), added to the base in r0,
; and the gather to be expanded into 16 scalar ldrb loads inserted with
; vmov.8, rather than one vldrb.u8 gather.
1099define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_basei16(i16* %base, <16 x i8>* %offptr) {
1100; CHECK-LABEL: unscaled_v16i8_basei16:
1101; CHECK:       @ %bb.0: @ %entry
1102; CHECK-NEXT:    .save {r4, r5, r6, lr}
1103; CHECK-NEXT:    push {r4, r5, r6, lr}
1104; CHECK-NEXT:    vldrb.u32 q0, [r1, #8]
1105; CHECK-NEXT:    vldrb.u32 q2, [r1, #4]
1106; CHECK-NEXT:    vshl.i32 q0, q0, #1
1107; CHECK-NEXT:    vshl.i32 q2, q2, #1
1108; CHECK-NEXT:    vadd.i32 q1, q0, r0
1109; CHECK-NEXT:    vldrb.u32 q0, [r1, #12]
1110; CHECK-NEXT:    vmov r2, s6
1111; CHECK-NEXT:    vadd.i32 q2, q2, r0
1112; CHECK-NEXT:    vshl.i32 q0, q0, #1
1113; CHECK-NEXT:    vmov r6, s4
1114; CHECK-NEXT:    vadd.i32 q0, q0, r0
1115; CHECK-NEXT:    vmov r5, s7
1116; CHECK-NEXT:    vmov r3, s2
1117; CHECK-NEXT:    vmov r4, s3
1118; CHECK-NEXT:    ldrb.w r12, [r2]
1119; CHECK-NEXT:    vmov r2, s0
1120; CHECK-NEXT:    ldrb r6, [r6]
1121; CHECK-NEXT:    ldrb r5, [r5]
1122; CHECK-NEXT:    ldrb r3, [r3]
1123; CHECK-NEXT:    ldrb r4, [r4]
1124; CHECK-NEXT:    ldrb.w lr, [r2]
1125; CHECK-NEXT:    vmov r2, s1
1126; CHECK-NEXT:    vldrb.u32 q0, [r1]
1127; CHECK-NEXT:    vshl.i32 q0, q0, #1
1128; CHECK-NEXT:    vadd.i32 q3, q0, r0
1129; CHECK-NEXT:    vmov r0, s12
1130; CHECK-NEXT:    ldrb r2, [r2]
1131; CHECK-NEXT:    ldrb r0, [r0]
1132; CHECK-NEXT:    vmov.8 q0[0], r0
1133; CHECK-NEXT:    vmov r0, s13
1134; CHECK-NEXT:    ldrb r0, [r0]
1135; CHECK-NEXT:    vmov.8 q0[1], r0
1136; CHECK-NEXT:    vmov r0, s14
1137; CHECK-NEXT:    ldrb r0, [r0]
1138; CHECK-NEXT:    vmov.8 q0[2], r0
1139; CHECK-NEXT:    vmov r0, s15
1140; CHECK-NEXT:    ldrb r0, [r0]
1141; CHECK-NEXT:    vmov.8 q0[3], r0
1142; CHECK-NEXT:    vmov r0, s8
1143; CHECK-NEXT:    ldrb r0, [r0]
1144; CHECK-NEXT:    vmov.8 q0[4], r0
1145; CHECK-NEXT:    vmov r0, s9
1146; CHECK-NEXT:    ldrb r0, [r0]
1147; CHECK-NEXT:    vmov.8 q0[5], r0
1148; CHECK-NEXT:    vmov r0, s10
1149; CHECK-NEXT:    ldrb r0, [r0]
1150; CHECK-NEXT:    vmov.8 q0[6], r0
1151; CHECK-NEXT:    vmov r0, s11
1152; CHECK-NEXT:    ldrb r0, [r0]
1153; CHECK-NEXT:    vmov.8 q0[7], r0
1154; CHECK-NEXT:    vmov r0, s5
1155; CHECK-NEXT:    vmov.8 q0[8], r6
1156; CHECK-NEXT:    ldrb r0, [r0]
1157; CHECK-NEXT:    vmov.8 q0[9], r0
1158; CHECK-NEXT:    vmov.8 q0[10], r12
1159; CHECK-NEXT:    vmov.8 q0[11], r5
1160; CHECK-NEXT:    vmov.8 q0[12], lr
1161; CHECK-NEXT:    vmov.8 q0[13], r2
1162; CHECK-NEXT:    vmov.8 q0[14], r3
1163; CHECK-NEXT:    vmov.8 q0[15], r4
1164; CHECK-NEXT:    pop {r4, r5, r6, pc}
1165entry:
1166  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
1167  %offs.zext = zext <16 x i8> %offs to <16 x i32>
; GEP over i16 elements, so each offset advances the address by two bytes.
1168  %ptrs = getelementptr inbounds i16, i16* %base, <16 x i32> %offs.zext
; Reinterpret the i16 pointers as i8 pointers so a byte gather can be used.
1169  %ptrs.cast = bitcast <16 x i16*> %ptrs to <16 x i8*>
; Gather with an all-true mask, alignment 1 and an undef passthru.
1170  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs.cast, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
1171  ret <16 x i8> %gather
1172}
1173
; Declarations of the llvm.masked.gather overloads called by the tests in
; this file, for 16-, 8- and 2-element vectors of i8. The operands are, in
; order: vector of pointers, i32 alignment, <N x i1> mask, passthru vector.
1174declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
1175declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
1176declare <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>)
1177