; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; Gather of i16 with zero-extended i16 offsets: lowered to one scaled vldrh.u16 gather.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
17
; Same scaled gather, but the loaded element type is half via a pointer bitcast.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_i16(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}
32
; Scaled gather where the base pointer is already half*.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_half(half* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8f16_half:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds half, half* %base, <8 x i32> %offs.zext
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}
46
; Sign-extended offsets do not match the uxtw-scaled gather form, so (per the
; CHECK lines) the gather is expanded into scalar ldrh loads.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrh.w lr, [r3]
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.sext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
90
; Sign-extended offsets with a half result: expanded into scalar vldr.16 loads.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_sext(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8f16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    vldr.16 s0, [r2]
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vldr.16 s0, [r3]
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov.16 q0[0], r3
; CHECK-NEXT:    vmov.16 q0[1], r2
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vldr.16 s8, [r2]
; CHECK-NEXT:    vmov r2, s8
; CHECK-NEXT:    vmov.16 q0[2], r2
; CHECK-NEXT:    vmov r2, s7
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vldr.16 s8, [r0]
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vldr.16 s8, [r0]
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vldr.16 s8, [r0]
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vldr.16 s4, [r0]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.sext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}
141
; Zero-extended i8 offsets: still matches the scaled gather (vldrb.u16 widens the offsets).
define arm_aapcs_vfpcc <8 x i16> @unsigned_scaled_v8i16_i8(i16* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: unsigned_scaled_v8i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
155
; As above, with a half result via pointer bitcast.
define arm_aapcs_vfpcc <8 x half> @unsigned_scaled_v8f16_i8(i16* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: unsigned_scaled_v8f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}
170
; All-true mask with zero passthru: the passthru is irrelevant and no select is emitted.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0t(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> zeroinitializer)
  ret <8 x i16> %gather
}
184
; All-true mask with all-ones passthru: passthru again dropped.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1t(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}
198
; One false lane with a non-zero passthru: predicated gather plus a vpsel to merge the passthru.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1f(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65487
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}
217
; One false lane with a zero passthru: the predicated gather alone suffices (masked-off
; lanes are zero), so no vpsel is needed.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0f(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65523
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}
234
; Dynamic mask (icmp sgt) with zero passthru: vpt + predicated gather, no select.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp0(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}
250
; Dynamic mask with non-zero passthru: vpt + predicated gather, then vpsel with the passthru.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp1(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}
268
; Two GEPs with a variable first offset: per the CHECK lines this is not folded into a
; single scaled gather and is expanded to scalar loads.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vmov.i32 q1, #0x28
; CHECK-NEXT:    vldrh.s32 q2, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshl.i32 q2, q2, #1
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vadd.i32 q1, q2, q1
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrh.w lr, [r3]
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %offs
  %ptrs2 = getelementptr inbounds i16, <8 x i16*> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
315
; Two GEPs with constant offsets: folded into one scaled gather with a constant-pool offset vector.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep2(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI14_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI14_0:
; CHECK-NEXT:    .short 20 @ 0x14
; CHECK-NEXT:    .short 23 @ 0x17
; CHECK-NEXT:    .short 26 @ 0x1a
; CHECK-NEXT:    .short 29 @ 0x1d
; CHECK-NEXT:    .short 32 @ 0x20
; CHECK-NEXT:    .short 35 @ 0x23
; CHECK-NEXT:    .short 38 @ 0x26
; CHECK-NEXT:    .short 41 @ 0x29
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
340
; i32-typed constant GEPs whose sums still fit the 16-bit offset form: one scaled gather.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep(i16* %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI15_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI15_0:
; CHECK-NEXT:    .short 20 @ 0x14
; CHECK-NEXT:    .short 23 @ 0x17
; CHECK-NEXT:    .short 26 @ 0x1a
; CHECK-NEXT:    .short 29 @ 0x1d
; CHECK-NEXT:    .short 32 @ 0x20
; CHECK-NEXT:    .short 35 @ 0x23
; CHECK-NEXT:    .short 38 @ 0x26
; CHECK-NEXT:    .short 41 @ 0x29
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i32 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
365
; Single i32-typed constant GEP with small offsets: one scaled gather.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep2(i16* %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI16_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI16_0:
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 3 @ 0x3
; CHECK-NEXT:    .short 6 @ 0x6
; CHECK-NEXT:    .short 9 @ 0x9
; CHECK-NEXT:    .short 12 @ 0xc
; CHECK-NEXT:    .short 15 @ 0xf
; CHECK-NEXT:    .short 18 @ 0x12
; CHECK-NEXT:    .short 21 @ 0x15
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
389
; A 65536-element extra offset overflows the 16-bit offset form, so the gather is expanded.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep3(i16* %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI17_0
; CHECK-NEXT:    adr.w r12, .LCPI17_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r12]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w lr, [r1]
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    ldrh r6, [r2]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r3
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    vmov.16 q0[2], lr
; CHECK-NEXT:    vmov.16 q0[3], r6
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI17_0:
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 131078 @ 0x20006
; CHECK-NEXT:    .long 131084 @ 0x2000c
; CHECK-NEXT:    .long 131090 @ 0x20012
; CHECK-NEXT:  .LCPI17_1:
; CHECK-NEXT:    .long 131096 @ 0x20018
; CHECK-NEXT:    .long 131102 @ 0x2001e
; CHECK-NEXT:    .long 131108 @ 0x20024
; CHECK-NEXT:    .long 131114 @ 0x2002a
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
444
; One out-of-range lane offset (65536) in the constant vector forces expansion.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep4(i16* %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI18_0
; CHECK-NEXT:    adr.w r12, .LCPI18_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r12]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w lr, [r1]
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    ldrh r6, [r2]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r3
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    vmov.16 q0[2], lr
; CHECK-NEXT:    vmov.16 q0[3], r6
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI18_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 18 @ 0x12
; CHECK-NEXT:  .LCPI18_1:
; CHECK-NEXT:    .long 24 @ 0x18
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 36 @ 0x24
; CHECK-NEXT:    .long 42 @ 0x2a
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 65536, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
498
; Vector-of-pointers base (no scalar base register), so the gather is expanded.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep5(<8 x i16*> %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov.i32 q2, #0x20000
; CHECK-NEXT:    vadd.i32 q0, q0, q2
; CHECK-NEXT:    vadd.i32 q1, q1, q2
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w r12, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    ldrh.w lr, [r1]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r3
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.16 q0[6], r1
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %base, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
537
; One lane (65536) out of range even after adding 1 to all offsets: expanded.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep6(i16* %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep6:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI20_0
; CHECK-NEXT:    adr.w r12, .LCPI20_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r12]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w lr, [r1]
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    ldrh r6, [r2]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r3
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    vmov.16 q0[2], lr
; CHECK-NEXT:    vmov.16 q0[3], r6
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI20_0:
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 14 @ 0xe
; CHECK-NEXT:    .long 20 @ 0x14
; CHECK-NEXT:  .LCPI20_1:
; CHECK-NEXT:    .long 131074 @ 0x20002
; CHECK-NEXT:    .long 32 @ 0x20
; CHECK-NEXT:    .long 38 @ 0x26
; CHECK-NEXT:    .long 44 @ 0x2c
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 65536, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i32 1
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
592
; i16-typed GEP indices that wrap (65000 + 600 truncates to 128): expanded to scalar loads.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep7(i16* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_biggep7:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI21_0
; CHECK-NEXT:    adr.w r12, .LCPI21_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r12]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w lr, [r1]
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    ldrh r6, [r2]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r3
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    vmov.16 q0[2], lr
; CHECK-NEXT:    vmov.16 q0[3], r6
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI21_0:
; CHECK-NEXT:    .long 128 @ 0x80
; CHECK-NEXT:    .long 1206 @ 0x4b6
; CHECK-NEXT:    .long 1212 @ 0x4bc
; CHECK-NEXT:    .long 1218 @ 0x4c2
; CHECK-NEXT:  .LCPI21_1:
; CHECK-NEXT:    .long 1224 @ 0x4c8
; CHECK-NEXT:    .long 1230 @ 0x4ce
; CHECK-NEXT:    .long 1236 @ 0x4d4
; CHECK-NEXT:    .long 1242 @ 0x4da
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> <i16 65000, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x i16*> %ptrs, i16 600
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
647
; i32-strided pointers bitcast to i16*: stride 4 does not fit the #1-scaled form, so expanded.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_basei32(i32* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_basei32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vldrh.u32 q1, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vshl.i32 q1, q1, #2
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrh.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrh.w lr, [r3]
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %offs.zext
  %ptrs.cast = bitcast <8 x i32*> %ptrs to <8 x i16*>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %ptrs.cast, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}
692
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>) #1
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>) #1
declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>) #1