; (code-viewer navigation chrome removed: Home / Line# / Scopes# / Navigate / Raw / Download)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8i16_i16(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8f16_i16(i16* %base, <8 x i16>* %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %input, <8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8f16_half(half* %base, <8 x i16>* %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_half:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds half, half* %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %input, <8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Expand: sign-extended offsets cannot use the uxtw-scaled scatter form.
define arm_aapcs_vfpcc void @scaled_v8i16_sext(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q2, [r1]
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vshl.i32 q2, q2, #1
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    vmov.u16 r1, q0[1]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    vmov.u16 r1, q0[3]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.u16 r1, q0[5]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.u16 r1, q0[7]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.sext
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Expand: sign-extended offsets cannot use the uxtw-scaled scatter form.
define arm_aapcs_vfpcc void @scaled_v8f16_sext(i16* %base, <8 x i16>* %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vmovx.f16 s12, s0
; CHECK-NEXT:    vshl.i32 q2, q1, #1
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r1, s8
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vstr.16 s0, [r1]
; CHECK-NEXT:    vmov r1, s9
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vstr.16 s12, [r1]
; CHECK-NEXT:    vmov r1, s10
; CHECK-NEXT:    vmovx.f16 s0, s3
; CHECK-NEXT:    vstr.16 s1, [r1]
; CHECK-NEXT:    vmov r1, s11
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vstr.16 s8, [r1]
; CHECK-NEXT:    vstr.16 s2, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmovx.f16 s8, s2
; CHECK-NEXT:    vstr.16 s8, [r0]
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vstr.16 s3, [r0]
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vstr.16 s0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.sext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %input, <8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, zext(offs), uxtw #1]
define arm_aapcs_vfpcc void @unsigned_scaled_v8i16_i8(i16* %base, <8 x i8>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: unsigned_scaled_v8i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, zext(offs), uxtw #1]
define arm_aapcs_vfpcc void @unsigned_scaled_v8f16_i8(i16* %base, <8 x i8>* %offptr, <8 x half> %input) {
; CHECK-LABEL: unsigned_scaled_v8f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x i16*> %i16_ptrs to <8 x half*>
  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %input, <8 x half*> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Predicated scatter: the icmp mask becomes a VPT block around the scatter store.
define arm_aapcs_vfpcc void @scaled_v8i16_i16_passthru_icmp0(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs, i32 2, <8 x i1> %mask)
  ret void
}

; Two chained GEPs with a variable offset vector: currently expanded scalar-by-scalar.
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.s32 q3, [r1]
; CHECK-NEXT:    vmov.i32 q2, #0x28
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vshl.i32 q3, q3, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:    vadd.i32 q1, q1, q2
; CHECK-NEXT:    vadd.i32 q2, q3, q2
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    vmov.u16 r1, q0[1]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    vmov.u16 r1, q0[3]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.u16 r1, q0[5]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov.u16 r1, q0[7]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %offs
  %ptrs2 = getelementptr inbounds i16, <8 x i16*> %ptrs, i16 20
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Two chained GEPs with constant offsets: folded into a constant-pool offset vector.
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep2(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI9_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .short 20 @ 0x14
; CHECK-NEXT:    .short 23 @ 0x17
; CHECK-NEXT:    .short 26 @ 0x1a
; CHECK-NEXT:    .short 29 @ 0x1d
; CHECK-NEXT:    .short 32 @ 0x20
; CHECK-NEXT:    .short 35 @ 0x23
; CHECK-NEXT:    .short 38 @ 0x26
; CHECK-NEXT:    .short 41 @ 0x29
entry:
  %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16, <8 x i16*> %ptrs, i16 20
  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %input, <8 x i16*> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>)