• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Convert an i32 predicate mask into an <N x i1> vector predicate.
4declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
5declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
6
; Unpredicated vcmulq. The leading i32 is the rotation selector: the tests in
; this file pass 0, 1, 2, 3 and expect rotations #0, #90, #180, #270.
7declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
8declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)
9
; Predicated vcmulq. As called in this file the operands are: rotation
; selector, inactive vector (or undef), first source, second source, predicate.
10declare <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
11declare <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)
12
; Unpredicated vcmulq on <8 x half>, rotation selector i32 0.
; Expected codegen: one vcmul.f16 with rotation #0, writing q0 in place.
13define arm_aapcs_vfpcc <8 x half> @test_vcmulq_f16(<8 x half> %a, <8 x half> %b) {
14; CHECK-LABEL: test_vcmulq_f16:
15; CHECK:       @ %bb.0: @ %entry
16; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #0
17; CHECK-NEXT:    bx lr
18entry:
19  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 0, <8 x half> %a, <8 x half> %b)
20  ret <8 x half> %0
21}
22
; Unpredicated vcmulq on <4 x float>, rotation selector i32 0.
; Expected codegen: vcmul.f32 with rotation #0 into q2, then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
23define arm_aapcs_vfpcc <4 x float> @test_vcmulq_f32(<4 x float> %a, <4 x float> %b) {
24; CHECK-LABEL: test_vcmulq_f32:
25; CHECK:       @ %bb.0: @ %entry
26; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
27; CHECK-NEXT:    vmov q0, q2
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %a, <4 x float> %b)
31  ret <4 x float> %0
32}
33
; Unpredicated vcmulq on <8 x half>, rotation selector i32 1.
; Expected codegen: one vcmul.f16 with rotation #90, writing q0 in place.
34define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_f16(<8 x half> %a, <8 x half> %b) {
35; CHECK-LABEL: test_vcmulq_rot90_f16:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #90
38; CHECK-NEXT:    bx lr
39entry:
40  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 1, <8 x half> %a, <8 x half> %b)
41  ret <8 x half> %0
42}
43
; Unpredicated vcmulq on <4 x float>, rotation selector i32 1.
; Expected codegen: vcmul.f32 with rotation #90 into q2, then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
44define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_f32(<4 x float> %a, <4 x float> %b) {
45; CHECK-LABEL: test_vcmulq_rot90_f32:
46; CHECK:       @ %bb.0: @ %entry
47; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #90
48; CHECK-NEXT:    vmov q0, q2
49; CHECK-NEXT:    bx lr
50entry:
51  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %a, <4 x float> %b)
52  ret <4 x float> %0
53}
54
; Unpredicated vcmulq on <8 x half>, rotation selector i32 2.
; Expected codegen: one vcmul.f16 with rotation #180, writing q0 in place.
55define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_f16(<8 x half> %a, <8 x half> %b) {
56; CHECK-LABEL: test_vcmulq_rot180_f16:
57; CHECK:       @ %bb.0: @ %entry
58; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #180
59; CHECK-NEXT:    bx lr
60entry:
61  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %a, <8 x half> %b)
62  ret <8 x half> %0
63}
64
; Unpredicated vcmulq on <4 x float>, rotation selector i32 2.
; Expected codegen: vcmul.f32 with rotation #180 into q2, then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
65define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_f32(<4 x float> %a, <4 x float> %b) {
66; CHECK-LABEL: test_vcmulq_rot180_f32:
67; CHECK:       @ %bb.0: @ %entry
68; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #180
69; CHECK-NEXT:    vmov q0, q2
70; CHECK-NEXT:    bx lr
71entry:
72  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 2, <4 x float> %a, <4 x float> %b)
73  ret <4 x float> %0
74}
75
; Unpredicated vcmulq on <8 x half>, rotation selector i32 3.
; Expected codegen: one vcmul.f16 with rotation #270, writing q0 in place.
76define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_f16(<8 x half> %a, <8 x half> %b) {
77; CHECK-LABEL: test_vcmulq_rot270_f16:
78; CHECK:       @ %bb.0: @ %entry
79; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #270
80; CHECK-NEXT:    bx lr
81entry:
82  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %a, <8 x half> %b)
83  ret <8 x half> %0
84}
85
; Unpredicated vcmulq on <4 x float>, rotation selector i32 3.
; Expected codegen: vcmul.f32 with rotation #270 into q2, then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
86define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_f32(<4 x float> %a, <4 x float> %b) {
87; CHECK-LABEL: test_vcmulq_rot270_f32:
88; CHECK:       @ %bb.0: @ %entry
89; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #270
90; CHECK-NEXT:    vmov q0, q2
91; CHECK-NEXT:    bx lr
92entry:
93  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 3, <4 x float> %a, <4 x float> %b)
94  ret <4 x float> %0
95}
96
; Predicated vcmulq on <8 x half>, rotation selector i32 0, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 q0, q1, q2 with rotation #0.
97define arm_aapcs_vfpcc <8 x half> @test_vcmulq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
98; CHECK-LABEL: test_vcmulq_m_f16:
99; CHECK:       @ %bb.0: @ %entry
100; CHECK-NEXT:    vmsr p0, r0
101; CHECK-NEXT:    vpst
102; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #0
103; CHECK-NEXT:    bx lr
104entry:
105  %0 = zext i16 %p to i32
106  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
107  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
108  ret <8 x half> %2
109}
110
; Predicated vcmulq on <4 x float>, rotation selector i32 0, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 q0, q1, q2 with rotation #0.
111define arm_aapcs_vfpcc <4 x float> @test_vcmulq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
112; CHECK-LABEL: test_vcmulq_m_f32:
113; CHECK:       @ %bb.0: @ %entry
114; CHECK-NEXT:    vmsr p0, r0
115; CHECK-NEXT:    vpst
116; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #0
117; CHECK-NEXT:    bx lr
118entry:
119  %0 = zext i16 %p to i32
120  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
121  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
122  ret <4 x float> %2
123}
124
; Predicated vcmulq on <8 x half>, rotation selector i32 1, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 q0, q1, q2 with rotation #90.
125define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
126; CHECK-LABEL: test_vcmulq_rot90_m_f16:
127; CHECK:       @ %bb.0: @ %entry
128; CHECK-NEXT:    vmsr p0, r0
129; CHECK-NEXT:    vpst
130; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #90
131; CHECK-NEXT:    bx lr
132entry:
133  %0 = zext i16 %p to i32
134  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
135  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
136  ret <8 x half> %2
137}
138
; Predicated vcmulq on <4 x float>, rotation selector i32 1, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 q0, q1, q2 with rotation #90.
139define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
140; CHECK-LABEL: test_vcmulq_rot90_m_f32:
141; CHECK:       @ %bb.0: @ %entry
142; CHECK-NEXT:    vmsr p0, r0
143; CHECK-NEXT:    vpst
144; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #90
145; CHECK-NEXT:    bx lr
146entry:
147  %0 = zext i16 %p to i32
148  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
149  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
150  ret <4 x float> %2
151}
152
; Predicated vcmulq on <8 x half>, rotation selector i32 2, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 q0, q1, q2 with rotation #180.
153define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
154; CHECK-LABEL: test_vcmulq_rot180_m_f16:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vmsr p0, r0
157; CHECK-NEXT:    vpst
158; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #180
159; CHECK-NEXT:    bx lr
160entry:
161  %0 = zext i16 %p to i32
162  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
163  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
164  ret <8 x half> %2
165}
166
; Predicated vcmulq on <4 x float>, rotation selector i32 2, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 q0, q1, q2 with rotation #180.
167define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
168; CHECK-LABEL: test_vcmulq_rot180_m_f32:
169; CHECK:       @ %bb.0: @ %entry
170; CHECK-NEXT:    vmsr p0, r0
171; CHECK-NEXT:    vpst
172; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #180
173; CHECK-NEXT:    bx lr
174entry:
175  %0 = zext i16 %p to i32
176  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
177  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
178  ret <4 x float> %2
179}
180
; Predicated vcmulq on <8 x half>, rotation selector i32 3, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 q0, q1, q2 with rotation #270.
181define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
182; CHECK-LABEL: test_vcmulq_rot270_m_f16:
183; CHECK:       @ %bb.0: @ %entry
184; CHECK-NEXT:    vmsr p0, r0
185; CHECK-NEXT:    vpst
186; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #270
187; CHECK-NEXT:    bx lr
188entry:
189  %0 = zext i16 %p to i32
190  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
191  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
192  ret <8 x half> %2
193}
194
; Predicated vcmulq on <4 x float>, rotation selector i32 3, with an explicit
; %inactive vector passed as the intrinsic's first vector operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 q0, q1, q2 with rotation #270.
195define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
196; CHECK-LABEL: test_vcmulq_rot270_m_f32:
197; CHECK:       @ %bb.0: @ %entry
198; CHECK-NEXT:    vmsr p0, r0
199; CHECK-NEXT:    vpst
200; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #270
201; CHECK-NEXT:    bx lr
202entry:
203  %0 = zext i16 %p to i32
204  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
205  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
206  ret <4 x float> %2
207}
208
; Predicated vcmulq on <8 x half>, rotation selector i32 0, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 in place on q0 with rotation #0.
209define arm_aapcs_vfpcc <8 x half> @test_vcmulq_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
210; CHECK-LABEL: test_vcmulq_x_f16:
211; CHECK:       @ %bb.0: @ %entry
212; CHECK-NEXT:    vmsr p0, r0
213; CHECK-NEXT:    vpst
214; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #0
215; CHECK-NEXT:    bx lr
216entry:
217  %0 = zext i16 %p to i32
218  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
219  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
220  ret <8 x half> %2
221}
222
; Predicated vcmulq on <4 x float>, rotation selector i32 0, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 with rotation #0 into q2,
; then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
223define arm_aapcs_vfpcc <4 x float> @test_vcmulq_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
224; CHECK-LABEL: test_vcmulq_x_f32:
225; CHECK:       @ %bb.0: @ %entry
226; CHECK-NEXT:    vmsr p0, r0
227; CHECK-NEXT:    vpst
228; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #0
229; CHECK-NEXT:    vmov q0, q2
230; CHECK-NEXT:    bx lr
231entry:
232  %0 = zext i16 %p to i32
233  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
234  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
235  ret <4 x float> %2
236}
237
; Predicated vcmulq on <8 x half>, rotation selector i32 1, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 in place on q0 with rotation #90.
238define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
239; CHECK-LABEL: test_vcmulq_rot90_x_f16:
240; CHECK:       @ %bb.0: @ %entry
241; CHECK-NEXT:    vmsr p0, r0
242; CHECK-NEXT:    vpst
243; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #90
244; CHECK-NEXT:    bx lr
245entry:
246  %0 = zext i16 %p to i32
247  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
248  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
249  ret <8 x half> %2
250}
251
; Predicated vcmulq on <4 x float>, rotation selector i32 1, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 with rotation #90 into q2,
; then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
252define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
253; CHECK-LABEL: test_vcmulq_rot90_x_f32:
254; CHECK:       @ %bb.0: @ %entry
255; CHECK-NEXT:    vmsr p0, r0
256; CHECK-NEXT:    vpst
257; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #90
258; CHECK-NEXT:    vmov q0, q2
259; CHECK-NEXT:    bx lr
260entry:
261  %0 = zext i16 %p to i32
262  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
263  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
264  ret <4 x float> %2
265}
266
; Predicated vcmulq on <8 x half>, rotation selector i32 2, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 in place on q0 with rotation #180.
267define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
268; CHECK-LABEL: test_vcmulq_rot180_x_f16:
269; CHECK:       @ %bb.0: @ %entry
270; CHECK-NEXT:    vmsr p0, r0
271; CHECK-NEXT:    vpst
272; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #180
273; CHECK-NEXT:    bx lr
274entry:
275  %0 = zext i16 %p to i32
276  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
277  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
278  ret <8 x half> %2
279}
280
; Predicated vcmulq on <4 x float>, rotation selector i32 2, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 with rotation #180 into q2,
; then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
281define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
282; CHECK-LABEL: test_vcmulq_rot180_x_f32:
283; CHECK:       @ %bb.0: @ %entry
284; CHECK-NEXT:    vmsr p0, r0
285; CHECK-NEXT:    vpst
286; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #180
287; CHECK-NEXT:    vmov q0, q2
288; CHECK-NEXT:    bx lr
289entry:
290  %0 = zext i16 %p to i32
291  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
292  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
293  ret <4 x float> %2
294}
295
; Predicated vcmulq on <8 x half>, rotation selector i32 3, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <8 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f16 in place on q0 with rotation #270.
296define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
297; CHECK-LABEL: test_vcmulq_rot270_x_f16:
298; CHECK:       @ %bb.0: @ %entry
299; CHECK-NEXT:    vmsr p0, r0
300; CHECK-NEXT:    vpst
301; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #270
302; CHECK-NEXT:    bx lr
303entry:
304  %0 = zext i16 %p to i32
305  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
306  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
307  ret <8 x half> %2
308}
309
; Predicated vcmulq on <4 x float>, rotation selector i32 3, with undef as the
; inactive operand.
; %p is zero-extended to i32 and converted to <4 x i1> via pred.i2v.
; Expected codegen: vmsr p0, r0; vpst; vcmult.f32 with rotation #270 into q2,
; then vmov back to q0.
; NOTE(review): the extra vmov presumably stems from a destination/source
; overlap restriction on the f32 form - confirm against the MVE instruction spec.
310define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
311; CHECK-LABEL: test_vcmulq_rot270_x_f32:
312; CHECK:       @ %bb.0: @ %entry
313; CHECK-NEXT:    vmsr p0, r0
314; CHECK-NEXT:    vpst
315; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #270
316; CHECK-NEXT:    vmov q0, q2
317; CHECK-NEXT:    bx lr
318entry:
319  %0 = zext i16 %p to i32
320  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
321  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
322  ret <4 x float> %2
323}
324