• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Intrinsics that take an i32 and return a vector-of-i1 predicate.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

; vcmlaq intrinsics: a leading i32 operand followed by three vector operands
; of the result type.
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)

; Predicated vcmlaq intrinsics: the same operands plus a trailing i1-vector
; mask with one lane per element.
declare <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)
declare <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
12
13
; Unpredicated <8 x half> case with a leading i32 operand of 0; the expected
; output is a single vcmla.f16 carrying #0, followed by the return.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}
23
; Unpredicated <4 x float> case with a leading i32 operand of 0; the expected
; output is a single vcmla.f32 carrying #0, followed by the return.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}
33
34
; Unpredicated <8 x half> case with a leading i32 operand of 1; the expected
; output is a single vcmla.f16 carrying #90, followed by the return.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot90_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 1, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}
44
; Unpredicated <4 x float> case with a leading i32 operand of 1; the expected
; output is a single vcmla.f32 carrying #90, followed by the return.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot90_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}
54
; Unpredicated <8 x half> case with a leading i32 operand of 2; the expected
; output is a single vcmla.f16 carrying #180, followed by the return.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot180_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot180_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}
64
; Unpredicated <4 x float> case with a leading i32 operand of 2; the expected
; output is a single vcmla.f32 carrying #180, followed by the return.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot180_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot180_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 2, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}
74
; Unpredicated <8 x half> case with a leading i32 operand of 3; the expected
; output is a single vcmla.f16 carrying #270, followed by the return.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot270_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}
84
; Unpredicated <4 x float> case with a leading i32 operand of 3; the expected
; output is a single vcmla.f32 carrying #270, followed by the return.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot270_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 3, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}
94
; Predicated <8 x half> case with a leading i32 operand of 0. %p is
; zero-extended to i32, turned into an <8 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f16 carrying #0.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.ext)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 0, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %mask)
  ret <8 x half> %res
}
108
; Predicated <4 x float> case with a leading i32 operand of 0. %p is
; zero-extended to i32, turned into a <4 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f32 carrying #0.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.ext)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask)
  ret <4 x float> %res
}
122
; Predicated <8 x half> case with a leading i32 operand of 1. %p is
; zero-extended to i32, turned into an <8 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f16 carrying #90.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot90_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.ext)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 1, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %mask)
  ret <8 x half> %res
}
136
; Predicated <4 x float> case with a leading i32 operand of 1. %p is
; zero-extended to i32, turned into a <4 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f32 carrying #90.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot90_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.ext)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask)
  ret <4 x float> %res
}
150
; Predicated <8 x half> case with a leading i32 operand of 2. %p is
; zero-extended to i32, turned into an <8 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f16 carrying #180.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot180_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot180_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.ext)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 2, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %mask)
  ret <8 x half> %res
}
164
; Predicated <4 x float> case with a leading i32 operand of 2. %p is
; zero-extended to i32, turned into a <4 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f32 carrying #180.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot180_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot180_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.ext)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 2, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask)
  ret <4 x float> %res
}
178
; Predicated <8 x half> case with a leading i32 operand of 3. %p is
; zero-extended to i32, turned into an <8 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f16 carrying #270.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot270_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.ext)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 3, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %mask)
  ret <8 x half> %res
}
192
; Predicated <4 x float> case with a leading i32 operand of 3. %p is
; zero-extended to i32, turned into a <4 x i1> mask by pred.i2v, and passed as
; the final operand. The expected output writes r0 to p0, then issues vpst and
; a vcmlat.f32 carrying #270.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot270_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.ext = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.ext)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 3, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask)
  ret <4 x float> %res
}
206