• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Predicate-conversion intrinsics: each takes an i32 and returns an i1 vector
; with 16, 8 or 4 lanes.
4declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
5declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
6declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
7
; Unpredicated vcaddq intrinsics: two i32 operands followed by two vectors.
; As used by the tests in this file, the first i32 is 1 where vcadd is expected
; and 0 where vhcadd is expected; the second i32 is 0 for #90 and 1 for #270.
8declare <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32, i32, <16 x i8>, <16 x i8>)
9declare <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32, i32, <4 x i32>, <4 x i32>)
10declare <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32, i32, <8 x i16>, <8 x i16>)
11declare <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32, i32, <8 x half>, <8 x half>)
12declare <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32, i32, <4 x float>, <4 x float>)
13
; Predicated vcaddq intrinsics: the same two i32 operands, then three vectors
; of the result type (the first is named %inactive or given as undef by the
; tests in this file) and a trailing i1 predicate vector.
14declare <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32, i32, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>)
15declare <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32, i32, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>)
16declare <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>)
17declare <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32, i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
18declare <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32, i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)
19
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (1, 0).
; The assertions expect a single `vcadd.i8 q0, q0, q1, #90` before the return.
20define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_u8(<16 x i8> %a, <16 x i8> %b) {
21; CHECK-LABEL: test_vcaddq_rot90_u8:
22; CHECK:       @ %bb.0: @ %entry
23; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #90
24; CHECK-NEXT:    bx lr
25entry:
26  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
27  ret <16 x i8> %0
28}
29
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (1, 0).
; The assertions expect a single `vcadd.i16 q0, q0, q1, #90` before the return.
30define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_u16(<8 x i16> %a, <8 x i16> %b) {
31; CHECK-LABEL: test_vcaddq_rot90_u16:
32; CHECK:       @ %bb.0: @ %entry
33; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #90
34; CHECK-NEXT:    bx lr
35entry:
36  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
37  ret <8 x i16> %0
38}
39
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (1, 0).
; Unlike the 8/16-bit tests, the assertions expect `vcadd.i32` to write q2 and
; a `vmov q0, q2` copy afterwards.
; NOTE(review): presumably the 32-bit form may not reuse a source register as
; its destination - confirm against the instruction definition.
40define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_u32(<4 x i32> %a, <4 x i32> %b) {
41; CHECK-LABEL: test_vcaddq_rot90_u32:
42; CHECK:       @ %bb.0: @ %entry
43; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #90
44; CHECK-NEXT:    vmov q0, q2
45; CHECK-NEXT:    bx lr
46entry:
47  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
48  ret <4 x i32> %0
49}
50
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (1, 0).
; The IR body and expected `vcadd.i8 ... #90` match test_vcaddq_rot90_u8; only
; the function name differs.
51define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
52; CHECK-LABEL: test_vcaddq_rot90_s8:
53; CHECK:       @ %bb.0: @ %entry
54; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #90
55; CHECK-NEXT:    bx lr
56entry:
57  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
58  ret <16 x i8> %0
59}
60
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (1, 0).
; The IR body and expected `vcadd.i16 ... #90` match test_vcaddq_rot90_u16; only
; the function name differs.
61define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
62; CHECK-LABEL: test_vcaddq_rot90_s16:
63; CHECK:       @ %bb.0: @ %entry
64; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #90
65; CHECK-NEXT:    bx lr
66entry:
67  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
68  ret <8 x i16> %0
69}
70
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (1, 0).
; The IR body and expected output (`vcadd.i32` into q2, then `vmov q0, q2`)
; match test_vcaddq_rot90_u32; only the function name differs.
71define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
72; CHECK-LABEL: test_vcaddq_rot90_s32:
73; CHECK:       @ %bb.0: @ %entry
74; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #90
75; CHECK-NEXT:    vmov q0, q2
76; CHECK-NEXT:    bx lr
77entry:
78  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
79  ret <4 x i32> %0
80}
81
; Unpredicated llvm.arm.mve.vcaddq.v8f16 call with leading i32 operands (1, 0).
; The assertions expect a single `vcadd.f16 q0, q0, q1, #90` before the return.
82define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_f16(<8 x half> %a, <8 x half> %b) {
83; CHECK-LABEL: test_vcaddq_rot90_f16:
84; CHECK:       @ %bb.0: @ %entry
85; CHECK-NEXT:    vcadd.f16 q0, q0, q1, #90
86; CHECK-NEXT:    bx lr
87entry:
88  %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 0, <8 x half> %a, <8 x half> %b)
89  ret <8 x half> %0
90}
91
; Unpredicated llvm.arm.mve.vcaddq.v4f32 call with leading i32 operands (1, 0).
; As in the i32 tests, the assertions expect `vcadd.f32` to write q2 and a
; `vmov q0, q2` copy afterwards.
92define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_f32(<4 x float> %a, <4 x float> %b) {
93; CHECK-LABEL: test_vcaddq_rot90_f32:
94; CHECK:       @ %bb.0: @ %entry
95; CHECK-NEXT:    vcadd.f32 q2, q0, q1, #90
96; CHECK-NEXT:    vmov q0, q2
97; CHECK-NEXT:    bx lr
98entry:
99  %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %a, <4 x float> %b)
100  ret <4 x float> %0
101}
102
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (1, 1).
; The assertions expect a single `vcadd.i8 q0, q0, q1, #270` before the return.
103define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_u8(<16 x i8> %a, <16 x i8> %b) {
104; CHECK-LABEL: test_vcaddq_rot270_u8:
105; CHECK:       @ %bb.0: @ %entry
106; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #270
107; CHECK-NEXT:    bx lr
108entry:
109  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
110  ret <16 x i8> %0
111}
112
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (1, 1).
; The assertions expect a single `vcadd.i16 q0, q0, q1, #270` before the return.
113define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_u16(<8 x i16> %a, <8 x i16> %b) {
114; CHECK-LABEL: test_vcaddq_rot270_u16:
115; CHECK:       @ %bb.0: @ %entry
116; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #270
117; CHECK-NEXT:    bx lr
118entry:
119  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
120  ret <8 x i16> %0
121}
122
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (1, 1).
; The assertions expect `vcadd.i32 ... #270` to write q2 and a `vmov q0, q2`
; copy afterwards.
123define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_u32(<4 x i32> %a, <4 x i32> %b) {
124; CHECK-LABEL: test_vcaddq_rot270_u32:
125; CHECK:       @ %bb.0: @ %entry
126; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #270
127; CHECK-NEXT:    vmov q0, q2
128; CHECK-NEXT:    bx lr
129entry:
130  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
131  ret <4 x i32> %0
132}
133
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (1, 1).
; The IR body and expected `vcadd.i8 ... #270` match test_vcaddq_rot270_u8; only
; the function name differs.
134define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
135; CHECK-LABEL: test_vcaddq_rot270_s8:
136; CHECK:       @ %bb.0: @ %entry
137; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #270
138; CHECK-NEXT:    bx lr
139entry:
140  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
141  ret <16 x i8> %0
142}
143
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (1, 1).
; The IR body and expected `vcadd.i16 ... #270` match test_vcaddq_rot270_u16;
; only the function name differs.
144define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
145; CHECK-LABEL: test_vcaddq_rot270_s16:
146; CHECK:       @ %bb.0: @ %entry
147; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #270
148; CHECK-NEXT:    bx lr
149entry:
150  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
151  ret <8 x i16> %0
152}
153
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (1, 1).
; The IR body and expected output (`vcadd.i32 ... #270` into q2, then
; `vmov q0, q2`) match test_vcaddq_rot270_u32; only the function name differs.
154define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
155; CHECK-LABEL: test_vcaddq_rot270_s32:
156; CHECK:       @ %bb.0: @ %entry
157; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #270
158; CHECK-NEXT:    vmov q0, q2
159; CHECK-NEXT:    bx lr
160entry:
161  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
162  ret <4 x i32> %0
163}
164
; Unpredicated llvm.arm.mve.vcaddq.v8f16 call with leading i32 operands (1, 1).
; The assertions expect a single `vcadd.f16 q0, q0, q1, #270` before the return.
165define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_f16(<8 x half> %a, <8 x half> %b) {
166; CHECK-LABEL: test_vcaddq_rot270_f16:
167; CHECK:       @ %bb.0: @ %entry
168; CHECK-NEXT:    vcadd.f16 q0, q0, q1, #270
169; CHECK-NEXT:    bx lr
170entry:
171  %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 1, <8 x half> %a, <8 x half> %b)
172  ret <8 x half> %0
173}
174
; Unpredicated llvm.arm.mve.vcaddq.v4f32 call with leading i32 operands (1, 1).
; The assertions expect `vcadd.f32 ... #270` to write q2 and a `vmov q0, q2`
; copy afterwards.
175define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_f32(<4 x float> %a, <4 x float> %b) {
176; CHECK-LABEL: test_vcaddq_rot270_f32:
177; CHECK:       @ %bb.0: @ %entry
178; CHECK-NEXT:    vcadd.f32 q2, q0, q1, #270
179; CHECK-NEXT:    vmov q0, q2
180; CHECK-NEXT:    bx lr
181entry:
182  %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %a, <4 x float> %b)
183  ret <4 x float> %0
184}
185
; Predicated _m variant: %p is zero-extended to i32, converted to <16 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 0). Expected: vmsr p0 / vpst / `vcaddt.i8 q0, q1, q2, #90`.
186define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
187; CHECK-LABEL: test_vcaddq_rot90_m_u8:
188; CHECK:       @ %bb.0: @ %entry
189; CHECK-NEXT:    vmsr p0, r0
190; CHECK-NEXT:    vpst
191; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #90
192; CHECK-NEXT:    bx lr
193entry:
194  %0 = zext i16 %p to i32
195  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
196  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
197  ret <16 x i8> %2
198}
199
; Predicated _m variant: %p is zero-extended to i32, converted to <8 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 0). Expected: vmsr p0 / vpst / `vcaddt.i16 q0, q1, q2, #90`.
200define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
201; CHECK-LABEL: test_vcaddq_rot90_m_u16:
202; CHECK:       @ %bb.0: @ %entry
203; CHECK-NEXT:    vmsr p0, r0
204; CHECK-NEXT:    vpst
205; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #90
206; CHECK-NEXT:    bx lr
207entry:
208  %0 = zext i16 %p to i32
209  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
210  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
211  ret <8 x i16> %2
212}
213
; Predicated _m variant: %p is zero-extended to i32, converted to <4 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 0). Expected: vmsr p0 / vpst / `vcaddt.i32 q0, q1, q2, #90`,
; with no extra vmov in the assertions.
214define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
215; CHECK-LABEL: test_vcaddq_rot90_m_u32:
216; CHECK:       @ %bb.0: @ %entry
217; CHECK-NEXT:    vmsr p0, r0
218; CHECK-NEXT:    vpst
219; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #90
220; CHECK-NEXT:    bx lr
221entry:
222  %0 = zext i16 %p to i32
223  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
224  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
225  ret <4 x i32> %2
226}
227
; Predicated _m variant on <16 x i8> with i32 operands (1, 0). The IR body and
; expected vmsr / vpst / `vcaddt.i8 ... #90` match test_vcaddq_rot90_m_u8; only
; the function name differs.
228define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
229; CHECK-LABEL: test_vcaddq_rot90_m_s8:
230; CHECK:       @ %bb.0: @ %entry
231; CHECK-NEXT:    vmsr p0, r0
232; CHECK-NEXT:    vpst
233; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #90
234; CHECK-NEXT:    bx lr
235entry:
236  %0 = zext i16 %p to i32
237  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
238  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
239  ret <16 x i8> %2
240}
241
; Predicated _m variant on <8 x i16> with i32 operands (1, 0). The IR body and
; expected vmsr / vpst / `vcaddt.i16 ... #90` match test_vcaddq_rot90_m_u16;
; only the function name differs.
242define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
243; CHECK-LABEL: test_vcaddq_rot90_m_s16:
244; CHECK:       @ %bb.0: @ %entry
245; CHECK-NEXT:    vmsr p0, r0
246; CHECK-NEXT:    vpst
247; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #90
248; CHECK-NEXT:    bx lr
249entry:
250  %0 = zext i16 %p to i32
251  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
252  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
253  ret <8 x i16> %2
254}
255
; Predicated _m variant on <4 x i32> with i32 operands (1, 0). The IR body and
; expected vmsr / vpst / `vcaddt.i32 q0, q1, q2, #90` match
; test_vcaddq_rot90_m_u32; only the function name differs.
256define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
257; CHECK-LABEL: test_vcaddq_rot90_m_s32:
258; CHECK:       @ %bb.0: @ %entry
259; CHECK-NEXT:    vmsr p0, r0
260; CHECK-NEXT:    vpst
261; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #90
262; CHECK-NEXT:    bx lr
263entry:
264  %0 = zext i16 %p to i32
265  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
266  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
267  ret <4 x i32> %2
268}
269
; Predicated _m variant: %p is zero-extended to i32, converted to <8 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 0). Expected: vmsr p0 / vpst / `vcaddt.f16 q0, q1, q2, #90`.
270define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
271; CHECK-LABEL: test_vcaddq_rot90_m_f16:
272; CHECK:       @ %bb.0: @ %entry
273; CHECK-NEXT:    vmsr p0, r0
274; CHECK-NEXT:    vpst
275; CHECK-NEXT:    vcaddt.f16 q0, q1, q2, #90
276; CHECK-NEXT:    bx lr
277entry:
278  %0 = zext i16 %p to i32
279  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
280  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
281  ret <8 x half> %2
282}
283
; Predicated _m variant: %p is zero-extended to i32, converted to <4 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 0). Expected: vmsr p0 / vpst / `vcaddt.f32 q0, q1, q2, #90`,
; with no extra vmov in the assertions.
284define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
285; CHECK-LABEL: test_vcaddq_rot90_m_f32:
286; CHECK:       @ %bb.0: @ %entry
287; CHECK-NEXT:    vmsr p0, r0
288; CHECK-NEXT:    vpst
289; CHECK-NEXT:    vcaddt.f32 q0, q1, q2, #90
290; CHECK-NEXT:    bx lr
291entry:
292  %0 = zext i16 %p to i32
293  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
294  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
295  ret <4 x float> %2
296}
297
; Predicated _m variant: %p is zero-extended to i32, converted to <16 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 1). Expected: vmsr p0 / vpst / `vcaddt.i8 q0, q1, q2, #270`.
298define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
299; CHECK-LABEL: test_vcaddq_rot270_m_u8:
300; CHECK:       @ %bb.0: @ %entry
301; CHECK-NEXT:    vmsr p0, r0
302; CHECK-NEXT:    vpst
303; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #270
304; CHECK-NEXT:    bx lr
305entry:
306  %0 = zext i16 %p to i32
307  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
308  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
309  ret <16 x i8> %2
310}
311
; Predicated _m variant: %p is zero-extended to i32, converted to <8 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 1). Expected: vmsr p0 / vpst / `vcaddt.i16 q0, q1, q2, #270`.
312define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
313; CHECK-LABEL: test_vcaddq_rot270_m_u16:
314; CHECK:       @ %bb.0: @ %entry
315; CHECK-NEXT:    vmsr p0, r0
316; CHECK-NEXT:    vpst
317; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #270
318; CHECK-NEXT:    bx lr
319entry:
320  %0 = zext i16 %p to i32
321  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
322  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
323  ret <8 x i16> %2
324}
325
; Predicated _m variant: %p is zero-extended to i32, converted to <4 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 1). Expected: vmsr p0 / vpst / `vcaddt.i32 q0, q1, q2, #270`,
; with no extra vmov in the assertions.
326define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
327; CHECK-LABEL: test_vcaddq_rot270_m_u32:
328; CHECK:       @ %bb.0: @ %entry
329; CHECK-NEXT:    vmsr p0, r0
330; CHECK-NEXT:    vpst
331; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #270
332; CHECK-NEXT:    bx lr
333entry:
334  %0 = zext i16 %p to i32
335  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
336  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
337  ret <4 x i32> %2
338}
339
; Predicated _m variant on <16 x i8> with i32 operands (1, 1). The IR body and
; expected vmsr / vpst / `vcaddt.i8 ... #270` match test_vcaddq_rot270_m_u8;
; only the function name differs.
340define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
341; CHECK-LABEL: test_vcaddq_rot270_m_s8:
342; CHECK:       @ %bb.0: @ %entry
343; CHECK-NEXT:    vmsr p0, r0
344; CHECK-NEXT:    vpst
345; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #270
346; CHECK-NEXT:    bx lr
347entry:
348  %0 = zext i16 %p to i32
349  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
350  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
351  ret <16 x i8> %2
352}
353
; Predicated _m variant on <8 x i16> with i32 operands (1, 1). The IR body and
; expected vmsr / vpst / `vcaddt.i16 ... #270` match test_vcaddq_rot270_m_u16;
; only the function name differs.
354define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
355; CHECK-LABEL: test_vcaddq_rot270_m_s16:
356; CHECK:       @ %bb.0: @ %entry
357; CHECK-NEXT:    vmsr p0, r0
358; CHECK-NEXT:    vpst
359; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #270
360; CHECK-NEXT:    bx lr
361entry:
362  %0 = zext i16 %p to i32
363  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
364  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
365  ret <8 x i16> %2
366}
367
; Predicated _m variant on <4 x i32> with i32 operands (1, 1). The IR body and
; expected vmsr / vpst / `vcaddt.i32 q0, q1, q2, #270` match
; test_vcaddq_rot270_m_u32; only the function name differs.
368define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
369; CHECK-LABEL: test_vcaddq_rot270_m_s32:
370; CHECK:       @ %bb.0: @ %entry
371; CHECK-NEXT:    vmsr p0, r0
372; CHECK-NEXT:    vpst
373; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #270
374; CHECK-NEXT:    bx lr
375entry:
376  %0 = zext i16 %p to i32
377  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
378  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
379  ret <4 x i32> %2
380}
381
; Predicated _m variant: %p is zero-extended to i32, converted to <8 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 1). Expected: vmsr p0 / vpst / `vcaddt.f16 q0, q1, q2, #270`.
382define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
383; CHECK-LABEL: test_vcaddq_rot270_m_f16:
384; CHECK:       @ %bb.0: @ %entry
385; CHECK-NEXT:    vmsr p0, r0
386; CHECK-NEXT:    vpst
387; CHECK-NEXT:    vcaddt.f16 q0, q1, q2, #270
388; CHECK-NEXT:    bx lr
389entry:
390  %0 = zext i16 %p to i32
391  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
392  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
393  ret <8 x half> %2
394}
395
; Predicated _m variant: %p is zero-extended to i32, converted to <4 x i1> via
; pred.i2v, and %inactive is the first vector operand of the predicated call
; (i32 operands 1, 1). Expected: vmsr p0 / vpst / `vcaddt.f32 q0, q1, q2, #270`,
; with no extra vmov in the assertions.
396define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
397; CHECK-LABEL: test_vcaddq_rot270_m_f32:
398; CHECK:       @ %bb.0: @ %entry
399; CHECK-NEXT:    vmsr p0, r0
400; CHECK-NEXT:    vpst
401; CHECK-NEXT:    vcaddt.f32 q0, q1, q2, #270
402; CHECK-NEXT:    bx lr
403entry:
404  %0 = zext i16 %p to i32
405  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
406  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
407  ret <4 x float> %2
408}
409
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 0).
; Expected: vmsr p0 / vpst / `vcaddt.i8 q0, q0, q1, #90`.
410define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
411; CHECK-LABEL: test_vcaddq_rot90_x_u8:
412; CHECK:       @ %bb.0: @ %entry
413; CHECK-NEXT:    vmsr p0, r0
414; CHECK-NEXT:    vpst
415; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #90
416; CHECK-NEXT:    bx lr
417entry:
418  %0 = zext i16 %p to i32
419  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
420  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
421  ret <16 x i8> %2
422}
423
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 0).
; Expected: vmsr p0 / vpst / `vcaddt.i16 q0, q0, q1, #90`.
424define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
425; CHECK-LABEL: test_vcaddq_rot90_x_u16:
426; CHECK:       @ %bb.0: @ %entry
427; CHECK-NEXT:    vmsr p0, r0
428; CHECK-NEXT:    vpst
429; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #90
430; CHECK-NEXT:    bx lr
431entry:
432  %0 = zext i16 %p to i32
433  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
434  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
435  ret <8 x i16> %2
436}
437
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 0).
; Expected: vmsr p0 / vpst / `vcaddt.i32 q2, q0, q1, #90`, then `vmov q0, q2`.
438define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
439; CHECK-LABEL: test_vcaddq_rot90_x_u32:
440; CHECK:       @ %bb.0: @ %entry
441; CHECK-NEXT:    vmsr p0, r0
442; CHECK-NEXT:    vpst
443; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #90
444; CHECK-NEXT:    vmov q0, q2
445; CHECK-NEXT:    bx lr
446entry:
447  %0 = zext i16 %p to i32
448  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
449  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
450  ret <4 x i32> %2
451}
452
; Predicated _x variant on <16 x i8> with i32 operands (1, 0) and an undef
; first vector operand. The IR body and expected `vcaddt.i8 ... #90` match
; test_vcaddq_rot90_x_u8; only the function name differs.
453define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
454; CHECK-LABEL: test_vcaddq_rot90_x_s8:
455; CHECK:       @ %bb.0: @ %entry
456; CHECK-NEXT:    vmsr p0, r0
457; CHECK-NEXT:    vpst
458; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #90
459; CHECK-NEXT:    bx lr
460entry:
461  %0 = zext i16 %p to i32
462  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
463  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
464  ret <16 x i8> %2
465}
466
; Predicated _x variant on <8 x i16> with i32 operands (1, 0) and an undef
; first vector operand. The IR body and expected `vcaddt.i16 ... #90` match
; test_vcaddq_rot90_x_u16; only the function name differs.
467define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
468; CHECK-LABEL: test_vcaddq_rot90_x_s16:
469; CHECK:       @ %bb.0: @ %entry
470; CHECK-NEXT:    vmsr p0, r0
471; CHECK-NEXT:    vpst
472; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #90
473; CHECK-NEXT:    bx lr
474entry:
475  %0 = zext i16 %p to i32
476  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
477  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
478  ret <8 x i16> %2
479}
480
; Predicated _x variant on <4 x i32> with i32 operands (1, 0) and an undef
; first vector operand. The IR body and expected output (`vcaddt.i32` into q2,
; then `vmov q0, q2`) match test_vcaddq_rot90_x_u32; only the name differs.
481define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
482; CHECK-LABEL: test_vcaddq_rot90_x_s32:
483; CHECK:       @ %bb.0: @ %entry
484; CHECK-NEXT:    vmsr p0, r0
485; CHECK-NEXT:    vpst
486; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #90
487; CHECK-NEXT:    vmov q0, q2
488; CHECK-NEXT:    bx lr
489entry:
490  %0 = zext i16 %p to i32
491  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
492  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
493  ret <4 x i32> %2
494}
495
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 0).
; Expected: vmsr p0 / vpst / `vcaddt.f16 q0, q0, q1, #90`.
496define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
497; CHECK-LABEL: test_vcaddq_rot90_x_f16:
498; CHECK:       @ %bb.0: @ %entry
499; CHECK-NEXT:    vmsr p0, r0
500; CHECK-NEXT:    vpst
501; CHECK-NEXT:    vcaddt.f16 q0, q0, q1, #90
502; CHECK-NEXT:    bx lr
503entry:
504  %0 = zext i16 %p to i32
505  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
506  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
507  ret <8 x half> %2
508}
509
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 0).
; Expected: vmsr p0 / vpst / `vcaddt.f32 q2, q0, q1, #90`, then `vmov q0, q2`.
510define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
511; CHECK-LABEL: test_vcaddq_rot90_x_f32:
512; CHECK:       @ %bb.0: @ %entry
513; CHECK-NEXT:    vmsr p0, r0
514; CHECK-NEXT:    vpst
515; CHECK-NEXT:    vcaddt.f32 q2, q0, q1, #90
516; CHECK-NEXT:    vmov q0, q2
517; CHECK-NEXT:    bx lr
518entry:
519  %0 = zext i16 %p to i32
520  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
521  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
522  ret <4 x float> %2
523}
524
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 1).
; Expected: vmsr p0 / vpst / `vcaddt.i8 q0, q0, q1, #270`.
525define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
526; CHECK-LABEL: test_vcaddq_rot270_x_u8:
527; CHECK:       @ %bb.0: @ %entry
528; CHECK-NEXT:    vmsr p0, r0
529; CHECK-NEXT:    vpst
530; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #270
531; CHECK-NEXT:    bx lr
532entry:
533  %0 = zext i16 %p to i32
534  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
535  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
536  ret <16 x i8> %2
537}
538
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 1).
; Expected: vmsr p0 / vpst / `vcaddt.i16 q0, q0, q1, #270`.
539define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
540; CHECK-LABEL: test_vcaddq_rot270_x_u16:
541; CHECK:       @ %bb.0: @ %entry
542; CHECK-NEXT:    vmsr p0, r0
543; CHECK-NEXT:    vpst
544; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #270
545; CHECK-NEXT:    bx lr
546entry:
547  %0 = zext i16 %p to i32
548  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
549  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
550  ret <8 x i16> %2
551}
552
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 1).
; Expected: vmsr p0 / vpst / `vcaddt.i32 q2, q0, q1, #270`, then `vmov q0, q2`.
553define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
554; CHECK-LABEL: test_vcaddq_rot270_x_u32:
555; CHECK:       @ %bb.0: @ %entry
556; CHECK-NEXT:    vmsr p0, r0
557; CHECK-NEXT:    vpst
558; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #270
559; CHECK-NEXT:    vmov q0, q2
560; CHECK-NEXT:    bx lr
561entry:
562  %0 = zext i16 %p to i32
563  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
564  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
565  ret <4 x i32> %2
566}
567
; Predicated _x variant on <16 x i8> with i32 operands (1, 1) and an undef
; first vector operand. The IR body and expected `vcaddt.i8 ... #270` match
; test_vcaddq_rot270_x_u8; only the function name differs.
568define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
569; CHECK-LABEL: test_vcaddq_rot270_x_s8:
570; CHECK:       @ %bb.0: @ %entry
571; CHECK-NEXT:    vmsr p0, r0
572; CHECK-NEXT:    vpst
573; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #270
574; CHECK-NEXT:    bx lr
575entry:
576  %0 = zext i16 %p to i32
577  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
578  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
579  ret <16 x i8> %2
580}
581
; Predicated _x variant on <8 x i16> with i32 operands (1, 1) and an undef
; first vector operand. The IR body and expected `vcaddt.i16 ... #270` match
; test_vcaddq_rot270_x_u16; only the function name differs.
582define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
583; CHECK-LABEL: test_vcaddq_rot270_x_s16:
584; CHECK:       @ %bb.0: @ %entry
585; CHECK-NEXT:    vmsr p0, r0
586; CHECK-NEXT:    vpst
587; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #270
588; CHECK-NEXT:    bx lr
589entry:
590  %0 = zext i16 %p to i32
591  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
592  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
593  ret <8 x i16> %2
594}
595
; Predicated _x variant on <4 x i32> with i32 operands (1, 1) and an undef
; first vector operand. The IR body and expected output (`vcaddt.i32` into q2,
; then `vmov q0, q2`) match test_vcaddq_rot270_x_u32; only the name differs.
596define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
597; CHECK-LABEL: test_vcaddq_rot270_x_s32:
598; CHECK:       @ %bb.0: @ %entry
599; CHECK-NEXT:    vmsr p0, r0
600; CHECK-NEXT:    vpst
601; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #270
602; CHECK-NEXT:    vmov q0, q2
603; CHECK-NEXT:    bx lr
604entry:
605  %0 = zext i16 %p to i32
606  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
607  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
608  ret <4 x i32> %2
609}
610
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 1).
; Expected: vmsr p0 / vpst / `vcaddt.f16 q0, q0, q1, #270`.
611define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
612; CHECK-LABEL: test_vcaddq_rot270_x_f16:
613; CHECK:       @ %bb.0: @ %entry
614; CHECK-NEXT:    vmsr p0, r0
615; CHECK-NEXT:    vpst
616; CHECK-NEXT:    vcaddt.f16 q0, q0, q1, #270
617; CHECK-NEXT:    bx lr
618entry:
619  %0 = zext i16 %p to i32
620  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
621  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
622  ret <8 x half> %2
623}
624
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 1, 1).
; Expected: vmsr p0 / vpst / `vcaddt.f32 q2, q0, q1, #270`, then `vmov q0, q2`.
625define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
626; CHECK-LABEL: test_vcaddq_rot270_x_f32:
627; CHECK:       @ %bb.0: @ %entry
628; CHECK-NEXT:    vmsr p0, r0
629; CHECK-NEXT:    vpst
630; CHECK-NEXT:    vcaddt.f32 q2, q0, q1, #270
631; CHECK-NEXT:    vmov q0, q2
632; CHECK-NEXT:    bx lr
633entry:
634  %0 = zext i16 %p to i32
635  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
636  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
637  ret <4 x float> %2
638}
639
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (0, 0).
; With the first operand 0 the assertions expect `vhcadd.s8 q0, q0, q1, #90`
; rather than vcadd.
640define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
641; CHECK-LABEL: test_vhcaddq_rot90_s8:
642; CHECK:       @ %bb.0: @ %entry
643; CHECK-NEXT:    vhcadd.s8 q0, q0, q1, #90
644; CHECK-NEXT:    bx lr
645entry:
646  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
647  ret <16 x i8> %0
648}
649
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (0, 0).
; With the first operand 0 the assertions expect `vhcadd.s16 q0, q0, q1, #90`
; rather than vcadd.
650define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
651; CHECK-LABEL: test_vhcaddq_rot90_s16:
652; CHECK:       @ %bb.0: @ %entry
653; CHECK-NEXT:    vhcadd.s16 q0, q0, q1, #90
654; CHECK-NEXT:    bx lr
655entry:
656  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
657  ret <8 x i16> %0
658}
659
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (0, 0).
; The assertions expect `vhcadd.s32 ... #90` to write q2 and a `vmov q0, q2`
; copy afterwards.
660define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
661; CHECK-LABEL: test_vhcaddq_rot90_s32:
662; CHECK:       @ %bb.0: @ %entry
663; CHECK-NEXT:    vhcadd.s32 q2, q0, q1, #90
664; CHECK-NEXT:    vmov q0, q2
665; CHECK-NEXT:    bx lr
666entry:
667  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
668  ret <4 x i32> %0
669}
670
; Unpredicated llvm.arm.mve.vcaddq.v16i8 call with leading i32 operands (0, 1).
; The assertions expect a single `vhcadd.s8 q0, q0, q1, #270` before the return.
671define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
672; CHECK-LABEL: test_vhcaddq_rot270_s8:
673; CHECK:       @ %bb.0: @ %entry
674; CHECK-NEXT:    vhcadd.s8 q0, q0, q1, #270
675; CHECK-NEXT:    bx lr
676entry:
677  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 1, <16 x i8> %a, <16 x i8> %b)
678  ret <16 x i8> %0
679}
680
; Unpredicated llvm.arm.mve.vcaddq.v8i16 call with leading i32 operands (0, 1).
; The assertions expect a single `vhcadd.s16 q0, q0, q1, #270` before the return.
681define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
682; CHECK-LABEL: test_vhcaddq_rot270_s16:
683; CHECK:       @ %bb.0: @ %entry
684; CHECK-NEXT:    vhcadd.s16 q0, q0, q1, #270
685; CHECK-NEXT:    bx lr
686entry:
687  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 1, <8 x i16> %a, <8 x i16> %b)
688  ret <8 x i16> %0
689}
690
; Unpredicated llvm.arm.mve.vcaddq.v4i32 call with leading i32 operands (0, 1).
; The assertions expect `vhcadd.s32 ... #270` to write q2 and a `vmov q0, q2`
; copy afterwards.
691define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
692; CHECK-LABEL: test_vhcaddq_rot270_s32:
693; CHECK:       @ %bb.0: @ %entry
694; CHECK-NEXT:    vhcadd.s32 q2, q0, q1, #270
695; CHECK-NEXT:    vmov q0, q2
696; CHECK-NEXT:    bx lr
697entry:
698  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> %a, <4 x i32> %b)
699  ret <4 x i32> %0
700}
701
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 0, 0).
; Expected: vmsr p0 / vpst / `vhcaddt.s8 q0, q0, q1, #90`.
702define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
703; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
704; CHECK:       @ %bb.0: @ %entry
705; CHECK-NEXT:    vmsr p0, r0
706; CHECK-NEXT:    vpst
707; CHECK-NEXT:    vhcaddt.s8 q0, q0, q1, #90
708; CHECK-NEXT:    bx lr
709entry:
710  %0 = zext i16 %p to i32
711  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
712  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
713  ret <16 x i8> %2
714}
715
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 0, 0).
; Expected: vmsr p0 / vpst / `vhcaddt.s16 q0, q0, q1, #90`.
716define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
717; CHECK-LABEL: test_vhcaddq_rot90_x_s16:
718; CHECK:       @ %bb.0: @ %entry
719; CHECK-NEXT:    vmsr p0, r0
720; CHECK-NEXT:    vpst
721; CHECK-NEXT:    vhcaddt.s16 q0, q0, q1, #90
722; CHECK-NEXT:    bx lr
723entry:
724  %0 = zext i16 %p to i32
725  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
726  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
727  ret <8 x i16> %2
728}
729
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 0, 0).
; Expected: vmsr p0 / vpst / `vhcaddt.s32 q2, q0, q1, #90`, then `vmov q0, q2`.
730define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
731; CHECK-LABEL: test_vhcaddq_rot90_x_s32:
732; CHECK:       @ %bb.0: @ %entry
733; CHECK-NEXT:    vmsr p0, r0
734; CHECK-NEXT:    vpst
735; CHECK-NEXT:    vhcaddt.s32 q2, q0, q1, #90
736; CHECK-NEXT:    vmov q0, q2
737; CHECK-NEXT:    bx lr
738entry:
739  %0 = zext i16 %p to i32
740  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
741  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
742  ret <4 x i32> %2
743}
744
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 0, 1).
; Expected: vmsr p0 / vpst / `vhcaddt.s8 q0, q0, q1, #270`.
745define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
746; CHECK-LABEL: test_vhcaddq_rot270_x_s8:
747; CHECK:       @ %bb.0: @ %entry
748; CHECK-NEXT:    vmsr p0, r0
749; CHECK-NEXT:    vpst
750; CHECK-NEXT:    vhcaddt.s8 q0, q0, q1, #270
751; CHECK-NEXT:    bx lr
752entry:
753  %0 = zext i16 %p to i32
754  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
755  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
756  ret <16 x i8> %2
757}
758
; Predicated _x variant: the predicate is built from %p via zext + pred.i2v, and
; the first vector operand of the predicated call is undef (i32 operands 0, 1).
; Expected: vmsr p0 / vpst / `vhcaddt.s16 q0, q0, q1, #270`.
759define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
760; CHECK-LABEL: test_vhcaddq_rot270_x_s16:
761; CHECK:       @ %bb.0: @ %entry
762; CHECK-NEXT:    vmsr p0, r0
763; CHECK-NEXT:    vpst
764; CHECK-NEXT:    vhcaddt.s16 q0, q0, q1, #270
765; CHECK-NEXT:    bx lr
766entry:
767  %0 = zext i16 %p to i32
768  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
769  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
770  ret <8 x i16> %2
771}
772
; vhcadd (halving complex add), 32-bit lanes, rotation #270 (second selector
; argument is 1), predicated with an undef inactive operand. The expected code
; writes the result to q2 and then copies it into q0.
; NOTE(review): presumably the 32-bit encoding does not allow the destination to
; overlap a source register -- confirm against the MVE instruction reference.
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %mask)
  ret <4 x i32> %res
}
787
; vhcadd (halving complex add), 8-bit lanes, rotation #90, merging form: the
; caller-supplied %inactive vector is passed as the inactive operand. The
; expected instruction uses q0 as destination and q1/q2 as sources.
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %mask)
  ret <16 x i8> %res
}
801
; vhcadd (halving complex add), 16-bit lanes, rotation #90, merging form: the
; caller-supplied %inactive vector is passed as the inactive operand. The
; expected instruction uses q0 as destination and q1/q2 as sources.
define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %mask)
  ret <8 x i16> %res
}
815
; vhcadd (halving complex add), 32-bit lanes, rotation #90, merging form: the
; caller-supplied %inactive vector is passed as the inactive operand. The
; expected instruction uses q0 as destination and q1/q2 as sources, with no
; extra vmov.
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %mask)
  ret <4 x i32> %res
}
829
; vhcadd (halving complex add), 8-bit lanes, rotation #270 (second selector
; argument is 1), merging form: the caller-supplied %inactive vector is passed
; as the inactive operand. Expected destination is q0, sources q1/q2.
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %mask)
  ret <16 x i8> %res
}
843
; vhcadd (halving complex add), 16-bit lanes, rotation #270 (second selector
; argument is 1), merging form: the caller-supplied %inactive vector is passed
; as the inactive operand. Expected destination is q0, sources q1/q2.
define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %mask)
  ret <8 x i16> %res
}
857
; vhcadd (halving complex add), 32-bit lanes, rotation #270 (second selector
; argument is 1), merging form: the caller-supplied %inactive vector is passed
; as the inactive operand. Expected destination is q0, sources q1/q2, with no
; extra vmov.
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %mask)
  ret <4 x i32> %res
}
871