; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Masked v4i32 add: lanes enabled by the vctp32 mask of %n take %x + %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vaddt.i32.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 add: lanes enabled by the vctp16 mask of %n take %x + %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vaddt.i16.
define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 add: lanes enabled by the vctp8 mask of %n take %x + %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vaddt.i8.
define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 subtract: lanes enabled by the vctp32 mask of %n take %x - %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vsubt.i32.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 subtract: lanes enabled by the vctp16 mask of %n take %x - %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vsubt.i16.
define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 subtract: lanes enabled by the vctp8 mask of %n take %x - %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vsubt.i8.
define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 multiply: lanes enabled by the vctp32 mask of %n take %x * %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vmult.i32.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 multiply: lanes enabled by the vctp16 mask of %n take %x * %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vmult.i16.
define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 multiply: lanes enabled by the vctp8 mask of %n take %x * %y,
; all other lanes keep %x. Expected codegen is one VPST-predicated vmult.i8.
define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 bitwise AND: lanes enabled by the vctp32 mask of %n take
; %x & %y, all other lanes keep %x. Expected codegen is a predicated vandt.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 bitwise AND: lanes enabled by the vctp16 mask of %n take
; %x & %y, all other lanes keep %x. Expected codegen is a predicated vandt.
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 bitwise AND: lanes enabled by the vctp8 mask of %n take
; %x & %y, all other lanes keep %x. Expected codegen is a predicated vandt.
define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 bitwise OR: lanes enabled by the vctp32 mask of %n take
; %x | %y, all other lanes keep %x. Expected codegen is a predicated vorrt.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 bitwise OR: lanes enabled by the vctp16 mask of %n take
; %x | %y, all other lanes keep %x. Expected codegen is a predicated vorrt.
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 bitwise OR: lanes enabled by the vctp8 mask of %n take
; %x | %y, all other lanes keep %x. Expected codegen is a predicated vorrt.
define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 bitwise XOR: lanes enabled by the vctp32 mask of %n take
; %x ^ %y, all other lanes keep %x. Expected codegen is a predicated veort.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 bitwise XOR: lanes enabled by the vctp16 mask of %n take
; %x ^ %y, all other lanes keep %x. Expected codegen is a predicated veort.
define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 bitwise XOR: lanes enabled by the vctp8 mask of %n take
; %x ^ %y, all other lanes keep %x. Expected codegen is a predicated veort.
define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 and-not: %y is inverted (xor with all-ones) and ANDed with %x;
; lanes disabled by the vctp32 mask keep %x. Expected codegen is a predicated vbict.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 and-not: %y is inverted (xor with all-ones) and ANDed with %x;
; lanes disabled by the vctp16 mask keep %x. Expected codegen is a predicated vbict.
define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 and-not: %y is inverted (xor with all-ones) and ANDed with %x;
; lanes disabled by the vctp8 mask keep %x. Expected codegen is a predicated vbict.
define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 or-not: %y is inverted (xor with all-ones) and ORed with %x;
; lanes disabled by the vctp32 mask keep %x. Expected codegen is a predicated vornt.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 or-not: %y is inverted (xor with all-ones) and ORed with %x;
; lanes disabled by the vctp16 mask keep %x. Expected codegen is a predicated vornt.
define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 or-not: %y is inverted (xor with all-ones) and ORed with %x;
; lanes disabled by the vctp8 mask keep %x. Expected codegen is a predicated vornt.
define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4f32 fadd: lanes enabled by the vctp32 mask of %n take %x + %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vaddt.f32.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; Masked v8f16 fadd: lanes enabled by the vctp16 mask of %n take %x + %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vaddt.f16.
define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Masked v4f32 fsub: lanes enabled by the vctp32 mask of %n take %x - %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vsubt.f32.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; Masked v8f16 fsub: lanes enabled by the vctp16 mask of %n take %x - %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vsubt.f16.
define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Masked v4f32 fmul: lanes enabled by the vctp32 mask of %n take %x * %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vmult.f32.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; Masked v8f16 fmul: lanes enabled by the vctp16 mask of %n take %x * %y, all
; other lanes keep %x. Expected codegen is one VPST-predicated vmult.f16.
define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Masked v4i32 signed min written as icmp slt + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vmint.s32.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 signed min written as icmp slt + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vmint.s16.
define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 signed min written as icmp slt + select; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vmint.s8.
define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 signed max written as icmp sgt + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vmaxt.s32.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 signed max written as icmp sgt + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vmaxt.s16.
define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 signed max written as icmp sgt + select; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vmaxt.s8.
define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 unsigned min written as icmp ult + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vmint.u32.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 unsigned min written as icmp ult + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vmint.u16.
define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 unsigned min written as icmp ult + select; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vmint.u8.
define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 unsigned max written as icmp ugt + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vmaxt.u32.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 unsigned max written as icmp ugt + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vmaxt.u16.
define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 unsigned max written as icmp ugt + select; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vmaxt.u8.
define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4f32 min written as fast fcmp olt + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vminnmt.f32.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; Masked v8f16 min written as fast fcmp olt + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vminnmt.f16.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Masked v4f32 max written as fast fcmp ogt + select; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vmaxnmt.f32.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; Masked v8f16 max written as fast fcmp ogt + select; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vmaxnmt.f16.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Masked v4i32 signed saturating add via llvm.sadd.sat; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vqaddt.s32.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 signed saturating add via llvm.sadd.sat; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vqaddt.s16.
define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 signed saturating add via llvm.sadd.sat; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vqaddt.s8.
define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 unsigned saturating add via llvm.uadd.sat; lanes disabled by the
; vctp32 mask of %n keep %x. Expected codegen is one predicated vqaddt.u32.
define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 unsigned saturating add via llvm.uadd.sat; lanes disabled by the
; vctp16 mask of %n keep %x. Expected codegen is one predicated vqaddt.u16.
define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 unsigned saturating add via llvm.uadd.sat; lanes disabled by the
; vctp8 mask of %n keep %x. Expected codegen is one predicated vqaddt.u8.
define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 signed saturating subtract via llvm.ssub.sat; lanes disabled by
; the vctp32 mask of %n keep %x. Expected codegen is one predicated vqsubt.s32.
define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 signed saturating subtract via llvm.ssub.sat; lanes disabled by
; the vctp16 mask of %n keep %x. Expected codegen is one predicated vqsubt.s16.
define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 signed saturating subtract via llvm.ssub.sat; lanes disabled by
; the vctp8 mask of %n keep %x. Expected codegen is one predicated vqsubt.s8.
define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 unsigned saturating subtract via llvm.usub.sat; lanes disabled by
; the vctp32 mask of %n keep %x. Expected codegen is one predicated vqsubt.u32.
define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 unsigned saturating subtract via llvm.usub.sat; lanes disabled by
; the vctp16 mask of %n keep %x. Expected codegen is one predicated vqsubt.u16.
define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 unsigned saturating subtract via llvm.usub.sat; lanes disabled by
; the vctp8 mask of %n keep %x. Expected codegen is one predicated vqsubt.u8.
define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 add of a splatted scalar: %y is broadcast with insertelement +
; shufflevector, added to %x, and lanes disabled by the vctp32 mask keep %x.
; Expected codegen uses the vector-by-scalar form (vaddt.i32 q0, q0, r0).
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 add of a splatted scalar: %y is broadcast with insertelement +
; shufflevector, added to %x, and lanes disabled by the vctp16 mask keep %x.
; Expected codegen uses the vector-by-scalar form (vaddt.i16 q0, q0, r0).
define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Masked v16i8 add of a splatted scalar: %y is broadcast with insertelement +
; shufflevector, added to %x, and lanes disabled by the vctp8 mask keep %x.
; Expected codegen uses the vector-by-scalar form (vaddt.i8 q0, q0, r0).
define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Masked v4i32 subtract of a splatted scalar: %y is broadcast with insertelement
; + shufflevector, subtracted from %x, and lanes disabled by the vctp32 mask
; keep %x. Expected codegen uses the vector-by-scalar form (vsubt.i32 q0, q0, r0).
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Masked v8i16 subtract of a splatted scalar: %y is broadcast with insertelement
; + shufflevector, subtracted from %x, and lanes disabled by the vctp16 mask
; keep %x. Expected codegen uses the vector-by-scalar form (vsubt.i16 q0, q0, r0).
define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Predicated subtract of a broadcast scalar, v16i8: lanes enabled by vctp8(%n) receive
; %x - splat(%y); all other lanes keep %x. The assertions expect a single
; VPT-predicated vsubt.i8 with the scalar in r0 and the mask from r1.
918define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
919; CHECK-LABEL: subqr_v16i8_x:
920; CHECK:       @ %bb.0: @ %entry
921; CHECK-NEXT:    vctp.8 r1
922; CHECK-NEXT:    vpst
923; CHECK-NEXT:    vsubt.i8 q0, q0, r0
924; CHECK-NEXT:    bx lr
925entry:
926  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
927  %i = insertelement <16 x i8> undef, i8 %y, i32 0
928  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
929  %a = sub <16 x i8> %x, %ys
930  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
931  ret <16 x i8> %b
932}
933
; Predicated multiply by a broadcast scalar, v4i32: lanes enabled by vctp32(%n) receive
; %x * splat(%y); all other lanes keep %x. The assertions expect a single
; VPT-predicated vmult.i32 with the scalar in r0 and the mask from r1.
934define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
935; CHECK-LABEL: mulqr_v4i32_x:
936; CHECK:       @ %bb.0: @ %entry
937; CHECK-NEXT:    vctp.32 r1
938; CHECK-NEXT:    vpst
939; CHECK-NEXT:    vmult.i32 q0, q0, r0
940; CHECK-NEXT:    bx lr
941entry:
942  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
943  %i = insertelement <4 x i32> undef, i32 %y, i32 0
944  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
945  %a = mul <4 x i32> %x, %ys
946  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
947  ret <4 x i32> %b
948}
949
; Predicated multiply by a broadcast scalar, v8i16: lanes enabled by vctp16(%n) receive
; %x * splat(%y); all other lanes keep %x. The assertions expect a single
; VPT-predicated vmult.i16 with the scalar in r0 and the mask from r1.
950define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
951; CHECK-LABEL: mulqr_v8i16_x:
952; CHECK:       @ %bb.0: @ %entry
953; CHECK-NEXT:    vctp.16 r1
954; CHECK-NEXT:    vpst
955; CHECK-NEXT:    vmult.i16 q0, q0, r0
956; CHECK-NEXT:    bx lr
957entry:
958  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
959  %i = insertelement <8 x i16> undef, i16 %y, i32 0
960  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
961  %a = mul <8 x i16> %x, %ys
962  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
963  ret <8 x i16> %b
964}
965
; Predicated multiply by a broadcast scalar, v16i8: lanes enabled by vctp8(%n) receive
; %x * splat(%y); all other lanes keep %x. The assertions expect a single
; VPT-predicated vmult.i8 with the scalar in r0 and the mask from r1.
966define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
967; CHECK-LABEL: mulqr_v16i8_x:
968; CHECK:       @ %bb.0: @ %entry
969; CHECK-NEXT:    vctp.8 r1
970; CHECK-NEXT:    vpst
971; CHECK-NEXT:    vmult.i8 q0, q0, r0
972; CHECK-NEXT:    bx lr
973entry:
974  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
975  %i = insertelement <16 x i8> undef, i8 %y, i32 0
976  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
977  %a = mul <16 x i8> %x, %ys
978  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
979  ret <16 x i8> %b
980}
981
; Predicated floating-point add with a broadcast scalar, v4f32: lanes enabled by
; vctp32(%n) receive %x + splat(%y); all other lanes keep %x. The assertions expect the
; scalar to be moved from s4 into r1, the mask to come from r0, and one predicated
; vaddt.f32 taking r1 as its scalar operand.
982define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
983; CHECK-LABEL: faddqr_v4f32_x:
984; CHECK:       @ %bb.0: @ %entry
985; CHECK-NEXT:    vmov r1, s4
986; CHECK-NEXT:    vctp.32 r0
987; CHECK-NEXT:    vpst
988; CHECK-NEXT:    vaddt.f32 q0, q0, r1
989; CHECK-NEXT:    bx lr
990entry:
991  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
992  %i = insertelement <4 x float> undef, float %y, i32 0
993  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
994  %a = fadd <4 x float> %x, %ys
995  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
996  ret <4 x float> %b
997}
998
; Predicated floating-point add with a broadcast scalar, v8f16: lanes enabled by
; vctp16(%n) receive %x + splat(%y); all other lanes keep %x. The assertions expect the
; half scalar to be moved from s4 into r1 (vmov.f16), the mask to come from r0, and one
; predicated vaddt.f16 taking r1 as its scalar operand.
999define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1000; CHECK-LABEL: faddqr_v8f16_x:
1001; CHECK:       @ %bb.0: @ %entry
1002; CHECK-NEXT:    vmov.f16 r1, s4
1003; CHECK-NEXT:    vctp.16 r0
1004; CHECK-NEXT:    vpst
1005; CHECK-NEXT:    vaddt.f16 q0, q0, r1
1006; CHECK-NEXT:    bx lr
1007entry:
1008  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1009  %i = insertelement <8 x half> undef, half %y, i32 0
1010  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1011  %a = fadd <8 x half> %x, %ys
1012  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1013  ret <8 x half> %b
1014}
1015
; Predicated floating-point subtract of a broadcast scalar, v4f32: lanes enabled by
; vctp32(%n) receive %x - splat(%y); all other lanes keep %x. The assertions expect the
; scalar to be moved from s4 into r1, the mask to come from r0, and one predicated
; vsubt.f32 taking r1 as its scalar operand.
1016define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1017; CHECK-LABEL: fsubqr_v4f32_x:
1018; CHECK:       @ %bb.0: @ %entry
1019; CHECK-NEXT:    vmov r1, s4
1020; CHECK-NEXT:    vctp.32 r0
1021; CHECK-NEXT:    vpst
1022; CHECK-NEXT:    vsubt.f32 q0, q0, r1
1023; CHECK-NEXT:    bx lr
1024entry:
1025  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1026  %i = insertelement <4 x float> undef, float %y, i32 0
1027  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1028  %a = fsub <4 x float> %x, %ys
1029  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
1030  ret <4 x float> %b
1031}
1032
; Predicated floating-point subtract of a broadcast scalar, v8f16: lanes enabled by
; vctp16(%n) receive %x - splat(%y); all other lanes keep %x. The assertions expect the
; half scalar to be moved from s4 into r1 (vmov.f16), the mask to come from r0, and one
; predicated vsubt.f16 taking r1 as its scalar operand.
1033define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1034; CHECK-LABEL: fsubqr_v8f16_x:
1035; CHECK:       @ %bb.0: @ %entry
1036; CHECK-NEXT:    vmov.f16 r1, s4
1037; CHECK-NEXT:    vctp.16 r0
1038; CHECK-NEXT:    vpst
1039; CHECK-NEXT:    vsubt.f16 q0, q0, r1
1040; CHECK-NEXT:    bx lr
1041entry:
1042  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1043  %i = insertelement <8 x half> undef, half %y, i32 0
1044  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1045  %a = fsub <8 x half> %x, %ys
1046  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1047  ret <8 x half> %b
1048}
1049
; Predicated floating-point multiply by a broadcast scalar, v4f32: lanes enabled by
; vctp32(%n) receive %x * splat(%y); all other lanes keep %x. The assertions expect the
; scalar to be moved from s4 into r1, the mask to come from r0, and one predicated
; vmult.f32 taking r1 as its scalar operand.
1050define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1051; CHECK-LABEL: fmulqr_v4f32_x:
1052; CHECK:       @ %bb.0: @ %entry
1053; CHECK-NEXT:    vmov r1, s4
1054; CHECK-NEXT:    vctp.32 r0
1055; CHECK-NEXT:    vpst
1056; CHECK-NEXT:    vmult.f32 q0, q0, r1
1057; CHECK-NEXT:    bx lr
1058entry:
1059  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1060  %i = insertelement <4 x float> undef, float %y, i32 0
1061  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1062  %a = fmul <4 x float> %x, %ys
1063  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
1064  ret <4 x float> %b
1065}
1066
; Predicated floating-point multiply by a broadcast scalar, v8f16: lanes enabled by
; vctp16(%n) receive %x * splat(%y); all other lanes keep %x. The assertions expect the
; half scalar to be moved from s4 into r1 (vmov.f16), the mask to come from r0, and one
; predicated vmult.f16 taking r1 as its scalar operand.
1067define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1068; CHECK-LABEL: fmulqr_v8f16_x:
1069; CHECK:       @ %bb.0: @ %entry
1070; CHECK-NEXT:    vmov.f16 r1, s4
1071; CHECK-NEXT:    vctp.16 r0
1072; CHECK-NEXT:    vpst
1073; CHECK-NEXT:    vmult.f16 q0, q0, r1
1074; CHECK-NEXT:    bx lr
1075entry:
1076  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1077  %i = insertelement <8 x half> undef, half %y, i32 0
1078  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1079  %a = fmul <8 x half> %x, %ys
1080  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1081  ret <8 x half> %b
1082}
1083
; Predicated signed saturating add (llvm.sadd.sat) with a broadcast scalar, v4i32:
; lanes enabled by vctp32(%n) receive sadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.s32 with the scalar in r0, mask from r1.
1084define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1085; CHECK-LABEL: sadd_satqr_v4i32_x:
1086; CHECK:       @ %bb.0: @ %entry
1087; CHECK-NEXT:    vctp.32 r1
1088; CHECK-NEXT:    vpst
1089; CHECK-NEXT:    vqaddt.s32 q0, q0, r0
1090; CHECK-NEXT:    bx lr
1091entry:
1092  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1093  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1094  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1095  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1096  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1097  ret <4 x i32> %b
1098}
1099
; Predicated signed saturating add (llvm.sadd.sat) with a broadcast scalar, v8i16:
; lanes enabled by vctp16(%n) receive sadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.s16 with the scalar in r0, mask from r1.
1100define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1101; CHECK-LABEL: sadd_satqr_v8i16_x:
1102; CHECK:       @ %bb.0: @ %entry
1103; CHECK-NEXT:    vctp.16 r1
1104; CHECK-NEXT:    vpst
1105; CHECK-NEXT:    vqaddt.s16 q0, q0, r0
1106; CHECK-NEXT:    bx lr
1107entry:
1108  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1109  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1110  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1111  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1112  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1113  ret <8 x i16> %b
1114}
1115
; Predicated signed saturating add (llvm.sadd.sat) with a broadcast scalar, v16i8:
; lanes enabled by vctp8(%n) receive sadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.s8 with the scalar in r0, mask from r1.
1116define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1117; CHECK-LABEL: sadd_satqr_v16i8_x:
1118; CHECK:       @ %bb.0: @ %entry
1119; CHECK-NEXT:    vctp.8 r1
1120; CHECK-NEXT:    vpst
1121; CHECK-NEXT:    vqaddt.s8 q0, q0, r0
1122; CHECK-NEXT:    bx lr
1123entry:
1124  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1125  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1126  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1127  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1128  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1129  ret <16 x i8> %b
1130}
1131
; Predicated unsigned saturating add (llvm.uadd.sat) with a broadcast scalar, v4i32:
; lanes enabled by vctp32(%n) receive uadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.u32 with the scalar in r0, mask from r1.
1132define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1133; CHECK-LABEL: uadd_satqr_v4i32_x:
1134; CHECK:       @ %bb.0: @ %entry
1135; CHECK-NEXT:    vctp.32 r1
1136; CHECK-NEXT:    vpst
1137; CHECK-NEXT:    vqaddt.u32 q0, q0, r0
1138; CHECK-NEXT:    bx lr
1139entry:
1140  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1141  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1142  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1143  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1144  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1145  ret <4 x i32> %b
1146}
1147
; Predicated unsigned saturating add (llvm.uadd.sat) with a broadcast scalar, v8i16:
; lanes enabled by vctp16(%n) receive uadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.u16 with the scalar in r0, mask from r1.
1148define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1149; CHECK-LABEL: uadd_satqr_v8i16_x:
1150; CHECK:       @ %bb.0: @ %entry
1151; CHECK-NEXT:    vctp.16 r1
1152; CHECK-NEXT:    vpst
1153; CHECK-NEXT:    vqaddt.u16 q0, q0, r0
1154; CHECK-NEXT:    bx lr
1155entry:
1156  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1157  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1158  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1159  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1160  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1161  ret <8 x i16> %b
1162}
1163
; Predicated unsigned saturating add (llvm.uadd.sat) with a broadcast scalar, v16i8:
; lanes enabled by vctp8(%n) receive uadd.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqaddt.u8 with the scalar in r0, mask from r1.
1164define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1165; CHECK-LABEL: uadd_satqr_v16i8_x:
1166; CHECK:       @ %bb.0: @ %entry
1167; CHECK-NEXT:    vctp.8 r1
1168; CHECK-NEXT:    vpst
1169; CHECK-NEXT:    vqaddt.u8 q0, q0, r0
1170; CHECK-NEXT:    bx lr
1171entry:
1172  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1173  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1174  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1175  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1176  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1177  ret <16 x i8> %b
1178}
1179
; Predicated signed saturating subtract (llvm.ssub.sat) of a broadcast scalar, v4i32:
; lanes enabled by vctp32(%n) receive ssub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.s32 with the scalar in r0, mask from r1.
1180define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1181; CHECK-LABEL: ssub_satqr_v4i32_x:
1182; CHECK:       @ %bb.0: @ %entry
1183; CHECK-NEXT:    vctp.32 r1
1184; CHECK-NEXT:    vpst
1185; CHECK-NEXT:    vqsubt.s32 q0, q0, r0
1186; CHECK-NEXT:    bx lr
1187entry:
1188  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1189  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1190  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1191  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1192  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1193  ret <4 x i32> %b
1194}
1195
; Predicated signed saturating subtract (llvm.ssub.sat) of a broadcast scalar, v8i16:
; lanes enabled by vctp16(%n) receive ssub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.s16 with the scalar in r0, mask from r1.
1196define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1197; CHECK-LABEL: ssub_satqr_v8i16_x:
1198; CHECK:       @ %bb.0: @ %entry
1199; CHECK-NEXT:    vctp.16 r1
1200; CHECK-NEXT:    vpst
1201; CHECK-NEXT:    vqsubt.s16 q0, q0, r0
1202; CHECK-NEXT:    bx lr
1203entry:
1204  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1205  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1206  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1207  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1208  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1209  ret <8 x i16> %b
1210}
1211
; Predicated signed saturating subtract (llvm.ssub.sat) of a broadcast scalar, v16i8:
; lanes enabled by vctp8(%n) receive ssub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.s8 with the scalar in r0, mask from r1.
1212define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1213; CHECK-LABEL: ssub_satqr_v16i8_x:
1214; CHECK:       @ %bb.0: @ %entry
1215; CHECK-NEXT:    vctp.8 r1
1216; CHECK-NEXT:    vpst
1217; CHECK-NEXT:    vqsubt.s8 q0, q0, r0
1218; CHECK-NEXT:    bx lr
1219entry:
1220  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1221  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1222  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1223  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1224  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1225  ret <16 x i8> %b
1226}
1227
; Predicated unsigned saturating subtract (llvm.usub.sat) of a broadcast scalar, v4i32:
; lanes enabled by vctp32(%n) receive usub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.u32 with the scalar in r0, mask from r1.
1228define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1229; CHECK-LABEL: usub_satqr_v4i32_x:
1230; CHECK:       @ %bb.0: @ %entry
1231; CHECK-NEXT:    vctp.32 r1
1232; CHECK-NEXT:    vpst
1233; CHECK-NEXT:    vqsubt.u32 q0, q0, r0
1234; CHECK-NEXT:    bx lr
1235entry:
1236  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1237  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1238  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1239  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1240  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1241  ret <4 x i32> %b
1242}
1243
; Predicated unsigned saturating subtract (llvm.usub.sat) of a broadcast scalar, v8i16:
; lanes enabled by vctp16(%n) receive usub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.u16 with the scalar in r0, mask from r1.
1244define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1245; CHECK-LABEL: usub_satqr_v8i16_x:
1246; CHECK:       @ %bb.0: @ %entry
1247; CHECK-NEXT:    vctp.16 r1
1248; CHECK-NEXT:    vpst
1249; CHECK-NEXT:    vqsubt.u16 q0, q0, r0
1250; CHECK-NEXT:    bx lr
1251entry:
1252  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1253  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1254  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1255  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1256  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1257  ret <8 x i16> %b
1258}
1259
; Predicated unsigned saturating subtract (llvm.usub.sat) of a broadcast scalar, v16i8:
; lanes enabled by vctp8(%n) receive usub.sat(%x, splat(%y)); all other lanes keep %x.
; The assertions expect one predicated vqsubt.u8 with the scalar in r0, mask from r1.
1260define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1261; CHECK-LABEL: usub_satqr_v16i8_x:
1262; CHECK:       @ %bb.0: @ %entry
1263; CHECK-NEXT:    vctp.8 r1
1264; CHECK-NEXT:    vpst
1265; CHECK-NEXT:    vqsubt.u8 q0, q0, r0
1266; CHECK-NEXT:    bx lr
1267entry:
1268  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1269  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1270  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1271  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1272  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1273  ret <16 x i8> %b
1274}
1275
; Predicated vector add, v4i32, with %y as the passthrough: lanes enabled by vctp32(%n)
; receive %x + %y; all other lanes keep %y. The assertions expect a predicated
; vaddt.i32 whose destination is q1, then vmov q0, q1 to place the result in q0.
1276define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1277; CHECK-LABEL: add_v4i32_y:
1278; CHECK:       @ %bb.0: @ %entry
1279; CHECK-NEXT:    vctp.32 r0
1280; CHECK-NEXT:    vpst
1281; CHECK-NEXT:    vaddt.i32 q1, q0, q1
1282; CHECK-NEXT:    vmov q0, q1
1283; CHECK-NEXT:    bx lr
1284entry:
1285  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1286  %a = add <4 x i32> %x, %y
1287  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1288  ret <4 x i32> %b
1289}
1290
; Predicated vector add, v8i16, with %y as the passthrough: lanes enabled by vctp16(%n)
; receive %x + %y; all other lanes keep %y. The assertions expect a predicated
; vaddt.i16 whose destination is q1, then vmov q0, q1 to place the result in q0.
1291define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1292; CHECK-LABEL: add_v8i16_y:
1293; CHECK:       @ %bb.0: @ %entry
1294; CHECK-NEXT:    vctp.16 r0
1295; CHECK-NEXT:    vpst
1296; CHECK-NEXT:    vaddt.i16 q1, q0, q1
1297; CHECK-NEXT:    vmov q0, q1
1298; CHECK-NEXT:    bx lr
1299entry:
1300  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1301  %a = add <8 x i16> %x, %y
1302  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1303  ret <8 x i16> %b
1304}
1305
; Predicated vector add, v16i8, with %y as the passthrough: lanes enabled by vctp8(%n)
; receive %x + %y; all other lanes keep %y. The assertions expect a predicated
; vaddt.i8 whose destination is q1, then vmov q0, q1 to place the result in q0.
1306define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1307; CHECK-LABEL: add_v16i8_y:
1308; CHECK:       @ %bb.0: @ %entry
1309; CHECK-NEXT:    vctp.8 r0
1310; CHECK-NEXT:    vpst
1311; CHECK-NEXT:    vaddt.i8 q1, q0, q1
1312; CHECK-NEXT:    vmov q0, q1
1313; CHECK-NEXT:    bx lr
1314entry:
1315  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1316  %a = add <16 x i8> %x, %y
1317  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1318  ret <16 x i8> %b
1319}
1320
; Predicated vector subtract, v4i32, with %y as the passthrough: lanes enabled by
; vctp32(%n) receive %x - %y; all other lanes keep %y. The assertions expect a
; predicated vsubt.i32 whose destination is q1, then vmov q0, q1.
1321define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1322; CHECK-LABEL: sub_v4i32_y:
1323; CHECK:       @ %bb.0: @ %entry
1324; CHECK-NEXT:    vctp.32 r0
1325; CHECK-NEXT:    vpst
1326; CHECK-NEXT:    vsubt.i32 q1, q0, q1
1327; CHECK-NEXT:    vmov q0, q1
1328; CHECK-NEXT:    bx lr
1329entry:
1330  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1331  %a = sub <4 x i32> %x, %y
1332  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1333  ret <4 x i32> %b
1334}
1335
; Predicated vector subtract, v8i16, with %y as the passthrough: lanes enabled by
; vctp16(%n) receive %x - %y; all other lanes keep %y. The assertions expect a
; predicated vsubt.i16 whose destination is q1, then vmov q0, q1.
1336define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1337; CHECK-LABEL: sub_v8i16_y:
1338; CHECK:       @ %bb.0: @ %entry
1339; CHECK-NEXT:    vctp.16 r0
1340; CHECK-NEXT:    vpst
1341; CHECK-NEXT:    vsubt.i16 q1, q0, q1
1342; CHECK-NEXT:    vmov q0, q1
1343; CHECK-NEXT:    bx lr
1344entry:
1345  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1346  %a = sub <8 x i16> %x, %y
1347  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1348  ret <8 x i16> %b
1349}
1350
; Predicated vector subtract, v16i8, with %y as the passthrough: lanes enabled by
; vctp8(%n) receive %x - %y; all other lanes keep %y. The assertions expect a
; predicated vsubt.i8 whose destination is q1, then vmov q0, q1.
1351define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1352; CHECK-LABEL: sub_v16i8_y:
1353; CHECK:       @ %bb.0: @ %entry
1354; CHECK-NEXT:    vctp.8 r0
1355; CHECK-NEXT:    vpst
1356; CHECK-NEXT:    vsubt.i8 q1, q0, q1
1357; CHECK-NEXT:    vmov q0, q1
1358; CHECK-NEXT:    bx lr
1359entry:
1360  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1361  %a = sub <16 x i8> %x, %y
1362  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1363  ret <16 x i8> %b
1364}
1365
; Predicated vector multiply, v4i32, with %y as the passthrough: lanes enabled by
; vctp32(%n) receive %x * %y; all other lanes keep %y. The assertions expect a
; predicated vmult.i32 whose destination is q1, then vmov q0, q1.
1366define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1367; CHECK-LABEL: mul_v4i32_y:
1368; CHECK:       @ %bb.0: @ %entry
1369; CHECK-NEXT:    vctp.32 r0
1370; CHECK-NEXT:    vpst
1371; CHECK-NEXT:    vmult.i32 q1, q0, q1
1372; CHECK-NEXT:    vmov q0, q1
1373; CHECK-NEXT:    bx lr
1374entry:
1375  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1376  %a = mul <4 x i32> %x, %y
1377  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1378  ret <4 x i32> %b
1379}
1380
; Predicated vector multiply, v8i16, with %y as the passthrough: lanes enabled by
; vctp16(%n) receive %x * %y; all other lanes keep %y. The assertions expect a
; predicated vmult.i16 whose destination is q1, then vmov q0, q1.
1381define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1382; CHECK-LABEL: mul_v8i16_y:
1383; CHECK:       @ %bb.0: @ %entry
1384; CHECK-NEXT:    vctp.16 r0
1385; CHECK-NEXT:    vpst
1386; CHECK-NEXT:    vmult.i16 q1, q0, q1
1387; CHECK-NEXT:    vmov q0, q1
1388; CHECK-NEXT:    bx lr
1389entry:
1390  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1391  %a = mul <8 x i16> %x, %y
1392  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1393  ret <8 x i16> %b
1394}
1395
; Predicated vector multiply, v16i8, with %y as the passthrough: lanes enabled by
; vctp8(%n) receive %x * %y; all other lanes keep %y. The assertions expect a
; predicated vmult.i8 whose destination is q1, then vmov q0, q1.
1396define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1397; CHECK-LABEL: mul_v16i8_y:
1398; CHECK:       @ %bb.0: @ %entry
1399; CHECK-NEXT:    vctp.8 r0
1400; CHECK-NEXT:    vpst
1401; CHECK-NEXT:    vmult.i8 q1, q0, q1
1402; CHECK-NEXT:    vmov q0, q1
1403; CHECK-NEXT:    bx lr
1404entry:
1405  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1406  %a = mul <16 x i8> %x, %y
1407  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1408  ret <16 x i8> %b
1409}
1410
; Predicated bitwise AND, v4i32, with %y as the passthrough: lanes enabled by
; vctp32(%n) receive %x & %y; all other lanes keep %y. The assertions expect a
; predicated vandt whose destination is q1, then vmov q0, q1.
1411define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1412; CHECK-LABEL: and_v4i32_y:
1413; CHECK:       @ %bb.0: @ %entry
1414; CHECK-NEXT:    vctp.32 r0
1415; CHECK-NEXT:    vpst
1416; CHECK-NEXT:    vandt q1, q0, q1
1417; CHECK-NEXT:    vmov q0, q1
1418; CHECK-NEXT:    bx lr
1419entry:
1420  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1421  %a = and <4 x i32> %x, %y
1422  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1423  ret <4 x i32> %b
1424}
1425
; Predicated bitwise AND, v8i16, with %y as the passthrough: lanes enabled by
; vctp16(%n) receive %x & %y; all other lanes keep %y. The assertions expect a
; predicated vandt whose destination is q1, then vmov q0, q1.
1426define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1427; CHECK-LABEL: and_v8i16_y:
1428; CHECK:       @ %bb.0: @ %entry
1429; CHECK-NEXT:    vctp.16 r0
1430; CHECK-NEXT:    vpst
1431; CHECK-NEXT:    vandt q1, q0, q1
1432; CHECK-NEXT:    vmov q0, q1
1433; CHECK-NEXT:    bx lr
1434entry:
1435  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1436  %a = and <8 x i16> %x, %y
1437  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1438  ret <8 x i16> %b
1439}
1440
; Predicated bitwise AND, v16i8, with %y as the passthrough: lanes enabled by
; vctp8(%n) receive %x & %y; all other lanes keep %y. The assertions expect a
; predicated vandt whose destination is q1, then vmov q0, q1.
1441define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1442; CHECK-LABEL: and_v16i8_y:
1443; CHECK:       @ %bb.0: @ %entry
1444; CHECK-NEXT:    vctp.8 r0
1445; CHECK-NEXT:    vpst
1446; CHECK-NEXT:    vandt q1, q0, q1
1447; CHECK-NEXT:    vmov q0, q1
1448; CHECK-NEXT:    bx lr
1449entry:
1450  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1451  %a = and <16 x i8> %x, %y
1452  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1453  ret <16 x i8> %b
1454}
1455
; Predicated bitwise OR, v4i32, with %y as the passthrough: lanes enabled by
; vctp32(%n) receive %x | %y; all other lanes keep %y. The assertions expect a
; predicated vorrt whose destination is q1, then vmov q0, q1.
1456define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1457; CHECK-LABEL: or_v4i32_y:
1458; CHECK:       @ %bb.0: @ %entry
1459; CHECK-NEXT:    vctp.32 r0
1460; CHECK-NEXT:    vpst
1461; CHECK-NEXT:    vorrt q1, q0, q1
1462; CHECK-NEXT:    vmov q0, q1
1463; CHECK-NEXT:    bx lr
1464entry:
1465  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1466  %a = or <4 x i32> %x, %y
1467  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1468  ret <4 x i32> %b
1469}
1470
; Predicated bitwise OR, v8i16, with %y as the passthrough: lanes enabled by
; vctp16(%n) receive %x | %y; all other lanes keep %y. The assertions expect a
; predicated vorrt whose destination is q1, then vmov q0, q1.
1471define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1472; CHECK-LABEL: or_v8i16_y:
1473; CHECK:       @ %bb.0: @ %entry
1474; CHECK-NEXT:    vctp.16 r0
1475; CHECK-NEXT:    vpst
1476; CHECK-NEXT:    vorrt q1, q0, q1
1477; CHECK-NEXT:    vmov q0, q1
1478; CHECK-NEXT:    bx lr
1479entry:
1480  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1481  %a = or <8 x i16> %x, %y
1482  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1483  ret <8 x i16> %b
1484}
1485
; Predicated bitwise OR, v16i8, with %y as the passthrough: lanes enabled by
; vctp8(%n) receive %x | %y; all other lanes keep %y. The assertions expect a
; predicated vorrt whose destination is q1, then vmov q0, q1.
1486define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1487; CHECK-LABEL: or_v16i8_y:
1488; CHECK:       @ %bb.0: @ %entry
1489; CHECK-NEXT:    vctp.8 r0
1490; CHECK-NEXT:    vpst
1491; CHECK-NEXT:    vorrt q1, q0, q1
1492; CHECK-NEXT:    vmov q0, q1
1493; CHECK-NEXT:    bx lr
1494entry:
1495  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1496  %a = or <16 x i8> %x, %y
1497  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1498  ret <16 x i8> %b
1499}
1500
; Predicated bitwise XOR, v4i32, with %y as the passthrough: lanes enabled by
; vctp32(%n) receive %x ^ %y; all other lanes keep %y. The assertions expect a
; predicated veort whose destination is q1, then vmov q0, q1.
1501define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1502; CHECK-LABEL: xor_v4i32_y:
1503; CHECK:       @ %bb.0: @ %entry
1504; CHECK-NEXT:    vctp.32 r0
1505; CHECK-NEXT:    vpst
1506; CHECK-NEXT:    veort q1, q0, q1
1507; CHECK-NEXT:    vmov q0, q1
1508; CHECK-NEXT:    bx lr
1509entry:
1510  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1511  %a = xor <4 x i32> %x, %y
1512  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1513  ret <4 x i32> %b
1514}
1515
; Predicated bitwise XOR, v8i16, with %y as the passthrough: lanes enabled by
; vctp16(%n) receive %x ^ %y; all other lanes keep %y. The assertions expect a
; predicated veort whose destination is q1, then vmov q0, q1.
1516define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1517; CHECK-LABEL: xor_v8i16_y:
1518; CHECK:       @ %bb.0: @ %entry
1519; CHECK-NEXT:    vctp.16 r0
1520; CHECK-NEXT:    vpst
1521; CHECK-NEXT:    veort q1, q0, q1
1522; CHECK-NEXT:    vmov q0, q1
1523; CHECK-NEXT:    bx lr
1524entry:
1525  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1526  %a = xor <8 x i16> %x, %y
1527  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1528  ret <8 x i16> %b
1529}
1530
; Predicated bitwise XOR, v16i8, with %y as the passthrough: lanes enabled by
; vctp8(%n) receive %x ^ %y; all other lanes keep %y. The assertions expect a
; predicated veort whose destination is q1, then vmov q0, q1.
1531define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1532; CHECK-LABEL: xor_v16i8_y:
1533; CHECK:       @ %bb.0: @ %entry
1534; CHECK-NEXT:    vctp.8 r0
1535; CHECK-NEXT:    vpst
1536; CHECK-NEXT:    veort q1, q0, q1
1537; CHECK-NEXT:    vmov q0, q1
1538; CHECK-NEXT:    bx lr
1539entry:
1540  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1541  %a = xor <16 x i8> %x, %y
1542  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1543  ret <16 x i8> %b
1544}
1545
; Predicated AND-NOT, v4i32, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ANDed with %x; lanes enabled by vctp32(%n) receive %x & ~%y, all other
; lanes keep %y. The assertions expect a predicated vbict into q1, then vmov q0, q1.
1546define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1547; CHECK-LABEL: andnot_v4i32_y:
1548; CHECK:       @ %bb.0: @ %entry
1549; CHECK-NEXT:    vctp.32 r0
1550; CHECK-NEXT:    vpst
1551; CHECK-NEXT:    vbict q1, q0, q1
1552; CHECK-NEXT:    vmov q0, q1
1553; CHECK-NEXT:    bx lr
1554entry:
1555  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1556  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1557  %a = and <4 x i32> %x, %y1
1558  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1559  ret <4 x i32> %b
1560}
1561
; Predicated AND-NOT, v8i16, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ANDed with %x; lanes enabled by vctp16(%n) receive %x & ~%y, all other
; lanes keep %y. The assertions expect a predicated vbict into q1, then vmov q0, q1.
1562define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1563; CHECK-LABEL: andnot_v8i16_y:
1564; CHECK:       @ %bb.0: @ %entry
1565; CHECK-NEXT:    vctp.16 r0
1566; CHECK-NEXT:    vpst
1567; CHECK-NEXT:    vbict q1, q0, q1
1568; CHECK-NEXT:    vmov q0, q1
1569; CHECK-NEXT:    bx lr
1570entry:
1571  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1572  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1573  %a = and <8 x i16> %x, %y1
1574  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1575  ret <8 x i16> %b
1576}
1577
; Predicated AND-NOT, v16i8, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ANDed with %x; lanes enabled by vctp8(%n) receive %x & ~%y, all other
; lanes keep %y. The assertions expect a predicated vbict into q1, then vmov q0, q1.
1578define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1579; CHECK-LABEL: andnot_v16i8_y:
1580; CHECK:       @ %bb.0: @ %entry
1581; CHECK-NEXT:    vctp.8 r0
1582; CHECK-NEXT:    vpst
1583; CHECK-NEXT:    vbict q1, q0, q1
1584; CHECK-NEXT:    vmov q0, q1
1585; CHECK-NEXT:    bx lr
1586entry:
1587  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1588  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1589  %a = and <16 x i8> %x, %y1
1590  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1591  ret <16 x i8> %b
1592}
1593
; Predicated OR-NOT, v4i32, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ORed with %x; lanes enabled by vctp32(%n) receive %x | ~%y, all other
; lanes keep %y. The assertions expect a predicated vornt into q1, then vmov q0, q1.
1594define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1595; CHECK-LABEL: ornot_v4i32_y:
1596; CHECK:       @ %bb.0: @ %entry
1597; CHECK-NEXT:    vctp.32 r0
1598; CHECK-NEXT:    vpst
1599; CHECK-NEXT:    vornt q1, q0, q1
1600; CHECK-NEXT:    vmov q0, q1
1601; CHECK-NEXT:    bx lr
1602entry:
1603  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1604  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1605  %a = or <4 x i32> %x, %y1
1606  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1607  ret <4 x i32> %b
1608}
1609
; Predicated OR-NOT, v8i16, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ORed with %x; lanes enabled by vctp16(%n) receive %x | ~%y, all other
; lanes keep %y. The assertions expect a predicated vornt into q1, then vmov q0, q1.
1610define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1611; CHECK-LABEL: ornot_v8i16_y:
1612; CHECK:       @ %bb.0: @ %entry
1613; CHECK-NEXT:    vctp.16 r0
1614; CHECK-NEXT:    vpst
1615; CHECK-NEXT:    vornt q1, q0, q1
1616; CHECK-NEXT:    vmov q0, q1
1617; CHECK-NEXT:    bx lr
1618entry:
1619  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1620  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1621  %a = or <8 x i16> %x, %y1
1622  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1623  ret <8 x i16> %b
1624}
1625
; Predicated OR-NOT, v16i8, with %y as the passthrough: %y is inverted (xor with
; all-ones) and ORed with %x; lanes enabled by vctp8(%n) receive %x | ~%y, all other
; lanes keep %y. The assertions expect a predicated vornt into q1, then vmov q0, q1.
1626define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1627; CHECK-LABEL: ornot_v16i8_y:
1628; CHECK:       @ %bb.0: @ %entry
1629; CHECK-NEXT:    vctp.8 r0
1630; CHECK-NEXT:    vpst
1631; CHECK-NEXT:    vornt q1, q0, q1
1632; CHECK-NEXT:    vmov q0, q1
1633; CHECK-NEXT:    bx lr
1634entry:
1635  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1636  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1637  %a = or <16 x i8> %x, %y1
1638  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1639  ret <16 x i8> %b
1640}
1641
; Predicated floating-point vector add, v4f32, with %y as the passthrough: lanes
; enabled by vctp32(%n) receive %x + %y; all other lanes keep %y. The assertions
; expect a predicated vaddt.f32 whose destination is q1, then vmov q0, q1.
1642define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1643; CHECK-LABEL: fadd_v4f32_y:
1644; CHECK:       @ %bb.0: @ %entry
1645; CHECK-NEXT:    vctp.32 r0
1646; CHECK-NEXT:    vpst
1647; CHECK-NEXT:    vaddt.f32 q1, q0, q1
1648; CHECK-NEXT:    vmov q0, q1
1649; CHECK-NEXT:    bx lr
1650entry:
1651  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1652  %a = fadd <4 x float> %x, %y
1653  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1654  ret <4 x float> %b
1655}
1656
; Predicated floating-point vector add, v8f16, with %y as the passthrough: lanes
; enabled by vctp16(%n) receive %x + %y; all other lanes keep %y. The assertions
; expect a predicated vaddt.f16 whose destination is q1, then vmov q0, q1.
1657define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1658; CHECK-LABEL: fadd_v8f16_y:
1659; CHECK:       @ %bb.0: @ %entry
1660; CHECK-NEXT:    vctp.16 r0
1661; CHECK-NEXT:    vpst
1662; CHECK-NEXT:    vaddt.f16 q1, q0, q1
1663; CHECK-NEXT:    vmov q0, q1
1664; CHECK-NEXT:    bx lr
1665entry:
1666  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1667  %a = fadd <8 x half> %x, %y
1668  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1669  ret <8 x half> %b
1670}
1671
; Predicated floating-point vector subtract, v4f32, with %y as the passthrough: lanes
; enabled by vctp32(%n) receive %x - %y; all other lanes keep %y. The assertions
; expect a predicated vsubt.f32 whose destination is q1, then vmov q0, q1.
1672define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1673; CHECK-LABEL: fsub_v4f32_y:
1674; CHECK:       @ %bb.0: @ %entry
1675; CHECK-NEXT:    vctp.32 r0
1676; CHECK-NEXT:    vpst
1677; CHECK-NEXT:    vsubt.f32 q1, q0, q1
1678; CHECK-NEXT:    vmov q0, q1
1679; CHECK-NEXT:    bx lr
1680entry:
1681  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1682  %a = fsub <4 x float> %x, %y
1683  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1684  ret <4 x float> %b
1685}
1686
; Predicated floating-point vector subtract, v8f16, with %y as the passthrough: lanes
; enabled by vctp16(%n) receive %x - %y; all other lanes keep %y. The assertions
; expect a predicated vsubt.f16 whose destination is q1, then vmov q0, q1.
1687define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1688; CHECK-LABEL: fsub_v8f16_y:
1689; CHECK:       @ %bb.0: @ %entry
1690; CHECK-NEXT:    vctp.16 r0
1691; CHECK-NEXT:    vpst
1692; CHECK-NEXT:    vsubt.f16 q1, q0, q1
1693; CHECK-NEXT:    vmov q0, q1
1694; CHECK-NEXT:    bx lr
1695entry:
1696  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1697  %a = fsub <8 x half> %x, %y
1698  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1699  ret <8 x half> %b
1700}
1701
; Predicated floating-point vector multiply, v4f32, with %y as the passthrough: lanes
; enabled by vctp32(%n) receive %x * %y; all other lanes keep %y. The assertions
; expect a predicated vmult.f32 whose destination is q1, then vmov q0, q1.
1702define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1703; CHECK-LABEL: fmul_v4f32_y:
1704; CHECK:       @ %bb.0: @ %entry
1705; CHECK-NEXT:    vctp.32 r0
1706; CHECK-NEXT:    vpst
1707; CHECK-NEXT:    vmult.f32 q1, q0, q1
1708; CHECK-NEXT:    vmov q0, q1
1709; CHECK-NEXT:    bx lr
1710entry:
1711  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1712  %a = fmul <4 x float> %x, %y
1713  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1714  ret <4 x float> %b
1715}
1716
; Predicated floating-point vector multiply, v8f16, with %y as the passthrough: lanes
; enabled by vctp16(%n) receive %x * %y; all other lanes keep %y. The assertions
; expect a predicated vmult.f16 whose destination is q1, then vmov q0, q1.
1717define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1718; CHECK-LABEL: fmul_v8f16_y:
1719; CHECK:       @ %bb.0: @ %entry
1720; CHECK-NEXT:    vctp.16 r0
1721; CHECK-NEXT:    vpst
1722; CHECK-NEXT:    vmult.f16 q1, q0, q1
1723; CHECK-NEXT:    vmov q0, q1
1724; CHECK-NEXT:    bx lr
1725entry:
1726  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1727  %a = fmul <8 x half> %x, %y
1728  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1729  ret <8 x half> %b
1730}
1731
; Predicated signed minimum, v4i32, with %y as the passthrough: the inner
; icmp slt + select picks the smaller of %x and %y; lanes enabled by vctp32(%n) receive
; that minimum and all other lanes keep %y. The assertions expect the compare/select
; pair to become a predicated vmint.s32 into q1, then vmov q0, q1.
1732define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1733; CHECK-LABEL: icmp_slt_v4i32_y:
1734; CHECK:       @ %bb.0: @ %entry
1735; CHECK-NEXT:    vctp.32 r0
1736; CHECK-NEXT:    vpst
1737; CHECK-NEXT:    vmint.s32 q1, q0, q1
1738; CHECK-NEXT:    vmov q0, q1
1739; CHECK-NEXT:    bx lr
1740entry:
1741  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1742  %a1 = icmp slt <4 x i32> %x, %y
1743  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
1744  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1745  ret <4 x i32> %b
1746}
1747
; Predicated signed minimum, v8i16, with %y as the passthrough: the inner
; icmp slt + select picks the smaller of %x and %y; lanes enabled by vctp16(%n) receive
; that minimum and all other lanes keep %y. The assertions expect the compare/select
; pair to become a predicated vmint.s16 into q1, then vmov q0, q1.
1748define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1749; CHECK-LABEL: icmp_slt_v8i16_y:
1750; CHECK:       @ %bb.0: @ %entry
1751; CHECK-NEXT:    vctp.16 r0
1752; CHECK-NEXT:    vpst
1753; CHECK-NEXT:    vmint.s16 q1, q0, q1
1754; CHECK-NEXT:    vmov q0, q1
1755; CHECK-NEXT:    bx lr
1756entry:
1757  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1758  %a1 = icmp slt <8 x i16> %x, %y
1759  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
1760  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1761  ret <8 x i16> %b
1762}
1763
; Predicated signed minimum, v16i8, with %y as the passthrough: the inner
; icmp slt + select picks the smaller of %x and %y; lanes enabled by vctp8(%n) receive
; that minimum and all other lanes keep %y. The assertions expect the compare/select
; pair to become a predicated vmint.s8 into q1, then vmov q0, q1.
1764define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1765; CHECK-LABEL: icmp_slt_v16i8_y:
1766; CHECK:       @ %bb.0: @ %entry
1767; CHECK-NEXT:    vctp.8 r0
1768; CHECK-NEXT:    vpst
1769; CHECK-NEXT:    vmint.s8 q1, q0, q1
1770; CHECK-NEXT:    vmov q0, q1
1771; CHECK-NEXT:    bx lr
1772entry:
1773  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1774  %a1 = icmp slt <16 x i8> %x, %y
1775  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
1776  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1777  ret <16 x i8> %b
1778}
1779
; icmp sgt + select (signed maximum of %x and %y) on <4 x i32>, then a second
; select on the @llvm.arm.mve.vctp32(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.s32 under VPST writing q1, then vmov q0, q1.
1780define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1781; CHECK-LABEL: icmp_sgt_v4i32_y:
1782; CHECK:       @ %bb.0: @ %entry
1783; CHECK-NEXT:    vctp.32 r0
1784; CHECK-NEXT:    vpst
1785; CHECK-NEXT:    vmaxt.s32 q1, q0, q1
1786; CHECK-NEXT:    vmov q0, q1
1787; CHECK-NEXT:    bx lr
1788entry:
1789  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1790  %a1 = icmp sgt <4 x i32> %x, %y
1791  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
1792  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1793  ret <4 x i32> %b
1794}
1795
; icmp sgt + select (signed maximum of %x and %y) on <8 x i16>, then a second
; select on the @llvm.arm.mve.vctp16(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.s16 under VPST writing q1, then vmov q0, q1.
1796define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1797; CHECK-LABEL: icmp_sgt_v8i16_y:
1798; CHECK:       @ %bb.0: @ %entry
1799; CHECK-NEXT:    vctp.16 r0
1800; CHECK-NEXT:    vpst
1801; CHECK-NEXT:    vmaxt.s16 q1, q0, q1
1802; CHECK-NEXT:    vmov q0, q1
1803; CHECK-NEXT:    bx lr
1804entry:
1805  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1806  %a1 = icmp sgt <8 x i16> %x, %y
1807  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
1808  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1809  ret <8 x i16> %b
1810}
1811
; icmp sgt + select (signed maximum of %x and %y) on <16 x i8>, then a second
; select on the @llvm.arm.mve.vctp8(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.s8 under VPST writing q1, then vmov q0, q1.
1812define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1813; CHECK-LABEL: icmp_sgt_v16i8_y:
1814; CHECK:       @ %bb.0: @ %entry
1815; CHECK-NEXT:    vctp.8 r0
1816; CHECK-NEXT:    vpst
1817; CHECK-NEXT:    vmaxt.s8 q1, q0, q1
1818; CHECK-NEXT:    vmov q0, q1
1819; CHECK-NEXT:    bx lr
1820entry:
1821  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1822  %a1 = icmp sgt <16 x i8> %x, %y
1823  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
1824  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1825  ret <16 x i8> %b
1826}
1827
; icmp ult + select (unsigned minimum of %x and %y) on <4 x i32>, then a second
; select on the @llvm.arm.mve.vctp32(%n) mask: true lanes take the minimum,
; false lanes take %y.
; Expected: one predicated vmint.u32 under VPST writing q1, then vmov q0, q1.
1828define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1829; CHECK-LABEL: icmp_ult_v4i32_y:
1830; CHECK:       @ %bb.0: @ %entry
1831; CHECK-NEXT:    vctp.32 r0
1832; CHECK-NEXT:    vpst
1833; CHECK-NEXT:    vmint.u32 q1, q0, q1
1834; CHECK-NEXT:    vmov q0, q1
1835; CHECK-NEXT:    bx lr
1836entry:
1837  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1838  %a1 = icmp ult <4 x i32> %x, %y
1839  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
1840  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1841  ret <4 x i32> %b
1842}
1843
; icmp ult + select (unsigned minimum of %x and %y) on <8 x i16>, then a second
; select on the @llvm.arm.mve.vctp16(%n) mask: true lanes take the minimum,
; false lanes take %y.
; Expected: one predicated vmint.u16 under VPST writing q1, then vmov q0, q1.
1844define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1845; CHECK-LABEL: icmp_ult_v8i16_y:
1846; CHECK:       @ %bb.0: @ %entry
1847; CHECK-NEXT:    vctp.16 r0
1848; CHECK-NEXT:    vpst
1849; CHECK-NEXT:    vmint.u16 q1, q0, q1
1850; CHECK-NEXT:    vmov q0, q1
1851; CHECK-NEXT:    bx lr
1852entry:
1853  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1854  %a1 = icmp ult <8 x i16> %x, %y
1855  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
1856  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1857  ret <8 x i16> %b
1858}
1859
; icmp ult + select (unsigned minimum of %x and %y) on <16 x i8>, then a second
; select on the @llvm.arm.mve.vctp8(%n) mask: true lanes take the minimum,
; false lanes take %y.
; Expected: one predicated vmint.u8 under VPST writing q1, then vmov q0, q1.
1860define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1861; CHECK-LABEL: icmp_ult_v16i8_y:
1862; CHECK:       @ %bb.0: @ %entry
1863; CHECK-NEXT:    vctp.8 r0
1864; CHECK-NEXT:    vpst
1865; CHECK-NEXT:    vmint.u8 q1, q0, q1
1866; CHECK-NEXT:    vmov q0, q1
1867; CHECK-NEXT:    bx lr
1868entry:
1869  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1870  %a1 = icmp ult <16 x i8> %x, %y
1871  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
1872  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1873  ret <16 x i8> %b
1874}
1875
; icmp ugt + select (unsigned maximum of %x and %y) on <4 x i32>, then a second
; select on the @llvm.arm.mve.vctp32(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.u32 under VPST writing q1, then vmov q0, q1.
1876define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1877; CHECK-LABEL: icmp_ugt_v4i32_y:
1878; CHECK:       @ %bb.0: @ %entry
1879; CHECK-NEXT:    vctp.32 r0
1880; CHECK-NEXT:    vpst
1881; CHECK-NEXT:    vmaxt.u32 q1, q0, q1
1882; CHECK-NEXT:    vmov q0, q1
1883; CHECK-NEXT:    bx lr
1884entry:
1885  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1886  %a1 = icmp ugt <4 x i32> %x, %y
1887  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
1888  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1889  ret <4 x i32> %b
1890}
1891
; icmp ugt + select (unsigned maximum of %x and %y) on <8 x i16>, then a second
; select on the @llvm.arm.mve.vctp16(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.u16 under VPST writing q1, then vmov q0, q1.
1892define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1893; CHECK-LABEL: icmp_ugt_v8i16_y:
1894; CHECK:       @ %bb.0: @ %entry
1895; CHECK-NEXT:    vctp.16 r0
1896; CHECK-NEXT:    vpst
1897; CHECK-NEXT:    vmaxt.u16 q1, q0, q1
1898; CHECK-NEXT:    vmov q0, q1
1899; CHECK-NEXT:    bx lr
1900entry:
1901  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1902  %a1 = icmp ugt <8 x i16> %x, %y
1903  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
1904  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1905  ret <8 x i16> %b
1906}
1907
; icmp ugt + select (unsigned maximum of %x and %y) on <16 x i8>, then a second
; select on the @llvm.arm.mve.vctp8(%n) mask: true lanes take the maximum,
; false lanes take %y.
; Expected: one predicated vmaxt.u8 under VPST writing q1, then vmov q0, q1.
1908define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1909; CHECK-LABEL: icmp_ugt_v16i8_y:
1910; CHECK:       @ %bb.0: @ %entry
1911; CHECK-NEXT:    vctp.8 r0
1912; CHECK-NEXT:    vpst
1913; CHECK-NEXT:    vmaxt.u8 q1, q0, q1
1914; CHECK-NEXT:    vmov q0, q1
1915; CHECK-NEXT:    bx lr
1916entry:
1917  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1918  %a1 = icmp ugt <16 x i8> %x, %y
1919  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
1920  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1921  ret <16 x i8> %b
1922}
1923
; fcmp fast olt + select (%x where %x < %y, otherwise %y) on <4 x float>, then a
; second select on the @llvm.arm.mve.vctp32(%n) mask: true lanes take that
; result, false lanes take %y.
; Expected: one predicated vminnmt.f32 under VPST writing q1, then vmov q0, q1.
1924define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1925; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
1926; CHECK:       @ %bb.0: @ %entry
1927; CHECK-NEXT:    vctp.32 r0
1928; CHECK-NEXT:    vpst
1929; CHECK-NEXT:    vminnmt.f32 q1, q0, q1
1930; CHECK-NEXT:    vmov q0, q1
1931; CHECK-NEXT:    bx lr
1932entry:
1933  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1934  %a1 = fcmp fast olt <4 x float> %x, %y
1935  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
1936  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1937  ret <4 x float> %b
1938}
1939
; fcmp fast olt + select (%x where %x < %y, otherwise %y) on <8 x half>, then a
; second select on the @llvm.arm.mve.vctp16(%n) mask: true lanes take that
; result, false lanes take %y.
; Expected: one predicated vminnmt.f16 under VPST writing q1, then vmov q0, q1.
1940define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1941; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
1942; CHECK:       @ %bb.0: @ %entry
1943; CHECK-NEXT:    vctp.16 r0
1944; CHECK-NEXT:    vpst
1945; CHECK-NEXT:    vminnmt.f16 q1, q0, q1
1946; CHECK-NEXT:    vmov q0, q1
1947; CHECK-NEXT:    bx lr
1948entry:
1949  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1950  %a1 = fcmp fast olt <8 x half> %x, %y
1951  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
1952  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1953  ret <8 x half> %b
1954}
1955
; fcmp fast ogt + select (%x where %x > %y, otherwise %y) on <4 x float>, then a
; second select on the @llvm.arm.mve.vctp32(%n) mask: true lanes take that
; result, false lanes take %y.
; Expected: one predicated vmaxnmt.f32 under VPST writing q1, then vmov q0, q1.
1956define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1957; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
1958; CHECK:       @ %bb.0: @ %entry
1959; CHECK-NEXT:    vctp.32 r0
1960; CHECK-NEXT:    vpst
1961; CHECK-NEXT:    vmaxnmt.f32 q1, q0, q1
1962; CHECK-NEXT:    vmov q0, q1
1963; CHECK-NEXT:    bx lr
1964entry:
1965  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1966  %a1 = fcmp fast ogt <4 x float> %x, %y
1967  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
1968  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1969  ret <4 x float> %b
1970}
1971
; fcmp fast ogt + select (%x where %x > %y, otherwise %y) on <8 x half>, then a
; second select on the @llvm.arm.mve.vctp16(%n) mask: true lanes take that
; result, false lanes take %y.
; Expected: one predicated vmaxnmt.f16 under VPST writing q1, then vmov q0, q1.
1972define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1973; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
1974; CHECK:       @ %bb.0: @ %entry
1975; CHECK-NEXT:    vctp.16 r0
1976; CHECK-NEXT:    vpst
1977; CHECK-NEXT:    vmaxnmt.f16 q1, q0, q1
1978; CHECK-NEXT:    vmov q0, q1
1979; CHECK-NEXT:    bx lr
1980entry:
1981  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1982  %a1 = fcmp fast ogt <8 x half> %x, %y
1983  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
1984  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1985  ret <8 x half> %b
1986}
1987
; @llvm.sadd.sat.v4i32(%x, %y), then a select on the @llvm.arm.mve.vctp32(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.s32 under VPST writing q1, then vmov q0, q1.
1988define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1989; CHECK-LABEL: sadd_sat_v4i32_y:
1990; CHECK:       @ %bb.0: @ %entry
1991; CHECK-NEXT:    vctp.32 r0
1992; CHECK-NEXT:    vpst
1993; CHECK-NEXT:    vqaddt.s32 q1, q0, q1
1994; CHECK-NEXT:    vmov q0, q1
1995; CHECK-NEXT:    bx lr
1996entry:
1997  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1998  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
1999  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2000  ret <4 x i32> %b
2001}
2002
; @llvm.sadd.sat.v8i16(%x, %y), then a select on the @llvm.arm.mve.vctp16(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.s16 under VPST writing q1, then vmov q0, q1.
2003define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2004; CHECK-LABEL: sadd_sat_v8i16_y:
2005; CHECK:       @ %bb.0: @ %entry
2006; CHECK-NEXT:    vctp.16 r0
2007; CHECK-NEXT:    vpst
2008; CHECK-NEXT:    vqaddt.s16 q1, q0, q1
2009; CHECK-NEXT:    vmov q0, q1
2010; CHECK-NEXT:    bx lr
2011entry:
2012  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2013  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2014  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2015  ret <8 x i16> %b
2016}
2017
; @llvm.sadd.sat.v16i8(%x, %y), then a select on the @llvm.arm.mve.vctp8(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.s8 under VPST writing q1, then vmov q0, q1.
2018define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2019; CHECK-LABEL: sadd_sat_v16i8_y:
2020; CHECK:       @ %bb.0: @ %entry
2021; CHECK-NEXT:    vctp.8 r0
2022; CHECK-NEXT:    vpst
2023; CHECK-NEXT:    vqaddt.s8 q1, q0, q1
2024; CHECK-NEXT:    vmov q0, q1
2025; CHECK-NEXT:    bx lr
2026entry:
2027  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2028  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2029  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2030  ret <16 x i8> %b
2031}
2032
; @llvm.uadd.sat.v4i32(%x, %y), then a select on the @llvm.arm.mve.vctp32(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.u32 under VPST writing q1, then vmov q0, q1.
2033define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2034; CHECK-LABEL: uadd_sat_v4i32_y:
2035; CHECK:       @ %bb.0: @ %entry
2036; CHECK-NEXT:    vctp.32 r0
2037; CHECK-NEXT:    vpst
2038; CHECK-NEXT:    vqaddt.u32 q1, q0, q1
2039; CHECK-NEXT:    vmov q0, q1
2040; CHECK-NEXT:    bx lr
2041entry:
2042  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2043  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2044  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2045  ret <4 x i32> %b
2046}
2047
; @llvm.uadd.sat.v8i16(%x, %y), then a select on the @llvm.arm.mve.vctp16(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.u16 under VPST writing q1, then vmov q0, q1.
2048define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2049; CHECK-LABEL: uadd_sat_v8i16_y:
2050; CHECK:       @ %bb.0: @ %entry
2051; CHECK-NEXT:    vctp.16 r0
2052; CHECK-NEXT:    vpst
2053; CHECK-NEXT:    vqaddt.u16 q1, q0, q1
2054; CHECK-NEXT:    vmov q0, q1
2055; CHECK-NEXT:    bx lr
2056entry:
2057  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2058  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2059  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2060  ret <8 x i16> %b
2061}
2062
; @llvm.uadd.sat.v16i8(%x, %y), then a select on the @llvm.arm.mve.vctp8(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqaddt.u8 under VPST writing q1, then vmov q0, q1.
2063define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2064; CHECK-LABEL: uadd_sat_v16i8_y:
2065; CHECK:       @ %bb.0: @ %entry
2066; CHECK-NEXT:    vctp.8 r0
2067; CHECK-NEXT:    vpst
2068; CHECK-NEXT:    vqaddt.u8 q1, q0, q1
2069; CHECK-NEXT:    vmov q0, q1
2070; CHECK-NEXT:    bx lr
2071entry:
2072  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2073  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2074  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2075  ret <16 x i8> %b
2076}
2077
; @llvm.ssub.sat.v4i32(%x, %y), then a select on the @llvm.arm.mve.vctp32(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.s32 under VPST writing q1, then vmov q0, q1.
2078define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2079; CHECK-LABEL: ssub_sat_v4i32_y:
2080; CHECK:       @ %bb.0: @ %entry
2081; CHECK-NEXT:    vctp.32 r0
2082; CHECK-NEXT:    vpst
2083; CHECK-NEXT:    vqsubt.s32 q1, q0, q1
2084; CHECK-NEXT:    vmov q0, q1
2085; CHECK-NEXT:    bx lr
2086entry:
2087  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2088  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2089  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2090  ret <4 x i32> %b
2091}
2092
; @llvm.ssub.sat.v8i16(%x, %y), then a select on the @llvm.arm.mve.vctp16(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.s16 under VPST writing q1, then vmov q0, q1.
2093define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2094; CHECK-LABEL: ssub_sat_v8i16_y:
2095; CHECK:       @ %bb.0: @ %entry
2096; CHECK-NEXT:    vctp.16 r0
2097; CHECK-NEXT:    vpst
2098; CHECK-NEXT:    vqsubt.s16 q1, q0, q1
2099; CHECK-NEXT:    vmov q0, q1
2100; CHECK-NEXT:    bx lr
2101entry:
2102  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2103  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2104  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2105  ret <8 x i16> %b
2106}
2107
; @llvm.ssub.sat.v16i8(%x, %y), then a select on the @llvm.arm.mve.vctp8(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.s8 under VPST writing q1, then vmov q0, q1.
2108define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2109; CHECK-LABEL: ssub_sat_v16i8_y:
2110; CHECK:       @ %bb.0: @ %entry
2111; CHECK-NEXT:    vctp.8 r0
2112; CHECK-NEXT:    vpst
2113; CHECK-NEXT:    vqsubt.s8 q1, q0, q1
2114; CHECK-NEXT:    vmov q0, q1
2115; CHECK-NEXT:    bx lr
2116entry:
2117  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2118  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2119  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2120  ret <16 x i8> %b
2121}
2122
; @llvm.usub.sat.v4i32(%x, %y), then a select on the @llvm.arm.mve.vctp32(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.u32 under VPST writing q1, then vmov q0, q1.
2123define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2124; CHECK-LABEL: usub_sat_v4i32_y:
2125; CHECK:       @ %bb.0: @ %entry
2126; CHECK-NEXT:    vctp.32 r0
2127; CHECK-NEXT:    vpst
2128; CHECK-NEXT:    vqsubt.u32 q1, q0, q1
2129; CHECK-NEXT:    vmov q0, q1
2130; CHECK-NEXT:    bx lr
2131entry:
2132  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2133  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2134  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2135  ret <4 x i32> %b
2136}
2137
; @llvm.usub.sat.v8i16(%x, %y), then a select on the @llvm.arm.mve.vctp16(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.u16 under VPST writing q1, then vmov q0, q1.
2138define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2139; CHECK-LABEL: usub_sat_v8i16_y:
2140; CHECK:       @ %bb.0: @ %entry
2141; CHECK-NEXT:    vctp.16 r0
2142; CHECK-NEXT:    vpst
2143; CHECK-NEXT:    vqsubt.u16 q1, q0, q1
2144; CHECK-NEXT:    vmov q0, q1
2145; CHECK-NEXT:    bx lr
2146entry:
2147  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2148  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2149  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2150  ret <8 x i16> %b
2151}
2152
; @llvm.usub.sat.v16i8(%x, %y), then a select on the @llvm.arm.mve.vctp8(%n)
; mask: true lanes take the intrinsic result, false lanes take %y.
; Expected: one predicated vqsubt.u8 under VPST writing q1, then vmov q0, q1.
2153define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2154; CHECK-LABEL: usub_sat_v16i8_y:
2155; CHECK:       @ %bb.0: @ %entry
2156; CHECK-NEXT:    vctp.8 r0
2157; CHECK-NEXT:    vpst
2158; CHECK-NEXT:    vqsubt.u8 q1, q0, q1
2159; CHECK-NEXT:    vmov q0, q1
2160; CHECK-NEXT:    bx lr
2161entry:
2162  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2163  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2164  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2165  ret <16 x i8> %b
2166}
2167
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = add %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vaddt.i32 q1, q0, r0
; under VPST; vmov q0, q1.
2168define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2169; CHECK-LABEL: addqr_v4i32_y:
2170; CHECK:       @ %bb.0: @ %entry
2171; CHECK-NEXT:    vdup.32 q1, r0
2172; CHECK-NEXT:    vctp.32 r1
2173; CHECK-NEXT:    vpst
2174; CHECK-NEXT:    vaddt.i32 q1, q0, r0
2175; CHECK-NEXT:    vmov q0, q1
2176; CHECK-NEXT:    bx lr
2177entry:
2178  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2179  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2180  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2181  %a = add <4 x i32> %x, %ys
2182  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2183  ret <4 x i32> %b
2184}
2185
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = add %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vaddt.i16 q1, q0, r0
; under VPST; vmov q0, q1.
2186define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2187; CHECK-LABEL: addqr_v8i16_y:
2188; CHECK:       @ %bb.0: @ %entry
2189; CHECK-NEXT:    vdup.16 q1, r0
2190; CHECK-NEXT:    vctp.16 r1
2191; CHECK-NEXT:    vpst
2192; CHECK-NEXT:    vaddt.i16 q1, q0, r0
2193; CHECK-NEXT:    vmov q0, q1
2194; CHECK-NEXT:    bx lr
2195entry:
2196  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2197  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2198  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2199  %a = add <8 x i16> %x, %ys
2200  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2201  ret <8 x i16> %b
2202}
2203
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = add %x, %ys, and the select on
; the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vaddt.i8 q1, q0, r0
; under VPST; vmov q0, q1.
2204define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2205; CHECK-LABEL: addqr_v16i8_y:
2206; CHECK:       @ %bb.0: @ %entry
2207; CHECK-NEXT:    vdup.8 q1, r0
2208; CHECK-NEXT:    vctp.8 r1
2209; CHECK-NEXT:    vpst
2210; CHECK-NEXT:    vaddt.i8 q1, q0, r0
2211; CHECK-NEXT:    vmov q0, q1
2212; CHECK-NEXT:    bx lr
2213entry:
2214  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2215  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2216  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2217  %a = add <16 x i8> %x, %ys
2218  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2219  ret <16 x i8> %b
2220}
2221
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = sub %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vsubt.i32 q1, q0, r0
; under VPST; vmov q0, q1.
2222define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2223; CHECK-LABEL: subqr_v4i32_y:
2224; CHECK:       @ %bb.0: @ %entry
2225; CHECK-NEXT:    vdup.32 q1, r0
2226; CHECK-NEXT:    vctp.32 r1
2227; CHECK-NEXT:    vpst
2228; CHECK-NEXT:    vsubt.i32 q1, q0, r0
2229; CHECK-NEXT:    vmov q0, q1
2230; CHECK-NEXT:    bx lr
2231entry:
2232  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2233  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2234  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2235  %a = sub <4 x i32> %x, %ys
2236  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2237  ret <4 x i32> %b
2238}
2239
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = sub %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vsubt.i16 q1, q0, r0
; under VPST; vmov q0, q1.
2240define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2241; CHECK-LABEL: subqr_v8i16_y:
2242; CHECK:       @ %bb.0: @ %entry
2243; CHECK-NEXT:    vdup.16 q1, r0
2244; CHECK-NEXT:    vctp.16 r1
2245; CHECK-NEXT:    vpst
2246; CHECK-NEXT:    vsubt.i16 q1, q0, r0
2247; CHECK-NEXT:    vmov q0, q1
2248; CHECK-NEXT:    bx lr
2249entry:
2250  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2251  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2252  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2253  %a = sub <8 x i16> %x, %ys
2254  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2255  ret <8 x i16> %b
2256}
2257
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = sub %x, %ys, and the select on
; the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vsubt.i8 q1, q0, r0
; under VPST; vmov q0, q1.
2258define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2259; CHECK-LABEL: subqr_v16i8_y:
2260; CHECK:       @ %bb.0: @ %entry
2261; CHECK-NEXT:    vdup.8 q1, r0
2262; CHECK-NEXT:    vctp.8 r1
2263; CHECK-NEXT:    vpst
2264; CHECK-NEXT:    vsubt.i8 q1, q0, r0
2265; CHECK-NEXT:    vmov q0, q1
2266; CHECK-NEXT:    bx lr
2267entry:
2268  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2269  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2270  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2271  %a = sub <16 x i8> %x, %ys
2272  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2273  ret <16 x i8> %b
2274}
2275
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = mul %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vmult.i32 q1, q0, r0
; under VPST; vmov q0, q1.
2276define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2277; CHECK-LABEL: mulqr_v4i32_y:
2278; CHECK:       @ %bb.0: @ %entry
2279; CHECK-NEXT:    vdup.32 q1, r0
2280; CHECK-NEXT:    vctp.32 r1
2281; CHECK-NEXT:    vpst
2282; CHECK-NEXT:    vmult.i32 q1, q0, r0
2283; CHECK-NEXT:    vmov q0, q1
2284; CHECK-NEXT:    bx lr
2285entry:
2286  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2287  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2288  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2289  %a = mul <4 x i32> %x, %ys
2290  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2291  ret <4 x i32> %b
2292}
2293
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = mul %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vmult.i16 q1, q0, r0
; under VPST; vmov q0, q1.
2294define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2295; CHECK-LABEL: mulqr_v8i16_y:
2296; CHECK:       @ %bb.0: @ %entry
2297; CHECK-NEXT:    vdup.16 q1, r0
2298; CHECK-NEXT:    vctp.16 r1
2299; CHECK-NEXT:    vpst
2300; CHECK-NEXT:    vmult.i16 q1, q0, r0
2301; CHECK-NEXT:    vmov q0, q1
2302; CHECK-NEXT:    bx lr
2303entry:
2304  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2305  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2306  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2307  %a = mul <8 x i16> %x, %ys
2308  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2309  ret <8 x i16> %b
2310}
2311
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = mul %x, %ys, and the select on
; the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vmult.i8 q1, q0, r0
; under VPST; vmov q0, q1.
2312define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2313; CHECK-LABEL: mulqr_v16i8_y:
2314; CHECK:       @ %bb.0: @ %entry
2315; CHECK-NEXT:    vdup.8 q1, r0
2316; CHECK-NEXT:    vctp.8 r1
2317; CHECK-NEXT:    vpst
2318; CHECK-NEXT:    vmult.i8 q1, q0, r0
2319; CHECK-NEXT:    vmov q0, q1
2320; CHECK-NEXT:    bx lr
2321entry:
2322  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2323  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2324  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2325  %a = mul <16 x i8> %x, %ys
2326  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2327  ret <16 x i8> %b
2328}
2329
; Splat-operand variant: float %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fadd %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov r1, s4; vctp.32 r0; vdup.32 q1, r1; predicated
; vaddt.f32 q1, q0, r1 under VPST; vmov q0, q1.
2330define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2331; CHECK-LABEL: faddqr_v4f32_y:
2332; CHECK:       @ %bb.0: @ %entry
2333; CHECK-NEXT:    vmov r1, s4
2334; CHECK-NEXT:    vctp.32 r0
2335; CHECK-NEXT:    vdup.32 q1, r1
2336; CHECK-NEXT:    vpst
2337; CHECK-NEXT:    vaddt.f32 q1, q0, r1
2338; CHECK-NEXT:    vmov q0, q1
2339; CHECK-NEXT:    bx lr
2340entry:
2341  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2342  %i = insertelement <4 x float> undef, float %y, i32 0
2343  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2344  %a = fadd <4 x float> %x, %ys
2345  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2346  ret <4 x float> %b
2347}
2348
; Splat-operand variant: half %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fadd %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov.f16 r1, s4; vctp.16 r0; vdup.16 q1, r1; predicated
; vaddt.f16 q1, q0, r1 under VPST; vmov q0, q1.
2349define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2350; CHECK-LABEL: faddqr_v8f16_y:
2351; CHECK:       @ %bb.0: @ %entry
2352; CHECK-NEXT:    vmov.f16 r1, s4
2353; CHECK-NEXT:    vctp.16 r0
2354; CHECK-NEXT:    vdup.16 q1, r1
2355; CHECK-NEXT:    vpst
2356; CHECK-NEXT:    vaddt.f16 q1, q0, r1
2357; CHECK-NEXT:    vmov q0, q1
2358; CHECK-NEXT:    bx lr
2359entry:
2360  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2361  %i = insertelement <8 x half> undef, half %y, i32 0
2362  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2363  %a = fadd <8 x half> %x, %ys
2364  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2365  ret <8 x half> %b
2366}
2367
; Splat-operand variant: float %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fsub %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov r1, s4; vctp.32 r0; vdup.32 q1, r1; predicated
; vsubt.f32 q1, q0, r1 under VPST; vmov q0, q1.
2368define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2369; CHECK-LABEL: fsubqr_v4f32_y:
2370; CHECK:       @ %bb.0: @ %entry
2371; CHECK-NEXT:    vmov r1, s4
2372; CHECK-NEXT:    vctp.32 r0
2373; CHECK-NEXT:    vdup.32 q1, r1
2374; CHECK-NEXT:    vpst
2375; CHECK-NEXT:    vsubt.f32 q1, q0, r1
2376; CHECK-NEXT:    vmov q0, q1
2377; CHECK-NEXT:    bx lr
2378entry:
2379  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2380  %i = insertelement <4 x float> undef, float %y, i32 0
2381  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2382  %a = fsub <4 x float> %x, %ys
2383  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2384  ret <4 x float> %b
2385}
2386
; Splat-operand variant: half %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fsub %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov.f16 r1, s4; vctp.16 r0; vdup.16 q1, r1; predicated
; vsubt.f16 q1, q0, r1 under VPST; vmov q0, q1.
2387define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2388; CHECK-LABEL: fsubqr_v8f16_y:
2389; CHECK:       @ %bb.0: @ %entry
2390; CHECK-NEXT:    vmov.f16 r1, s4
2391; CHECK-NEXT:    vctp.16 r0
2392; CHECK-NEXT:    vdup.16 q1, r1
2393; CHECK-NEXT:    vpst
2394; CHECK-NEXT:    vsubt.f16 q1, q0, r1
2395; CHECK-NEXT:    vmov q0, q1
2396; CHECK-NEXT:    bx lr
2397entry:
2398  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2399  %i = insertelement <8 x half> undef, half %y, i32 0
2400  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2401  %a = fsub <8 x half> %x, %ys
2402  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2403  ret <8 x half> %b
2404}
2405
; Splat-operand variant: float %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fmul %x, %ys, and the select on
; the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov r1, s4; vctp.32 r0; vdup.32 q1, r1; predicated
; vmult.f32 q1, q0, r1 under VPST; vmov q0, q1.
2406define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2407; CHECK-LABEL: fmulqr_v4f32_y:
2408; CHECK:       @ %bb.0: @ %entry
2409; CHECK-NEXT:    vmov r1, s4
2410; CHECK-NEXT:    vctp.32 r0
2411; CHECK-NEXT:    vdup.32 q1, r1
2412; CHECK-NEXT:    vpst
2413; CHECK-NEXT:    vmult.f32 q1, q0, r1
2414; CHECK-NEXT:    vmov q0, q1
2415; CHECK-NEXT:    bx lr
2416entry:
2417  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2418  %i = insertelement <4 x float> undef, float %y, i32 0
2419  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2420  %a = fmul <4 x float> %x, %ys
2421  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2422  ret <4 x float> %b
2423}
2424
; Splat-operand variant: half %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = fmul %x, %ys, and the select on
; the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and %ys in false lanes.
; Expected sequence: vmov.f16 r1, s4; vctp.16 r0; vdup.16 q1, r1; predicated
; vmult.f16 q1, q0, r1 under VPST; vmov q0, q1.
2425define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2426; CHECK-LABEL: fmulqr_v8f16_y:
2427; CHECK:       @ %bb.0: @ %entry
2428; CHECK-NEXT:    vmov.f16 r1, s4
2429; CHECK-NEXT:    vctp.16 r0
2430; CHECK-NEXT:    vdup.16 q1, r1
2431; CHECK-NEXT:    vpst
2432; CHECK-NEXT:    vmult.f16 q1, q0, r1
2433; CHECK-NEXT:    vmov q0, q1
2434; CHECK-NEXT:    bx lr
2435entry:
2436  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2437  %i = insertelement <8 x half> undef, half %y, i32 0
2438  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2439  %a = fmul <8 x half> %x, %ys
2440  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2441  ret <8 x half> %b
2442}
2443
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.sadd.sat.v4i32(%x, %ys),
; and the select on the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vqaddt.s32 q1, q0, r0
; under VPST; vmov q0, q1.
2444define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2445; CHECK-LABEL: sadd_satqr_v4i32_y:
2446; CHECK:       @ %bb.0: @ %entry
2447; CHECK-NEXT:    vdup.32 q1, r0
2448; CHECK-NEXT:    vctp.32 r1
2449; CHECK-NEXT:    vpst
2450; CHECK-NEXT:    vqaddt.s32 q1, q0, r0
2451; CHECK-NEXT:    vmov q0, q1
2452; CHECK-NEXT:    bx lr
2453entry:
2454  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2455  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2456  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2457  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2458  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2459  ret <4 x i32> %b
2460}
2461
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.sadd.sat.v8i16(%x, %ys),
; and the select on the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vqaddt.s16 q1, q0, r0
; under VPST; vmov q0, q1.
2462define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2463; CHECK-LABEL: sadd_satqr_v8i16_y:
2464; CHECK:       @ %bb.0: @ %entry
2465; CHECK-NEXT:    vdup.16 q1, r0
2466; CHECK-NEXT:    vctp.16 r1
2467; CHECK-NEXT:    vpst
2468; CHECK-NEXT:    vqaddt.s16 q1, q0, r0
2469; CHECK-NEXT:    vmov q0, q1
2470; CHECK-NEXT:    bx lr
2471entry:
2472  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2473  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2474  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2475  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2476  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2477  ret <8 x i16> %b
2478}
2479
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.sadd.sat.v16i8(%x, %ys),
; and the select on the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vqaddt.s8 q1, q0, r0
; under VPST; vmov q0, q1.
2480define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2481; CHECK-LABEL: sadd_satqr_v16i8_y:
2482; CHECK:       @ %bb.0: @ %entry
2483; CHECK-NEXT:    vdup.8 q1, r0
2484; CHECK-NEXT:    vctp.8 r1
2485; CHECK-NEXT:    vpst
2486; CHECK-NEXT:    vqaddt.s8 q1, q0, r0
2487; CHECK-NEXT:    vmov q0, q1
2488; CHECK-NEXT:    bx lr
2489entry:
2490  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2491  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2492  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2493  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2494  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2495  ret <16 x i8> %b
2496}
2497
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.uadd.sat.v4i32(%x, %ys),
; and the select on the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vqaddt.u32 q1, q0, r0
; under VPST; vmov q0, q1.
2498define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2499; CHECK-LABEL: uadd_satqr_v4i32_y:
2500; CHECK:       @ %bb.0: @ %entry
2501; CHECK-NEXT:    vdup.32 q1, r0
2502; CHECK-NEXT:    vctp.32 r1
2503; CHECK-NEXT:    vpst
2504; CHECK-NEXT:    vqaddt.u32 q1, q0, r0
2505; CHECK-NEXT:    vmov q0, q1
2506; CHECK-NEXT:    bx lr
2507entry:
2508  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2509  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2510  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2511  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2512  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2513  ret <4 x i32> %b
2514}
2515
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.uadd.sat.v8i16(%x, %ys),
; and the select on the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vqaddt.u16 q1, q0, r0
; under VPST; vmov q0, q1.
2516define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2517; CHECK-LABEL: uadd_satqr_v8i16_y:
2518; CHECK:       @ %bb.0: @ %entry
2519; CHECK-NEXT:    vdup.16 q1, r0
2520; CHECK-NEXT:    vctp.16 r1
2521; CHECK-NEXT:    vpst
2522; CHECK-NEXT:    vqaddt.u16 q1, q0, r0
2523; CHECK-NEXT:    vmov q0, q1
2524; CHECK-NEXT:    bx lr
2525entry:
2526  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2527  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2528  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2529  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2530  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2531  ret <8 x i16> %b
2532}
2533
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.uadd.sat.v16i8(%x, %ys),
; and the select on the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vqaddt.u8 q1, q0, r0
; under VPST; vmov q0, q1.
2534define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2535; CHECK-LABEL: uadd_satqr_v16i8_y:
2536; CHECK:       @ %bb.0: @ %entry
2537; CHECK-NEXT:    vdup.8 q1, r0
2538; CHECK-NEXT:    vctp.8 r1
2539; CHECK-NEXT:    vpst
2540; CHECK-NEXT:    vqaddt.u8 q1, q0, r0
2541; CHECK-NEXT:    vmov q0, q1
2542; CHECK-NEXT:    bx lr
2543entry:
2544  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2545  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2546  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2547  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2548  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2549  ret <16 x i8> %b
2550}
2551
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.ssub.sat.v4i32(%x, %ys),
; and the select on the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vqsubt.s32 q1, q0, r0
; under VPST; vmov q0, q1.
2552define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2553; CHECK-LABEL: ssub_satqr_v4i32_y:
2554; CHECK:       @ %bb.0: @ %entry
2555; CHECK-NEXT:    vdup.32 q1, r0
2556; CHECK-NEXT:    vctp.32 r1
2557; CHECK-NEXT:    vpst
2558; CHECK-NEXT:    vqsubt.s32 q1, q0, r0
2559; CHECK-NEXT:    vmov q0, q1
2560; CHECK-NEXT:    bx lr
2561entry:
2562  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2563  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2564  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2565  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2566  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2567  ret <4 x i32> %b
2568}
2569
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.ssub.sat.v8i16(%x, %ys),
; and the select on the @llvm.arm.mve.vctp16(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.16 q1, r0; vctp.16 r1; predicated vqsubt.s16 q1, q0, r0
; under VPST; vmov q0, q1.
2570define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2571; CHECK-LABEL: ssub_satqr_v8i16_y:
2572; CHECK:       @ %bb.0: @ %entry
2573; CHECK-NEXT:    vdup.16 q1, r0
2574; CHECK-NEXT:    vctp.16 r1
2575; CHECK-NEXT:    vpst
2576; CHECK-NEXT:    vqsubt.s16 q1, q0, r0
2577; CHECK-NEXT:    vmov q0, q1
2578; CHECK-NEXT:    bx lr
2579entry:
2580  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2581  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2582  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2583  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2584  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2585  ret <8 x i16> %b
2586}
2587
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.ssub.sat.v16i8(%x, %ys),
; and the select on the @llvm.arm.mve.vctp8(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.8 q1, r0; vctp.8 r1; predicated vqsubt.s8 q1, q0, r0
; under VPST; vmov q0, q1.
2588define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2589; CHECK-LABEL: ssub_satqr_v16i8_y:
2590; CHECK:       @ %bb.0: @ %entry
2591; CHECK-NEXT:    vdup.8 q1, r0
2592; CHECK-NEXT:    vctp.8 r1
2593; CHECK-NEXT:    vpst
2594; CHECK-NEXT:    vqsubt.s8 q1, q0, r0
2595; CHECK-NEXT:    vmov q0, q1
2596; CHECK-NEXT:    bx lr
2597entry:
2598  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2599  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2600  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2601  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2602  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2603  ret <16 x i8> %b
2604}
2605
; Splat-operand variant: scalar %y is broadcast into %ys (insertelement +
; shufflevector with a zeroinitializer mask), %a = @llvm.usub.sat.v4i32(%x, %ys),
; and the select on the @llvm.arm.mve.vctp32(%n) mask takes %a in true lanes and
; %ys in false lanes.
; Expected sequence: vdup.32 q1, r0; vctp.32 r1; predicated vqsubt.u32 q1, q0, r0
; under VPST; vmov q0, q1.
2606define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2607; CHECK-LABEL: usub_satqr_v4i32_y:
2608; CHECK:       @ %bb.0: @ %entry
2609; CHECK-NEXT:    vdup.32 q1, r0
2610; CHECK-NEXT:    vctp.32 r1
2611; CHECK-NEXT:    vpst
2612; CHECK-NEXT:    vqsubt.u32 q1, q0, r0
2613; CHECK-NEXT:    vmov q0, q1
2614; CHECK-NEXT:    bx lr
2615entry:
2616  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2617  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2618  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2619  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2620  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2621  ret <4 x i32> %b
2622}
2623
; Predicated unsigned saturating subtract of a splatted scalar, <8 x i16>.
; The i16 scalar %y is broadcast to all eight lanes and subtracted from %x with
; llvm.usub.sat.v8i16. Lanes enabled by the vctp16(%n) mask keep that result;
; the remaining lanes take the splat of %y.
; The autogenerated assertions below expect a single predicated vqsubt.u16
; that reads the scalar from r0, with vdup.16 materialising the splat in q1 as
; the value for the lanes the predicate leaves untouched.
2624define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2625; CHECK-LABEL: usub_satqr_v8i16_y:
2626; CHECK:       @ %bb.0: @ %entry
2627; CHECK-NEXT:    vdup.16 q1, r0
2628; CHECK-NEXT:    vctp.16 r1
2629; CHECK-NEXT:    vpst
2630; CHECK-NEXT:    vqsubt.u16 q1, q0, r0
2631; CHECK-NEXT:    vmov q0, q1
2632; CHECK-NEXT:    bx lr
2633entry:
  ; Per-lane i1 mask computed from %n.
2634  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Splat idiom: put %y in lane 0, then shuffle with an all-zero index mask.
2635  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2636  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2637  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  ; False operand is the splat %ys, not %x.
2638  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2639  ret <8 x i16> %b
2640}
2641
; Predicated unsigned saturating subtract of a splatted scalar, <16 x i8>.
; The i8 scalar %y is broadcast to all sixteen lanes and subtracted from %x
; with llvm.usub.sat.v16i8. Lanes enabled by the vctp8(%n) mask keep that
; result; the remaining lanes take the splat of %y.
; The autogenerated assertions below expect a single predicated vqsubt.u8 that
; reads the scalar from r0, with vdup.8 materialising the splat in q1 as the
; value for the lanes the predicate leaves untouched.
2642define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2643; CHECK-LABEL: usub_satqr_v16i8_y:
2644; CHECK:       @ %bb.0: @ %entry
2645; CHECK-NEXT:    vdup.8 q1, r0
2646; CHECK-NEXT:    vctp.8 r1
2647; CHECK-NEXT:    vpst
2648; CHECK-NEXT:    vqsubt.u8 q1, q0, r0
2649; CHECK-NEXT:    vmov q0, q1
2650; CHECK-NEXT:    bx lr
2651entry:
  ; Per-lane i1 mask computed from %n.
2652  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Splat idiom: put %y in lane 0, then shuffle with an all-zero index mask.
2653  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2654  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2655  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  ; False operand is the splat %ys, not %x.
2656  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2657  ret <16 x i8> %b
2658}
2659
; Saturating add/sub intrinsic declarations, signed (sadd/ssub) and unsigned
; (uadd/usub), one per vector type exercised here: <16 x i8>, <8 x i16> and
; <4 x i32>. Each takes two vectors of the same type and returns that type.
2660declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
2661declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
2662declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
2663declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
2664declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
2665declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
2666declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
2667declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
2668declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
2669declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
2670declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
2671declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
2672
; MVE vctp intrinsic declarations: each maps an i32 to a per-lane i1 mask with
; 16, 8 or 4 lanes. The tests in this file use the result as the condition of
; the vector select.
2673declare <16 x i1> @llvm.arm.mve.vctp8(i32)
2674declare <8 x i1> @llvm.arm.mve.vctp16(i32)
2675declare <4 x i1> @llvm.arm.mve.vctp32(i32)
2676