; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every function below follows the same shape:
;   * a tail predicate %c is built from %n with an llvm.arm.mve.vctp* intrinsic,
;   * a vector operation %a is computed from %x and %y,
;   * the result is "select %c, %a, %x", i.e. inactive lanes keep the value of
;     the first operand %x.
; The expected assembly for each function is a VCTP, a VPST, and one predicated
; ("t"-suffixed) instruction that updates q0 in place, followed by the return.

; Integer add, sub and mul with two vector operands.

define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Bitwise and, or and xor. The expected instructions carry no element-size
; suffix; only the VCTP width differs between the element types.

define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; and / or with an inverted second operand (%y xor all-ones). The inversion is
; expected to be absorbed into a single VBIC / VORN.

define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point add, sub and mul on <4 x float> and <8 x half>.

define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Integer min / max written as icmp + select. The expected instruction is
; VMIN / VMAX with the signedness of the compare (slt/sgt -> .s, ult/ugt -> .u).

define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point min / max written as a fast-math fcmp + select, expected as
; VMINNM / VMAXNM.

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Saturating add / sub through the llvm.{s,u}add.sat and llvm.{s,u}sub.sat
; intrinsics, expected as VQADD / VQSUB with the matching signedness.

define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Scalar-operand ("qr") variants: %y is a scalar that is splatted with
; insertelement + shufflevector. The scalar is passed in r0 and %n in r1, and
; the expected instruction takes the scalar directly from r0.

define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: subqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: mulqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = mul <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: mulqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = mul <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: mulqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = mul <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point scalar-operand variants. Here the scalar %y arrives in s4 and
; %n in r0; the expected code first moves the scalar into r1 with a VMOV and
; then uses r1 as the scalar operand of the predicated instruction.

define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: faddqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fadd <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: faddqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fadd <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fsubqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fsub <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fsubqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fsub <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fmulqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fmul <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fmulqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fmul <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Saturating add with a splatted scalar second operand.

define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8
x i1> @llvm.arm.mve.vctp16(i32 %n) 1109 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1110 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1111 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1112 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1113 ret <8 x i16> %b 1114} 1115 1116define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1117; CHECK-LABEL: sadd_satqr_v16i8_x: 1118; CHECK: @ %bb.0: @ %entry 1119; CHECK-NEXT: vctp.8 r1 1120; CHECK-NEXT: vpst 1121; CHECK-NEXT: vqaddt.s8 q0, q0, r0 1122; CHECK-NEXT: bx lr 1123entry: 1124 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1125 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1126 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1127 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1128 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1129 ret <16 x i8> %b 1130} 1131 1132define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1133; CHECK-LABEL: uadd_satqr_v4i32_x: 1134; CHECK: @ %bb.0: @ %entry 1135; CHECK-NEXT: vctp.32 r1 1136; CHECK-NEXT: vpst 1137; CHECK-NEXT: vqaddt.u32 q0, q0, r0 1138; CHECK-NEXT: bx lr 1139entry: 1140 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1141 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1142 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1143 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1144 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1145 ret <4 x i32> %b 1146} 1147 1148define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1149; CHECK-LABEL: uadd_satqr_v8i16_x: 1150; CHECK: @ %bb.0: @ %entry 1151; CHECK-NEXT: vctp.16 r1 1152; CHECK-NEXT: vpst 1153; CHECK-NEXT: vqaddt.u16 q0, q0, r0 1154; CHECK-NEXT: bx lr 1155entry: 1156 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1157 %i = insertelement <8 x i16> undef, 
i16 %y, i32 0 1158 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1159 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1160 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1161 ret <8 x i16> %b 1162} 1163 1164define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1165; CHECK-LABEL: uadd_satqr_v16i8_x: 1166; CHECK: @ %bb.0: @ %entry 1167; CHECK-NEXT: vctp.8 r1 1168; CHECK-NEXT: vpst 1169; CHECK-NEXT: vqaddt.u8 q0, q0, r0 1170; CHECK-NEXT: bx lr 1171entry: 1172 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1173 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1174 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1175 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1176 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1177 ret <16 x i8> %b 1178} 1179 1180define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1181; CHECK-LABEL: ssub_satqr_v4i32_x: 1182; CHECK: @ %bb.0: @ %entry 1183; CHECK-NEXT: vctp.32 r1 1184; CHECK-NEXT: vpst 1185; CHECK-NEXT: vqsubt.s32 q0, q0, r0 1186; CHECK-NEXT: bx lr 1187entry: 1188 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1189 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1190 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1191 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1192 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1193 ret <4 x i32> %b 1194} 1195 1196define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1197; CHECK-LABEL: ssub_satqr_v8i16_x: 1198; CHECK: @ %bb.0: @ %entry 1199; CHECK-NEXT: vctp.16 r1 1200; CHECK-NEXT: vpst 1201; CHECK-NEXT: vqsubt.s16 q0, q0, r0 1202; CHECK-NEXT: bx lr 1203entry: 1204 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1205 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1206 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x 
i32> zeroinitializer 1207 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1208 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1209 ret <8 x i16> %b 1210} 1211 1212define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1213; CHECK-LABEL: ssub_satqr_v16i8_x: 1214; CHECK: @ %bb.0: @ %entry 1215; CHECK-NEXT: vctp.8 r1 1216; CHECK-NEXT: vpst 1217; CHECK-NEXT: vqsubt.s8 q0, q0, r0 1218; CHECK-NEXT: bx lr 1219entry: 1220 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1221 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1222 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1223 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1224 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1225 ret <16 x i8> %b 1226} 1227 1228define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1229; CHECK-LABEL: usub_satqr_v4i32_x: 1230; CHECK: @ %bb.0: @ %entry 1231; CHECK-NEXT: vctp.32 r1 1232; CHECK-NEXT: vpst 1233; CHECK-NEXT: vqsubt.u32 q0, q0, r0 1234; CHECK-NEXT: bx lr 1235entry: 1236 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1237 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1238 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1239 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1240 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1241 ret <4 x i32> %b 1242} 1243 1244define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1245; CHECK-LABEL: usub_satqr_v8i16_x: 1246; CHECK: @ %bb.0: @ %entry 1247; CHECK-NEXT: vctp.16 r1 1248; CHECK-NEXT: vpst 1249; CHECK-NEXT: vqsubt.u16 q0, q0, r0 1250; CHECK-NEXT: bx lr 1251entry: 1252 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1253 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1254 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1255 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> 
%x, <8 x i16> %ys) 1256 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1257 ret <8 x i16> %b 1258} 1259 1260define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1261; CHECK-LABEL: usub_satqr_v16i8_x: 1262; CHECK: @ %bb.0: @ %entry 1263; CHECK-NEXT: vctp.8 r1 1264; CHECK-NEXT: vpst 1265; CHECK-NEXT: vqsubt.u8 q0, q0, r0 1266; CHECK-NEXT: bx lr 1267entry: 1268 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1269 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1270 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1271 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1272 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1273 ret <16 x i8> %b 1274} 1275 1276define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1277; CHECK-LABEL: add_v4i32_y: 1278; CHECK: @ %bb.0: @ %entry 1279; CHECK-NEXT: vctp.32 r0 1280; CHECK-NEXT: vpst 1281; CHECK-NEXT: vaddt.i32 q1, q0, q1 1282; CHECK-NEXT: vmov q0, q1 1283; CHECK-NEXT: bx lr 1284entry: 1285 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1286 %a = add <4 x i32> %x, %y 1287 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1288 ret <4 x i32> %b 1289} 1290 1291define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1292; CHECK-LABEL: add_v8i16_y: 1293; CHECK: @ %bb.0: @ %entry 1294; CHECK-NEXT: vctp.16 r0 1295; CHECK-NEXT: vpst 1296; CHECK-NEXT: vaddt.i16 q1, q0, q1 1297; CHECK-NEXT: vmov q0, q1 1298; CHECK-NEXT: bx lr 1299entry: 1300 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1301 %a = add <8 x i16> %x, %y 1302 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1303 ret <8 x i16> %b 1304} 1305 1306define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1307; CHECK-LABEL: add_v16i8_y: 1308; CHECK: @ %bb.0: @ %entry 1309; CHECK-NEXT: vctp.8 r0 1310; CHECK-NEXT: vpst 1311; CHECK-NEXT: vaddt.i8 q1, q0, q1 1312; CHECK-NEXT: vmov q0, q1 1313; CHECK-NEXT: 
bx lr 1314entry: 1315 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1316 %a = add <16 x i8> %x, %y 1317 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1318 ret <16 x i8> %b 1319} 1320 1321define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1322; CHECK-LABEL: sub_v4i32_y: 1323; CHECK: @ %bb.0: @ %entry 1324; CHECK-NEXT: vctp.32 r0 1325; CHECK-NEXT: vpst 1326; CHECK-NEXT: vsubt.i32 q1, q0, q1 1327; CHECK-NEXT: vmov q0, q1 1328; CHECK-NEXT: bx lr 1329entry: 1330 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1331 %a = sub <4 x i32> %x, %y 1332 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1333 ret <4 x i32> %b 1334} 1335 1336define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1337; CHECK-LABEL: sub_v8i16_y: 1338; CHECK: @ %bb.0: @ %entry 1339; CHECK-NEXT: vctp.16 r0 1340; CHECK-NEXT: vpst 1341; CHECK-NEXT: vsubt.i16 q1, q0, q1 1342; CHECK-NEXT: vmov q0, q1 1343; CHECK-NEXT: bx lr 1344entry: 1345 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1346 %a = sub <8 x i16> %x, %y 1347 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1348 ret <8 x i16> %b 1349} 1350 1351define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1352; CHECK-LABEL: sub_v16i8_y: 1353; CHECK: @ %bb.0: @ %entry 1354; CHECK-NEXT: vctp.8 r0 1355; CHECK-NEXT: vpst 1356; CHECK-NEXT: vsubt.i8 q1, q0, q1 1357; CHECK-NEXT: vmov q0, q1 1358; CHECK-NEXT: bx lr 1359entry: 1360 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1361 %a = sub <16 x i8> %x, %y 1362 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1363 ret <16 x i8> %b 1364} 1365 1366define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1367; CHECK-LABEL: mul_v4i32_y: 1368; CHECK: @ %bb.0: @ %entry 1369; CHECK-NEXT: vctp.32 r0 1370; CHECK-NEXT: vpst 1371; CHECK-NEXT: vmult.i32 q1, q0, q1 1372; CHECK-NEXT: vmov q0, q1 1373; CHECK-NEXT: bx lr 1374entry: 1375 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1376 
%a = mul <4 x i32> %x, %y 1377 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1378 ret <4 x i32> %b 1379} 1380 1381define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1382; CHECK-LABEL: mul_v8i16_y: 1383; CHECK: @ %bb.0: @ %entry 1384; CHECK-NEXT: vctp.16 r0 1385; CHECK-NEXT: vpst 1386; CHECK-NEXT: vmult.i16 q1, q0, q1 1387; CHECK-NEXT: vmov q0, q1 1388; CHECK-NEXT: bx lr 1389entry: 1390 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1391 %a = mul <8 x i16> %x, %y 1392 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1393 ret <8 x i16> %b 1394} 1395 1396define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1397; CHECK-LABEL: mul_v16i8_y: 1398; CHECK: @ %bb.0: @ %entry 1399; CHECK-NEXT: vctp.8 r0 1400; CHECK-NEXT: vpst 1401; CHECK-NEXT: vmult.i8 q1, q0, q1 1402; CHECK-NEXT: vmov q0, q1 1403; CHECK-NEXT: bx lr 1404entry: 1405 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1406 %a = mul <16 x i8> %x, %y 1407 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1408 ret <16 x i8> %b 1409} 1410 1411define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1412; CHECK-LABEL: and_v4i32_y: 1413; CHECK: @ %bb.0: @ %entry 1414; CHECK-NEXT: vctp.32 r0 1415; CHECK-NEXT: vpst 1416; CHECK-NEXT: vandt q1, q0, q1 1417; CHECK-NEXT: vmov q0, q1 1418; CHECK-NEXT: bx lr 1419entry: 1420 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1421 %a = and <4 x i32> %x, %y 1422 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1423 ret <4 x i32> %b 1424} 1425 1426define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1427; CHECK-LABEL: and_v8i16_y: 1428; CHECK: @ %bb.0: @ %entry 1429; CHECK-NEXT: vctp.16 r0 1430; CHECK-NEXT: vpst 1431; CHECK-NEXT: vandt q1, q0, q1 1432; CHECK-NEXT: vmov q0, q1 1433; CHECK-NEXT: bx lr 1434entry: 1435 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1436 %a = and <8 x i16> %x, %y 1437 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 
1438 ret <8 x i16> %b 1439} 1440 1441define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1442; CHECK-LABEL: and_v16i8_y: 1443; CHECK: @ %bb.0: @ %entry 1444; CHECK-NEXT: vctp.8 r0 1445; CHECK-NEXT: vpst 1446; CHECK-NEXT: vandt q1, q0, q1 1447; CHECK-NEXT: vmov q0, q1 1448; CHECK-NEXT: bx lr 1449entry: 1450 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1451 %a = and <16 x i8> %x, %y 1452 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1453 ret <16 x i8> %b 1454} 1455 1456define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1457; CHECK-LABEL: or_v4i32_y: 1458; CHECK: @ %bb.0: @ %entry 1459; CHECK-NEXT: vctp.32 r0 1460; CHECK-NEXT: vpst 1461; CHECK-NEXT: vorrt q1, q0, q1 1462; CHECK-NEXT: vmov q0, q1 1463; CHECK-NEXT: bx lr 1464entry: 1465 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1466 %a = or <4 x i32> %x, %y 1467 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1468 ret <4 x i32> %b 1469} 1470 1471define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1472; CHECK-LABEL: or_v8i16_y: 1473; CHECK: @ %bb.0: @ %entry 1474; CHECK-NEXT: vctp.16 r0 1475; CHECK-NEXT: vpst 1476; CHECK-NEXT: vorrt q1, q0, q1 1477; CHECK-NEXT: vmov q0, q1 1478; CHECK-NEXT: bx lr 1479entry: 1480 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1481 %a = or <8 x i16> %x, %y 1482 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1483 ret <8 x i16> %b 1484} 1485 1486define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1487; CHECK-LABEL: or_v16i8_y: 1488; CHECK: @ %bb.0: @ %entry 1489; CHECK-NEXT: vctp.8 r0 1490; CHECK-NEXT: vpst 1491; CHECK-NEXT: vorrt q1, q0, q1 1492; CHECK-NEXT: vmov q0, q1 1493; CHECK-NEXT: bx lr 1494entry: 1495 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1496 %a = or <16 x i8> %x, %y 1497 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1498 ret <16 x i8> %b 1499} 1500 1501define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 
x i32> %y, i32 %n) { 1502; CHECK-LABEL: xor_v4i32_y: 1503; CHECK: @ %bb.0: @ %entry 1504; CHECK-NEXT: vctp.32 r0 1505; CHECK-NEXT: vpst 1506; CHECK-NEXT: veort q1, q0, q1 1507; CHECK-NEXT: vmov q0, q1 1508; CHECK-NEXT: bx lr 1509entry: 1510 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1511 %a = xor <4 x i32> %x, %y 1512 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1513 ret <4 x i32> %b 1514} 1515 1516define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1517; CHECK-LABEL: xor_v8i16_y: 1518; CHECK: @ %bb.0: @ %entry 1519; CHECK-NEXT: vctp.16 r0 1520; CHECK-NEXT: vpst 1521; CHECK-NEXT: veort q1, q0, q1 1522; CHECK-NEXT: vmov q0, q1 1523; CHECK-NEXT: bx lr 1524entry: 1525 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1526 %a = xor <8 x i16> %x, %y 1527 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1528 ret <8 x i16> %b 1529} 1530 1531define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1532; CHECK-LABEL: xor_v16i8_y: 1533; CHECK: @ %bb.0: @ %entry 1534; CHECK-NEXT: vctp.8 r0 1535; CHECK-NEXT: vpst 1536; CHECK-NEXT: veort q1, q0, q1 1537; CHECK-NEXT: vmov q0, q1 1538; CHECK-NEXT: bx lr 1539entry: 1540 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1541 %a = xor <16 x i8> %x, %y 1542 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1543 ret <16 x i8> %b 1544} 1545 1546define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1547; CHECK-LABEL: andnot_v4i32_y: 1548; CHECK: @ %bb.0: @ %entry 1549; CHECK-NEXT: vctp.32 r0 1550; CHECK-NEXT: vpst 1551; CHECK-NEXT: vbict q1, q0, q1 1552; CHECK-NEXT: vmov q0, q1 1553; CHECK-NEXT: bx lr 1554entry: 1555 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1556 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> 1557 %a = and <4 x i32> %x, %y1 1558 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1559 ret <4 x i32> %b 1560} 1561 1562define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) 
{ 1563; CHECK-LABEL: andnot_v8i16_y: 1564; CHECK: @ %bb.0: @ %entry 1565; CHECK-NEXT: vctp.16 r0 1566; CHECK-NEXT: vpst 1567; CHECK-NEXT: vbict q1, q0, q1 1568; CHECK-NEXT: vmov q0, q1 1569; CHECK-NEXT: bx lr 1570entry: 1571 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1572 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1573 %a = and <8 x i16> %x, %y1 1574 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1575 ret <8 x i16> %b 1576} 1577 1578define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1579; CHECK-LABEL: andnot_v16i8_y: 1580; CHECK: @ %bb.0: @ %entry 1581; CHECK-NEXT: vctp.8 r0 1582; CHECK-NEXT: vpst 1583; CHECK-NEXT: vbict q1, q0, q1 1584; CHECK-NEXT: vmov q0, q1 1585; CHECK-NEXT: bx lr 1586entry: 1587 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1588 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1589 %a = and <16 x i8> %x, %y1 1590 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1591 ret <16 x i8> %b 1592} 1593 1594define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1595; CHECK-LABEL: ornot_v4i32_y: 1596; CHECK: @ %bb.0: @ %entry 1597; CHECK-NEXT: vctp.32 r0 1598; CHECK-NEXT: vpst 1599; CHECK-NEXT: vornt q1, q0, q1 1600; CHECK-NEXT: vmov q0, q1 1601; CHECK-NEXT: bx lr 1602entry: 1603 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1604 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> 1605 %a = or <4 x i32> %x, %y1 1606 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1607 ret <4 x i32> %b 1608} 1609 1610define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1611; CHECK-LABEL: ornot_v8i16_y: 1612; CHECK: @ %bb.0: @ %entry 1613; CHECK-NEXT: vctp.16 r0 1614; CHECK-NEXT: vpst 1615; CHECK-NEXT: vornt q1, q0, q1 1616; CHECK-NEXT: vmov q0, q1 1617; CHECK-NEXT: bx lr 1618entry: 1619 %c = call <8 x i1> 
@llvm.arm.mve.vctp16(i32 %n) 1620 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1621 %a = or <8 x i16> %x, %y1 1622 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1623 ret <8 x i16> %b 1624} 1625 1626define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1627; CHECK-LABEL: ornot_v16i8_y: 1628; CHECK: @ %bb.0: @ %entry 1629; CHECK-NEXT: vctp.8 r0 1630; CHECK-NEXT: vpst 1631; CHECK-NEXT: vornt q1, q0, q1 1632; CHECK-NEXT: vmov q0, q1 1633; CHECK-NEXT: bx lr 1634entry: 1635 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1636 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1637 %a = or <16 x i8> %x, %y1 1638 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1639 ret <16 x i8> %b 1640} 1641 1642define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 1643; CHECK-LABEL: fadd_v4f32_y: 1644; CHECK: @ %bb.0: @ %entry 1645; CHECK-NEXT: vctp.32 r0 1646; CHECK-NEXT: vpst 1647; CHECK-NEXT: vaddt.f32 q1, q0, q1 1648; CHECK-NEXT: vmov q0, q1 1649; CHECK-NEXT: bx lr 1650entry: 1651 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1652 %a = fadd <4 x float> %x, %y 1653 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 1654 ret <4 x float> %b 1655} 1656 1657define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1658; CHECK-LABEL: fadd_v8f16_y: 1659; CHECK: @ %bb.0: @ %entry 1660; CHECK-NEXT: vctp.16 r0 1661; CHECK-NEXT: vpst 1662; CHECK-NEXT: vaddt.f16 q1, q0, q1 1663; CHECK-NEXT: vmov q0, q1 1664; CHECK-NEXT: bx lr 1665entry: 1666 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1667 %a = fadd <8 x half> %x, %y 1668 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 1669 ret <8 x half> %b 1670} 1671 1672define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 1673; CHECK-LABEL: fsub_v4f32_y: 1674; CHECK: @ 
%bb.0: @ %entry 1675; CHECK-NEXT: vctp.32 r0 1676; CHECK-NEXT: vpst 1677; CHECK-NEXT: vsubt.f32 q1, q0, q1 1678; CHECK-NEXT: vmov q0, q1 1679; CHECK-NEXT: bx lr 1680entry: 1681 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1682 %a = fsub <4 x float> %x, %y 1683 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 1684 ret <4 x float> %b 1685} 1686 1687define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1688; CHECK-LABEL: fsub_v8f16_y: 1689; CHECK: @ %bb.0: @ %entry 1690; CHECK-NEXT: vctp.16 r0 1691; CHECK-NEXT: vpst 1692; CHECK-NEXT: vsubt.f16 q1, q0, q1 1693; CHECK-NEXT: vmov q0, q1 1694; CHECK-NEXT: bx lr 1695entry: 1696 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1697 %a = fsub <8 x half> %x, %y 1698 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 1699 ret <8 x half> %b 1700} 1701 1702define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 1703; CHECK-LABEL: fmul_v4f32_y: 1704; CHECK: @ %bb.0: @ %entry 1705; CHECK-NEXT: vctp.32 r0 1706; CHECK-NEXT: vpst 1707; CHECK-NEXT: vmult.f32 q1, q0, q1 1708; CHECK-NEXT: vmov q0, q1 1709; CHECK-NEXT: bx lr 1710entry: 1711 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1712 %a = fmul <4 x float> %x, %y 1713 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 1714 ret <4 x float> %b 1715} 1716 1717define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1718; CHECK-LABEL: fmul_v8f16_y: 1719; CHECK: @ %bb.0: @ %entry 1720; CHECK-NEXT: vctp.16 r0 1721; CHECK-NEXT: vpst 1722; CHECK-NEXT: vmult.f16 q1, q0, q1 1723; CHECK-NEXT: vmov q0, q1 1724; CHECK-NEXT: bx lr 1725entry: 1726 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1727 %a = fmul <8 x half> %x, %y 1728 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 1729 ret <8 x half> %b 1730} 1731 1732define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1733; CHECK-LABEL: icmp_slt_v4i32_y: 1734; CHECK: @ %bb.0: @ %entry 
1735; CHECK-NEXT: vctp.32 r0 1736; CHECK-NEXT: vpst 1737; CHECK-NEXT: vmint.s32 q1, q0, q1 1738; CHECK-NEXT: vmov q0, q1 1739; CHECK-NEXT: bx lr 1740entry: 1741 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1742 %a1 = icmp slt <4 x i32> %x, %y 1743 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 1744 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1745 ret <4 x i32> %b 1746} 1747 1748define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1749; CHECK-LABEL: icmp_slt_v8i16_y: 1750; CHECK: @ %bb.0: @ %entry 1751; CHECK-NEXT: vctp.16 r0 1752; CHECK-NEXT: vpst 1753; CHECK-NEXT: vmint.s16 q1, q0, q1 1754; CHECK-NEXT: vmov q0, q1 1755; CHECK-NEXT: bx lr 1756entry: 1757 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1758 %a1 = icmp slt <8 x i16> %x, %y 1759 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 1760 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1761 ret <8 x i16> %b 1762} 1763 1764define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1765; CHECK-LABEL: icmp_slt_v16i8_y: 1766; CHECK: @ %bb.0: @ %entry 1767; CHECK-NEXT: vctp.8 r0 1768; CHECK-NEXT: vpst 1769; CHECK-NEXT: vmint.s8 q1, q0, q1 1770; CHECK-NEXT: vmov q0, q1 1771; CHECK-NEXT: bx lr 1772entry: 1773 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1774 %a1 = icmp slt <16 x i8> %x, %y 1775 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 1776 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1777 ret <16 x i8> %b 1778} 1779 1780define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1781; CHECK-LABEL: icmp_sgt_v4i32_y: 1782; CHECK: @ %bb.0: @ %entry 1783; CHECK-NEXT: vctp.32 r0 1784; CHECK-NEXT: vpst 1785; CHECK-NEXT: vmaxt.s32 q1, q0, q1 1786; CHECK-NEXT: vmov q0, q1 1787; CHECK-NEXT: bx lr 1788entry: 1789 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1790 %a1 = icmp sgt <4 x i32> %x, %y 1791 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 1792 %b = select <4 x i1> %c, 
<4 x i32> %a, <4 x i32> %y 1793 ret <4 x i32> %b 1794} 1795 1796define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1797; CHECK-LABEL: icmp_sgt_v8i16_y: 1798; CHECK: @ %bb.0: @ %entry 1799; CHECK-NEXT: vctp.16 r0 1800; CHECK-NEXT: vpst 1801; CHECK-NEXT: vmaxt.s16 q1, q0, q1 1802; CHECK-NEXT: vmov q0, q1 1803; CHECK-NEXT: bx lr 1804entry: 1805 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1806 %a1 = icmp sgt <8 x i16> %x, %y 1807 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 1808 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1809 ret <8 x i16> %b 1810} 1811 1812define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1813; CHECK-LABEL: icmp_sgt_v16i8_y: 1814; CHECK: @ %bb.0: @ %entry 1815; CHECK-NEXT: vctp.8 r0 1816; CHECK-NEXT: vpst 1817; CHECK-NEXT: vmaxt.s8 q1, q0, q1 1818; CHECK-NEXT: vmov q0, q1 1819; CHECK-NEXT: bx lr 1820entry: 1821 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1822 %a1 = icmp sgt <16 x i8> %x, %y 1823 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 1824 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1825 ret <16 x i8> %b 1826} 1827 1828define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1829; CHECK-LABEL: icmp_ult_v4i32_y: 1830; CHECK: @ %bb.0: @ %entry 1831; CHECK-NEXT: vctp.32 r0 1832; CHECK-NEXT: vpst 1833; CHECK-NEXT: vmint.u32 q1, q0, q1 1834; CHECK-NEXT: vmov q0, q1 1835; CHECK-NEXT: bx lr 1836entry: 1837 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1838 %a1 = icmp ult <4 x i32> %x, %y 1839 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 1840 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1841 ret <4 x i32> %b 1842} 1843 1844define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1845; CHECK-LABEL: icmp_ult_v8i16_y: 1846; CHECK: @ %bb.0: @ %entry 1847; CHECK-NEXT: vctp.16 r0 1848; CHECK-NEXT: vpst 1849; CHECK-NEXT: vmint.u16 q1, q0, q1 1850; CHECK-NEXT: vmov 
q0, q1 1851; CHECK-NEXT: bx lr 1852entry: 1853 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1854 %a1 = icmp ult <8 x i16> %x, %y 1855 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 1856 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1857 ret <8 x i16> %b 1858} 1859 1860define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1861; CHECK-LABEL: icmp_ult_v16i8_y: 1862; CHECK: @ %bb.0: @ %entry 1863; CHECK-NEXT: vctp.8 r0 1864; CHECK-NEXT: vpst 1865; CHECK-NEXT: vmint.u8 q1, q0, q1 1866; CHECK-NEXT: vmov q0, q1 1867; CHECK-NEXT: bx lr 1868entry: 1869 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1870 %a1 = icmp ult <16 x i8> %x, %y 1871 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 1872 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1873 ret <16 x i8> %b 1874} 1875 1876define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1877; CHECK-LABEL: icmp_ugt_v4i32_y: 1878; CHECK: @ %bb.0: @ %entry 1879; CHECK-NEXT: vctp.32 r0 1880; CHECK-NEXT: vpst 1881; CHECK-NEXT: vmaxt.u32 q1, q0, q1 1882; CHECK-NEXT: vmov q0, q1 1883; CHECK-NEXT: bx lr 1884entry: 1885 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1886 %a1 = icmp ugt <4 x i32> %x, %y 1887 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 1888 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1889 ret <4 x i32> %b 1890} 1891 1892define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1893; CHECK-LABEL: icmp_ugt_v8i16_y: 1894; CHECK: @ %bb.0: @ %entry 1895; CHECK-NEXT: vctp.16 r0 1896; CHECK-NEXT: vpst 1897; CHECK-NEXT: vmaxt.u16 q1, q0, q1 1898; CHECK-NEXT: vmov q0, q1 1899; CHECK-NEXT: bx lr 1900entry: 1901 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1902 %a1 = icmp ugt <8 x i16> %x, %y 1903 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 1904 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1905 ret <8 x i16> %b 1906} 1907 1908define arm_aapcs_vfpcc <16 x i8> 
@icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1909; CHECK-LABEL: icmp_ugt_v16i8_y: 1910; CHECK: @ %bb.0: @ %entry 1911; CHECK-NEXT: vctp.8 r0 1912; CHECK-NEXT: vpst 1913; CHECK-NEXT: vmaxt.u8 q1, q0, q1 1914; CHECK-NEXT: vmov q0, q1 1915; CHECK-NEXT: bx lr 1916entry: 1917 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1918 %a1 = icmp ugt <16 x i8> %x, %y 1919 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 1920 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1921 ret <16 x i8> %b 1922} 1923 1924define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 1925; CHECK-LABEL: fcmp_fast_olt_v4f32_y: 1926; CHECK: @ %bb.0: @ %entry 1927; CHECK-NEXT: vctp.32 r0 1928; CHECK-NEXT: vpst 1929; CHECK-NEXT: vminnmt.f32 q1, q0, q1 1930; CHECK-NEXT: vmov q0, q1 1931; CHECK-NEXT: bx lr 1932entry: 1933 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1934 %a1 = fcmp fast olt <4 x float> %x, %y 1935 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y 1936 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 1937 ret <4 x float> %b 1938} 1939 1940define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1941; CHECK-LABEL: fcmp_fast_olt_v8f16_y: 1942; CHECK: @ %bb.0: @ %entry 1943; CHECK-NEXT: vctp.16 r0 1944; CHECK-NEXT: vpst 1945; CHECK-NEXT: vminnmt.f16 q1, q0, q1 1946; CHECK-NEXT: vmov q0, q1 1947; CHECK-NEXT: bx lr 1948entry: 1949 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1950 %a1 = fcmp fast olt <8 x half> %x, %y 1951 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y 1952 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 1953 ret <8 x half> %b 1954} 1955 1956define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 1957; CHECK-LABEL: fcmp_fast_ogt_v4f32_y: 1958; CHECK: @ %bb.0: @ %entry 1959; CHECK-NEXT: vctp.32 r0 1960; CHECK-NEXT: vpst 1961; CHECK-NEXT: vmaxnmt.f32 q1, q0, q1 1962; CHECK-NEXT: vmov q0, q1 1963; 
CHECK-NEXT: bx lr 1964entry: 1965 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1966 %a1 = fcmp fast ogt <4 x float> %x, %y 1967 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y 1968 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 1969 ret <4 x float> %b 1970} 1971 1972define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 1973; CHECK-LABEL: fcmp_fast_ogt_v8f16_y: 1974; CHECK: @ %bb.0: @ %entry 1975; CHECK-NEXT: vctp.16 r0 1976; CHECK-NEXT: vpst 1977; CHECK-NEXT: vmaxnmt.f16 q1, q0, q1 1978; CHECK-NEXT: vmov q0, q1 1979; CHECK-NEXT: bx lr 1980entry: 1981 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1982 %a1 = fcmp fast ogt <8 x half> %x, %y 1983 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y 1984 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 1985 ret <8 x half> %b 1986} 1987 1988define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1989; CHECK-LABEL: sadd_sat_v4i32_y: 1990; CHECK: @ %bb.0: @ %entry 1991; CHECK-NEXT: vctp.32 r0 1992; CHECK-NEXT: vpst 1993; CHECK-NEXT: vqaddt.s32 q1, q0, q1 1994; CHECK-NEXT: vmov q0, q1 1995; CHECK-NEXT: bx lr 1996entry: 1997 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1998 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1999 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2000 ret <4 x i32> %b 2001} 2002 2003define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2004; CHECK-LABEL: sadd_sat_v8i16_y: 2005; CHECK: @ %bb.0: @ %entry 2006; CHECK-NEXT: vctp.16 r0 2007; CHECK-NEXT: vpst 2008; CHECK-NEXT: vqaddt.s16 q1, q0, q1 2009; CHECK-NEXT: vmov q0, q1 2010; CHECK-NEXT: bx lr 2011entry: 2012 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2013 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2014 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2015 ret <8 x i16> %b 2016} 2017 2018define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> 
%x, <16 x i8> %y, i32 %n) { 2019; CHECK-LABEL: sadd_sat_v16i8_y: 2020; CHECK: @ %bb.0: @ %entry 2021; CHECK-NEXT: vctp.8 r0 2022; CHECK-NEXT: vpst 2023; CHECK-NEXT: vqaddt.s8 q1, q0, q1 2024; CHECK-NEXT: vmov q0, q1 2025; CHECK-NEXT: bx lr 2026entry: 2027 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2028 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2029 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2030 ret <16 x i8> %b 2031} 2032 2033define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2034; CHECK-LABEL: uadd_sat_v4i32_y: 2035; CHECK: @ %bb.0: @ %entry 2036; CHECK-NEXT: vctp.32 r0 2037; CHECK-NEXT: vpst 2038; CHECK-NEXT: vqaddt.u32 q1, q0, q1 2039; CHECK-NEXT: vmov q0, q1 2040; CHECK-NEXT: bx lr 2041entry: 2042 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2043 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2044 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2045 ret <4 x i32> %b 2046} 2047 2048define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2049; CHECK-LABEL: uadd_sat_v8i16_y: 2050; CHECK: @ %bb.0: @ %entry 2051; CHECK-NEXT: vctp.16 r0 2052; CHECK-NEXT: vpst 2053; CHECK-NEXT: vqaddt.u16 q1, q0, q1 2054; CHECK-NEXT: vmov q0, q1 2055; CHECK-NEXT: bx lr 2056entry: 2057 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2058 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2059 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2060 ret <8 x i16> %b 2061} 2062 2063define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2064; CHECK-LABEL: uadd_sat_v16i8_y: 2065; CHECK: @ %bb.0: @ %entry 2066; CHECK-NEXT: vctp.8 r0 2067; CHECK-NEXT: vpst 2068; CHECK-NEXT: vqaddt.u8 q1, q0, q1 2069; CHECK-NEXT: vmov q0, q1 2070; CHECK-NEXT: bx lr 2071entry: 2072 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2073 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2074 %b = select 
<16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2075 ret <16 x i8> %b 2076} 2077 2078define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2079; CHECK-LABEL: ssub_sat_v4i32_y: 2080; CHECK: @ %bb.0: @ %entry 2081; CHECK-NEXT: vctp.32 r0 2082; CHECK-NEXT: vpst 2083; CHECK-NEXT: vqsubt.s32 q1, q0, q1 2084; CHECK-NEXT: vmov q0, q1 2085; CHECK-NEXT: bx lr 2086entry: 2087 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2088 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2089 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2090 ret <4 x i32> %b 2091} 2092 2093define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2094; CHECK-LABEL: ssub_sat_v8i16_y: 2095; CHECK: @ %bb.0: @ %entry 2096; CHECK-NEXT: vctp.16 r0 2097; CHECK-NEXT: vpst 2098; CHECK-NEXT: vqsubt.s16 q1, q0, q1 2099; CHECK-NEXT: vmov q0, q1 2100; CHECK-NEXT: bx lr 2101entry: 2102 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2103 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2104 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2105 ret <8 x i16> %b 2106} 2107 2108define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2109; CHECK-LABEL: ssub_sat_v16i8_y: 2110; CHECK: @ %bb.0: @ %entry 2111; CHECK-NEXT: vctp.8 r0 2112; CHECK-NEXT: vpst 2113; CHECK-NEXT: vqsubt.s8 q1, q0, q1 2114; CHECK-NEXT: vmov q0, q1 2115; CHECK-NEXT: bx lr 2116entry: 2117 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2118 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2119 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2120 ret <16 x i8> %b 2121} 2122 2123define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2124; CHECK-LABEL: usub_sat_v4i32_y: 2125; CHECK: @ %bb.0: @ %entry 2126; CHECK-NEXT: vctp.32 r0 2127; CHECK-NEXT: vpst 2128; CHECK-NEXT: vqsubt.u32 q1, q0, q1 2129; CHECK-NEXT: vmov q0, q1 2130; CHECK-NEXT: bx lr 2131entry: 
2132 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2133 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2134 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2135 ret <4 x i32> %b 2136} 2137 2138define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2139; CHECK-LABEL: usub_sat_v8i16_y: 2140; CHECK: @ %bb.0: @ %entry 2141; CHECK-NEXT: vctp.16 r0 2142; CHECK-NEXT: vpst 2143; CHECK-NEXT: vqsubt.u16 q1, q0, q1 2144; CHECK-NEXT: vmov q0, q1 2145; CHECK-NEXT: bx lr 2146entry: 2147 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2148 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2149 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2150 ret <8 x i16> %b 2151} 2152 2153define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2154; CHECK-LABEL: usub_sat_v16i8_y: 2155; CHECK: @ %bb.0: @ %entry 2156; CHECK-NEXT: vctp.8 r0 2157; CHECK-NEXT: vpst 2158; CHECK-NEXT: vqsubt.u8 q1, q0, q1 2159; CHECK-NEXT: vmov q0, q1 2160; CHECK-NEXT: bx lr 2161entry: 2162 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2163 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2164 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2165 ret <16 x i8> %b 2166} 2167 2168define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2169; CHECK-LABEL: addqr_v4i32_y: 2170; CHECK: @ %bb.0: @ %entry 2171; CHECK-NEXT: vdup.32 q1, r0 2172; CHECK-NEXT: vctp.32 r1 2173; CHECK-NEXT: vpst 2174; CHECK-NEXT: vaddt.i32 q1, q0, r0 2175; CHECK-NEXT: vmov q0, q1 2176; CHECK-NEXT: bx lr 2177entry: 2178 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2179 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2180 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2181 %a = add <4 x i32> %x, %ys 2182 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2183 ret <4 x i32> %b 2184} 2185 2186define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> 
%x, i16 %y, i32 %n) { 2187; CHECK-LABEL: addqr_v8i16_y: 2188; CHECK: @ %bb.0: @ %entry 2189; CHECK-NEXT: vdup.16 q1, r0 2190; CHECK-NEXT: vctp.16 r1 2191; CHECK-NEXT: vpst 2192; CHECK-NEXT: vaddt.i16 q1, q0, r0 2193; CHECK-NEXT: vmov q0, q1 2194; CHECK-NEXT: bx lr 2195entry: 2196 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2197 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2198 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2199 %a = add <8 x i16> %x, %ys 2200 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2201 ret <8 x i16> %b 2202} 2203 2204define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2205; CHECK-LABEL: addqr_v16i8_y: 2206; CHECK: @ %bb.0: @ %entry 2207; CHECK-NEXT: vdup.8 q1, r0 2208; CHECK-NEXT: vctp.8 r1 2209; CHECK-NEXT: vpst 2210; CHECK-NEXT: vaddt.i8 q1, q0, r0 2211; CHECK-NEXT: vmov q0, q1 2212; CHECK-NEXT: bx lr 2213entry: 2214 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2215 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2216 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2217 %a = add <16 x i8> %x, %ys 2218 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2219 ret <16 x i8> %b 2220} 2221 2222define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2223; CHECK-LABEL: subqr_v4i32_y: 2224; CHECK: @ %bb.0: @ %entry 2225; CHECK-NEXT: vdup.32 q1, r0 2226; CHECK-NEXT: vctp.32 r1 2227; CHECK-NEXT: vpst 2228; CHECK-NEXT: vsubt.i32 q1, q0, r0 2229; CHECK-NEXT: vmov q0, q1 2230; CHECK-NEXT: bx lr 2231entry: 2232 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2233 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2234 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2235 %a = sub <4 x i32> %x, %ys 2236 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2237 ret <4 x i32> %b 2238} 2239 2240define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2241; CHECK-LABEL: 
subqr_v8i16_y: 2242; CHECK: @ %bb.0: @ %entry 2243; CHECK-NEXT: vdup.16 q1, r0 2244; CHECK-NEXT: vctp.16 r1 2245; CHECK-NEXT: vpst 2246; CHECK-NEXT: vsubt.i16 q1, q0, r0 2247; CHECK-NEXT: vmov q0, q1 2248; CHECK-NEXT: bx lr 2249entry: 2250 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2251 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2252 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2253 %a = sub <8 x i16> %x, %ys 2254 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2255 ret <8 x i16> %b 2256} 2257 2258define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2259; CHECK-LABEL: subqr_v16i8_y: 2260; CHECK: @ %bb.0: @ %entry 2261; CHECK-NEXT: vdup.8 q1, r0 2262; CHECK-NEXT: vctp.8 r1 2263; CHECK-NEXT: vpst 2264; CHECK-NEXT: vsubt.i8 q1, q0, r0 2265; CHECK-NEXT: vmov q0, q1 2266; CHECK-NEXT: bx lr 2267entry: 2268 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2269 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2270 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2271 %a = sub <16 x i8> %x, %ys 2272 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2273 ret <16 x i8> %b 2274} 2275 2276define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2277; CHECK-LABEL: mulqr_v4i32_y: 2278; CHECK: @ %bb.0: @ %entry 2279; CHECK-NEXT: vdup.32 q1, r0 2280; CHECK-NEXT: vctp.32 r1 2281; CHECK-NEXT: vpst 2282; CHECK-NEXT: vmult.i32 q1, q0, r0 2283; CHECK-NEXT: vmov q0, q1 2284; CHECK-NEXT: bx lr 2285entry: 2286 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2287 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2288 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2289 %a = mul <4 x i32> %x, %ys 2290 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2291 ret <4 x i32> %b 2292} 2293 2294define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2295; CHECK-LABEL: mulqr_v8i16_y: 2296; CHECK: @ %bb.0: @ %entry 
2297; CHECK-NEXT: vdup.16 q1, r0 2298; CHECK-NEXT: vctp.16 r1 2299; CHECK-NEXT: vpst 2300; CHECK-NEXT: vmult.i16 q1, q0, r0 2301; CHECK-NEXT: vmov q0, q1 2302; CHECK-NEXT: bx lr 2303entry: 2304 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2305 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2306 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2307 %a = mul <8 x i16> %x, %ys 2308 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2309 ret <8 x i16> %b 2310} 2311 2312define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2313; CHECK-LABEL: mulqr_v16i8_y: 2314; CHECK: @ %bb.0: @ %entry 2315; CHECK-NEXT: vdup.8 q1, r0 2316; CHECK-NEXT: vctp.8 r1 2317; CHECK-NEXT: vpst 2318; CHECK-NEXT: vmult.i8 q1, q0, r0 2319; CHECK-NEXT: vmov q0, q1 2320; CHECK-NEXT: bx lr 2321entry: 2322 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2323 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2324 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2325 %a = mul <16 x i8> %x, %ys 2326 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2327 ret <16 x i8> %b 2328} 2329 2330define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2331; CHECK-LABEL: faddqr_v4f32_y: 2332; CHECK: @ %bb.0: @ %entry 2333; CHECK-NEXT: vmov r1, s4 2334; CHECK-NEXT: vctp.32 r0 2335; CHECK-NEXT: vdup.32 q1, r1 2336; CHECK-NEXT: vpst 2337; CHECK-NEXT: vaddt.f32 q1, q0, r1 2338; CHECK-NEXT: vmov q0, q1 2339; CHECK-NEXT: bx lr 2340entry: 2341 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2342 %i = insertelement <4 x float> undef, float %y, i32 0 2343 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2344 %a = fadd <4 x float> %x, %ys 2345 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2346 ret <4 x float> %b 2347} 2348 2349define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2350; CHECK-LABEL: faddqr_v8f16_y: 2351; CHECK: @ 
%bb.0: @ %entry 2352; CHECK-NEXT: vmov.f16 r1, s4 2353; CHECK-NEXT: vctp.16 r0 2354; CHECK-NEXT: vdup.16 q1, r1 2355; CHECK-NEXT: vpst 2356; CHECK-NEXT: vaddt.f16 q1, q0, r1 2357; CHECK-NEXT: vmov q0, q1 2358; CHECK-NEXT: bx lr 2359entry: 2360 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2361 %i = insertelement <8 x half> undef, half %y, i32 0 2362 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2363 %a = fadd <8 x half> %x, %ys 2364 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2365 ret <8 x half> %b 2366} 2367 2368define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2369; CHECK-LABEL: fsubqr_v4f32_y: 2370; CHECK: @ %bb.0: @ %entry 2371; CHECK-NEXT: vmov r1, s4 2372; CHECK-NEXT: vctp.32 r0 2373; CHECK-NEXT: vdup.32 q1, r1 2374; CHECK-NEXT: vpst 2375; CHECK-NEXT: vsubt.f32 q1, q0, r1 2376; CHECK-NEXT: vmov q0, q1 2377; CHECK-NEXT: bx lr 2378entry: 2379 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2380 %i = insertelement <4 x float> undef, float %y, i32 0 2381 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2382 %a = fsub <4 x float> %x, %ys 2383 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2384 ret <4 x float> %b 2385} 2386 2387define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2388; CHECK-LABEL: fsubqr_v8f16_y: 2389; CHECK: @ %bb.0: @ %entry 2390; CHECK-NEXT: vmov.f16 r1, s4 2391; CHECK-NEXT: vctp.16 r0 2392; CHECK-NEXT: vdup.16 q1, r1 2393; CHECK-NEXT: vpst 2394; CHECK-NEXT: vsubt.f16 q1, q0, r1 2395; CHECK-NEXT: vmov q0, q1 2396; CHECK-NEXT: bx lr 2397entry: 2398 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2399 %i = insertelement <8 x half> undef, half %y, i32 0 2400 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2401 %a = fsub <8 x half> %x, %ys 2402 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2403 ret <8 x half> %b 2404} 2405 2406define arm_aapcs_vfpcc <4 x 
float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2407; CHECK-LABEL: fmulqr_v4f32_y: 2408; CHECK: @ %bb.0: @ %entry 2409; CHECK-NEXT: vmov r1, s4 2410; CHECK-NEXT: vctp.32 r0 2411; CHECK-NEXT: vdup.32 q1, r1 2412; CHECK-NEXT: vpst 2413; CHECK-NEXT: vmult.f32 q1, q0, r1 2414; CHECK-NEXT: vmov q0, q1 2415; CHECK-NEXT: bx lr 2416entry: 2417 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2418 %i = insertelement <4 x float> undef, float %y, i32 0 2419 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2420 %a = fmul <4 x float> %x, %ys 2421 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2422 ret <4 x float> %b 2423} 2424 2425define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2426; CHECK-LABEL: fmulqr_v8f16_y: 2427; CHECK: @ %bb.0: @ %entry 2428; CHECK-NEXT: vmov.f16 r1, s4 2429; CHECK-NEXT: vctp.16 r0 2430; CHECK-NEXT: vdup.16 q1, r1 2431; CHECK-NEXT: vpst 2432; CHECK-NEXT: vmult.f16 q1, q0, r1 2433; CHECK-NEXT: vmov q0, q1 2434; CHECK-NEXT: bx lr 2435entry: 2436 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2437 %i = insertelement <8 x half> undef, half %y, i32 0 2438 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2439 %a = fmul <8 x half> %x, %ys 2440 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2441 ret <8 x half> %b 2442} 2443 2444define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2445; CHECK-LABEL: sadd_satqr_v4i32_y: 2446; CHECK: @ %bb.0: @ %entry 2447; CHECK-NEXT: vdup.32 q1, r0 2448; CHECK-NEXT: vctp.32 r1 2449; CHECK-NEXT: vpst 2450; CHECK-NEXT: vqaddt.s32 q1, q0, r0 2451; CHECK-NEXT: vmov q0, q1 2452; CHECK-NEXT: bx lr 2453entry: 2454 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2455 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2456 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2457 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2458 %b = 
select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2459 ret <4 x i32> %b 2460} 2461 2462define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2463; CHECK-LABEL: sadd_satqr_v8i16_y: 2464; CHECK: @ %bb.0: @ %entry 2465; CHECK-NEXT: vdup.16 q1, r0 2466; CHECK-NEXT: vctp.16 r1 2467; CHECK-NEXT: vpst 2468; CHECK-NEXT: vqaddt.s16 q1, q0, r0 2469; CHECK-NEXT: vmov q0, q1 2470; CHECK-NEXT: bx lr 2471entry: 2472 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2473 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2474 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2475 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2476 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2477 ret <8 x i16> %b 2478} 2479 2480define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2481; CHECK-LABEL: sadd_satqr_v16i8_y: 2482; CHECK: @ %bb.0: @ %entry 2483; CHECK-NEXT: vdup.8 q1, r0 2484; CHECK-NEXT: vctp.8 r1 2485; CHECK-NEXT: vpst 2486; CHECK-NEXT: vqaddt.s8 q1, q0, r0 2487; CHECK-NEXT: vmov q0, q1 2488; CHECK-NEXT: bx lr 2489entry: 2490 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2491 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2492 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2493 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2494 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2495 ret <16 x i8> %b 2496} 2497 2498define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2499; CHECK-LABEL: uadd_satqr_v4i32_y: 2500; CHECK: @ %bb.0: @ %entry 2501; CHECK-NEXT: vdup.32 q1, r0 2502; CHECK-NEXT: vctp.32 r1 2503; CHECK-NEXT: vpst 2504; CHECK-NEXT: vqaddt.u32 q1, q0, r0 2505; CHECK-NEXT: vmov q0, q1 2506; CHECK-NEXT: bx lr 2507entry: 2508 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2509 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2510 %ys = shufflevector <4 x i32> %i, <4 x i32> 
undef, <4 x i32> zeroinitializer 2511 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2512 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2513 ret <4 x i32> %b 2514} 2515 2516define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2517; CHECK-LABEL: uadd_satqr_v8i16_y: 2518; CHECK: @ %bb.0: @ %entry 2519; CHECK-NEXT: vdup.16 q1, r0 2520; CHECK-NEXT: vctp.16 r1 2521; CHECK-NEXT: vpst 2522; CHECK-NEXT: vqaddt.u16 q1, q0, r0 2523; CHECK-NEXT: vmov q0, q1 2524; CHECK-NEXT: bx lr 2525entry: 2526 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2527 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2528 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2529 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2530 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2531 ret <8 x i16> %b 2532} 2533 2534define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2535; CHECK-LABEL: uadd_satqr_v16i8_y: 2536; CHECK: @ %bb.0: @ %entry 2537; CHECK-NEXT: vdup.8 q1, r0 2538; CHECK-NEXT: vctp.8 r1 2539; CHECK-NEXT: vpst 2540; CHECK-NEXT: vqaddt.u8 q1, q0, r0 2541; CHECK-NEXT: vmov q0, q1 2542; CHECK-NEXT: bx lr 2543entry: 2544 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2545 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2546 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2547 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2548 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2549 ret <16 x i8> %b 2550} 2551 2552define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2553; CHECK-LABEL: ssub_satqr_v4i32_y: 2554; CHECK: @ %bb.0: @ %entry 2555; CHECK-NEXT: vdup.32 q1, r0 2556; CHECK-NEXT: vctp.32 r1 2557; CHECK-NEXT: vpst 2558; CHECK-NEXT: vqsubt.s32 q1, q0, r0 2559; CHECK-NEXT: vmov q0, q1 2560; CHECK-NEXT: bx lr 2561entry: 2562 %c = call <4 x i1> 
@llvm.arm.mve.vctp32(i32 %n) 2563 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2564 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2565 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2566 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2567 ret <4 x i32> %b 2568} 2569 2570define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2571; CHECK-LABEL: ssub_satqr_v8i16_y: 2572; CHECK: @ %bb.0: @ %entry 2573; CHECK-NEXT: vdup.16 q1, r0 2574; CHECK-NEXT: vctp.16 r1 2575; CHECK-NEXT: vpst 2576; CHECK-NEXT: vqsubt.s16 q1, q0, r0 2577; CHECK-NEXT: vmov q0, q1 2578; CHECK-NEXT: bx lr 2579entry: 2580 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2581 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2582 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2583 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2584 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2585 ret <8 x i16> %b 2586} 2587 2588define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2589; CHECK-LABEL: ssub_satqr_v16i8_y: 2590; CHECK: @ %bb.0: @ %entry 2591; CHECK-NEXT: vdup.8 q1, r0 2592; CHECK-NEXT: vctp.8 r1 2593; CHECK-NEXT: vpst 2594; CHECK-NEXT: vqsubt.s8 q1, q0, r0 2595; CHECK-NEXT: vmov q0, q1 2596; CHECK-NEXT: bx lr 2597entry: 2598 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2599 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2600 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2601 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2602 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2603 ret <16 x i8> %b 2604} 2605 2606define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2607; CHECK-LABEL: usub_satqr_v4i32_y: 2608; CHECK: @ %bb.0: @ %entry 2609; CHECK-NEXT: vdup.32 q1, r0 2610; CHECK-NEXT: vctp.32 r1 2611; CHECK-NEXT: vpst 2612; 
CHECK-NEXT: vqsubt.u32 q1, q0, r0 2613; CHECK-NEXT: vmov q0, q1 2614; CHECK-NEXT: bx lr 2615entry: 2616 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2617 %i = insertelement <4 x i32> undef, i32 %y, i32 0 2618 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 2619 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 2620 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 2621 ret <4 x i32> %b 2622} 2623 2624define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 2625; CHECK-LABEL: usub_satqr_v8i16_y: 2626; CHECK: @ %bb.0: @ %entry 2627; CHECK-NEXT: vdup.16 q1, r0 2628; CHECK-NEXT: vctp.16 r1 2629; CHECK-NEXT: vpst 2630; CHECK-NEXT: vqsubt.u16 q1, q0, r0 2631; CHECK-NEXT: vmov q0, q1 2632; CHECK-NEXT: bx lr 2633entry: 2634 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2635 %i = insertelement <8 x i16> undef, i16 %y, i32 0 2636 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 2637 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 2638 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 2639 ret <8 x i16> %b 2640} 2641 2642define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 2643; CHECK-LABEL: usub_satqr_v16i8_y: 2644; CHECK: @ %bb.0: @ %entry 2645; CHECK-NEXT: vdup.8 q1, r0 2646; CHECK-NEXT: vctp.8 r1 2647; CHECK-NEXT: vpst 2648; CHECK-NEXT: vqsubt.u8 q1, q0, r0 2649; CHECK-NEXT: vmov q0, q1 2650; CHECK-NEXT: bx lr 2651entry: 2652 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2653 %i = insertelement <16 x i8> undef, i8 %y, i32 0 2654 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 2655 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 2656 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 2657 ret <16 x i8> %b 2658} 2659 2660declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) 2661declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) 
; Intrinsic declarations for the tests in this file. The first declaration
; below completes the llvm.sadd.sat group whose v16i8 and v8i16 variants are
; declared immediately before it.
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)

; Signed saturating subtract, called by the ssub_sat_* and ssub_satqr_* tests.
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)

; Unsigned saturating add, called by the uadd_sat_* and uadd_satqr_* tests.
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)

; Unsigned saturating subtract, called by the usub_sat_* and usub_satqr_* tests.
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)

; MVE VCTP intrinsics. Each takes the i32 %n argument of a test and returns the
; i1 vector (%c) that the test uses as the condition of its final select,
; with one variant per lane count (16, 8 and 4 lanes).
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)