; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Codegen test for the ARM MVE add-across-vector intrinsics:
;   llvm.arm.mve.addv        -> VADDV   (sum lanes into a 32-bit scalar)
;   llvm.arm.mve.addlv       -> VADDLV  (sum i32 lanes into a 64-bit r0/r1 pair)
;   *.predicated variants    -> VPT-block forms (VADDVT / VADDVAT / VADDLVT / VADDLVAT)
; The trailing i32 argument of each intrinsic is the "unsigned" flag:
; 0 selects the signed form (.sN mnemonic), 1 the unsigned form (.uN).

; ---- Unpredicated VADDV: reduce one vector to an i32 in r0. ----

define arm_aapcs_vfpcc i32 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 1)
  ret i32 %0
}

; ---- VADDVA: the IR `add` of the reduction with the incoming scalar %a
; ---- must fold into the accumulating form (a single vaddva instruction). ----

define arm_aapcs_vfpcc i32 @test_vaddvaq_s8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

; ---- Predicated VADDV: the i16 predicate mask is moved into P0 via vmsr,
; ---- and the reduction executes inside a VPT block (vpst + vaddvt). ----

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 0, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i32 %2
}

; ---- Predicated VADDVA: predicate in P0 plus accumulate folding,
; ---- expected to emit a single vaddvat inside the VPT block
; ---- (predicate arrives in r1 since r0 carries the accumulator). ----

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 0, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 0, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 1, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 1, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

; ---- VADDLV / VADDLVA: i32-lane reduction with a 64-bit result,
; ---- returned in the r0/r1 pair per AAPCS. ----

define arm_aapcs_vfpcc i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 0)
  ret i64 %0
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 1)
  ret i64 %0
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_s32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i64 %0, %a
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_u32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i64 %0, %a
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i64 %2
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i64 %2
}

; Predicate arrives in r2 here because the i64 accumulator occupies r0/r1.

define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_s32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_u32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}

; Intrinsic declarations: predicate conversion (i32 mask -> <N x i1>),
; then the unpredicated and predicated reduction intrinsics used above.

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare i32 @llvm.arm.mve.addv.v16i8(<16 x i8>, i32)
declare i32 @llvm.arm.mve.addv.v8i16(<8 x i16>, i32)
declare i32 @llvm.arm.mve.addv.v4i32(<4 x i32>, i32)
declare i64 @llvm.arm.mve.addlv.v4i32(<4 x i32>, i32)

declare i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)