; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Code generation tests for the MVE complex-add intrinsics
; @llvm.arm.mve.vcaddq.* and @llvm.arm.mve.vcaddq.predicated.*.
;
; Operand encoding, as exercised by the calls and expected assembly below:
;   * first i32 operand:  1 selects the plain form (vcadd),
;                         0 selects the halving form (vhcadd.s<N>).
;   * second i32 operand: 0 selects rotation #90, 1 selects rotation #270.
;   * predicated form:    the third operand supplies the inactive lanes
;                         (%inactive in the _m tests, undef in the _x tests)
;                         and the last operand is the lane predicate built
;                         from the i16 argument through
;                         @llvm.arm.mve.pred.i2v.*.
;
; The _u<N> and _s<N> integer tests issue identical IR; signedness is not
; visible at the intrinsic level for the non-halving form.
;
; For 32-bit lanes, whenever the result would otherwise land in one of the
; source registers, the expected output computes into q2 and then copies to q0.
; NOTE(review): presumably the 32-bit encodings may not write a destination
; that overlaps a source register; confirm against the Arm architecture manual.

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32, i32, <16 x i8>, <16 x i8>)
declare <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32, i32, <4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32, i32, <8 x i16>, <8 x i16>)
declare <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32, i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32, i32, <4 x float>, <4 x float>)

declare <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32, i32, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>)
declare <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32, i32, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>)
declare <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32, i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32, i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)

; ---------------------------------------------------------------------------
; Unpredicated vcadd, rotation #90 (operands: i32 1, i32 0).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vcaddq_rot90_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vcaddq_rot90_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vcaddq_rot90_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vcaddq_rot90_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vcaddq_rot90_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vcaddq_rot90_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaddq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 0, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcaddq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

; ---------------------------------------------------------------------------
; Unpredicated vcadd, rotation #270 (operands: i32 1, i32 1).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vcaddq_rot270_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vcaddq_rot270_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vcaddq_rot270_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vcaddq_rot270_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vcaddq_rot270_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vcaddq_rot270_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.i32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaddq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 1, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcaddq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcadd.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

; ---------------------------------------------------------------------------
; Predicated vcadd with explicit %inactive lanes (_m), rotation #90.
; The predicate arrives in r0 and is moved into p0 ahead of a vpst block.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

; ---------------------------------------------------------------------------
; Predicated vcadd with explicit %inactive lanes (_m), rotation #270.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

; ---------------------------------------------------------------------------
; Predicated vcadd with undef inactive lanes (_x), rotation #90.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot90_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

; ---------------------------------------------------------------------------
; Predicated vcadd with undef inactive lanes (_x), rotation #270.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.i32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcaddq_rot270_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcaddt.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

; ---------------------------------------------------------------------------
; Unpredicated halving form (first operand i32 0), expected as vhcadd.s<N>.
; Only integer element types are exercised here.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vhcaddq_rot90_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vhcaddq_rot90_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vhcaddq_rot90_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vhcaddq_rot270_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 1, <16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vhcaddq_rot270_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 1, <8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vhcaddq_rot270_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vhcadd.s32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; ---------------------------------------------------------------------------
; Predicated halving form with undef inactive lanes (_x).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_x_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

; ---------------------------------------------------------------------------
; Predicated halving form with explicit %inactive lanes (_m).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s8 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot270_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vhcaddt.s32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret <4 x i32> %2
}