; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s

; Codegen tests for the MVE shift-left intrinsics @llvm.arm.mve.vshl.vector and
; @llvm.arm.mve.vshl.scalar, plus their predicated forms.
;
; Going by the expected assembly recorded in each test, the three trailing i32
; immediates of every intrinsic call select, in order:
;   1st = 1  ->  saturating variant (vqshl / vqrshl)
;   2nd = 1  ->  rounding variant   (vrshl / vqrshl)
;   3rd = 1  ->  unsigned element type (.uN instead of .sN)

; ---------------------------------------------------------------------------
; vshlq: plain shift, per-lane shift counts taken from a second vector.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vshlq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshlq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshlq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vshlq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshlq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshlq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vshlq_r: plain shift, shift count supplied as a single i32 scalar.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_r_s8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_r_s16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_r_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_r_u8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_r_u16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_r_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vshlq_r_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqshlq: saturating shift (first immediate = 1), vector shift counts.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqshlq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshlq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshlq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqshlq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshlq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshlq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqshlq_r: saturating shift, scalar shift count.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_r_s8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_r_s16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_r_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_r_u8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_r_u16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_r_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqshlq_r_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vrshlq: rounding shift (second immediate = 1), vector shift counts.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vrshlq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshlq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshlq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vrshlq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshlq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshlq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vrshlq_n: rounding shift, scalar shift count.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_n_s8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_n_s16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_n_u8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_n_u16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vrshlq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqrshlq: saturating + rounding shift (first and second immediates = 1),
; vector shift counts.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrshlq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshlq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshlq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrshlq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshlq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshlq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqrshlq_n: saturating + rounding shift, scalar shift count.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_n_s8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_n_s16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_n_u8(<16 x i8> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_n_u16(<8 x i16> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqrshlq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vshlq_m: predicated plain shift. The i16 predicate is zero-extended, turned
; into a lane mask with pred.i2v, and %inactive is passed as the final operand
; of the predicated intrinsic.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vshlq_x: predicated plain shift with no %inactive argument; the final
; operand of the predicated intrinsic is undef.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vshlq_m_r: predicated plain shift with a scalar shift count. The scalar
; predicated intrinsic takes no inactive operand.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_r_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 0, <16 x i1> %mask)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_r_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 0, <8 x i1> %mask)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_r_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 0, <4 x i1> %mask)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_r_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 1, <16 x i1> %mask)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_r_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 1, <8 x i1> %mask)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_r_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_r_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 1, <4 x i1> %mask)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqshlq_m: predicated saturating shift, vector shift counts, with %inactive.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vqshlq_m_r: predicated saturating shift, scalar shift count.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_r_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 0, <16 x i1> %mask)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_r_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 0, <8 x i1> %mask)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_r_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 0, <4 x i1> %mask)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_r_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 1, <16 x i1> %mask)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_r_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 1, <8 x i1> %mask)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_r_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_r_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 1, <4 x i1> %mask)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; vrshlq_m: predicated rounding shift, vector shift counts, with %inactive.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshlq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshlt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshlq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshlt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshlq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshlt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshlq_m_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshlt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8>
@llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1, <16 x i8> %inactive) 957 ret <16 x i8> %2 958} 959 960define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 961; CHECK-LABEL: test_vrshlq_m_u16: 962; CHECK: @ %bb.0: @ %entry 963; CHECK-NEXT: vmsr p0, r0 964; CHECK-NEXT: vpst 965; CHECK-NEXT: vrshlt.u16 q0, q1, q2 966; CHECK-NEXT: bx lr 967entry: 968 %0 = zext i16 %p to i32 969 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 970 %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) 971 ret <8 x i16> %2 972} 973 974define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 975; CHECK-LABEL: test_vrshlq_m_u32: 976; CHECK: @ %bb.0: @ %entry 977; CHECK-NEXT: vmsr p0, r0 978; CHECK-NEXT: vpst 979; CHECK-NEXT: vrshlt.u32 q0, q1, q2 980; CHECK-NEXT: bx lr 981entry: 982 %0 = zext i16 %p to i32 983 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 984 %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) 985 ret <4 x i32> %2 986} 987 988define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 989; CHECK-LABEL: test_vrshlq_x_s8: 990; CHECK: @ %bb.0: @ %entry 991; CHECK-NEXT: vmsr p0, r0 992; CHECK-NEXT: vpst 993; CHECK-NEXT: vrshlt.s8 q0, q0, q1 994; CHECK-NEXT: bx lr 995entry: 996 %0 = zext i16 %p to i32 997 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 998 %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %1, <16 x i8> undef) 999 ret <16 x i8> %2 1000} 1001 1002define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_x_s16(<8 x i16> %a, <8 x i16> 
%b, i16 zeroext %p) { 1003; CHECK-LABEL: test_vrshlq_x_s16: 1004; CHECK: @ %bb.0: @ %entry 1005; CHECK-NEXT: vmsr p0, r0 1006; CHECK-NEXT: vpst 1007; CHECK-NEXT: vrshlt.s16 q0, q0, q1 1008; CHECK-NEXT: bx lr 1009entry: 1010 %0 = zext i16 %p to i32 1011 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1012 %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef) 1013 ret <8 x i16> %2 1014} 1015 1016define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1017; CHECK-LABEL: test_vrshlq_x_s32: 1018; CHECK: @ %bb.0: @ %entry 1019; CHECK-NEXT: vmsr p0, r0 1020; CHECK-NEXT: vpst 1021; CHECK-NEXT: vrshlt.s32 q0, q0, q1 1022; CHECK-NEXT: bx lr 1023entry: 1024 %0 = zext i16 %p to i32 1025 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1026 %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef) 1027 ret <4 x i32> %2 1028} 1029 1030define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1031; CHECK-LABEL: test_vrshlq_x_u8: 1032; CHECK: @ %bb.0: @ %entry 1033; CHECK-NEXT: vmsr p0, r0 1034; CHECK-NEXT: vpst 1035; CHECK-NEXT: vrshlt.u8 q0, q0, q1 1036; CHECK-NEXT: bx lr 1037entry: 1038 %0 = zext i16 %p to i32 1039 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1040 %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef) 1041 ret <16 x i8> %2 1042} 1043 1044define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1045; CHECK-LABEL: test_vrshlq_x_u16: 1046; CHECK: @ %bb.0: @ %entry 1047; CHECK-NEXT: vmsr p0, r0 1048; CHECK-NEXT: vpst 1049; CHECK-NEXT: vrshlt.u16 q0, q0, q1 1050; CHECK-NEXT: bx lr 1051entry: 1052 %0 = zext i16 %p to i32 1053 %1 = 
call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1054 %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef) 1055 ret <8 x i16> %2 1056} 1057 1058define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1059; CHECK-LABEL: test_vrshlq_x_u32: 1060; CHECK: @ %bb.0: @ %entry 1061; CHECK-NEXT: vmsr p0, r0 1062; CHECK-NEXT: vpst 1063; CHECK-NEXT: vrshlt.u32 q0, q0, q1 1064; CHECK-NEXT: bx lr 1065entry: 1066 %0 = zext i16 %p to i32 1067 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1068 %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef) 1069 ret <4 x i32> %2 1070} 1071 1072define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_n_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) { 1073; CHECK-LABEL: test_vrshlq_m_n_s8: 1074; CHECK: @ %bb.0: @ %entry 1075; CHECK-NEXT: vmsr p0, r1 1076; CHECK-NEXT: vpst 1077; CHECK-NEXT: vrshlt.s8 q0, r0 1078; CHECK-NEXT: bx lr 1079entry: 1080 %0 = zext i16 %p to i32 1081 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1082 %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 0, <16 x i1> %1) 1083 ret <16 x i8> %2 1084} 1085 1086define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_n_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) { 1087; CHECK-LABEL: test_vrshlq_m_n_s16: 1088; CHECK: @ %bb.0: @ %entry 1089; CHECK-NEXT: vmsr p0, r1 1090; CHECK-NEXT: vpst 1091; CHECK-NEXT: vrshlt.s16 q0, r0 1092; CHECK-NEXT: bx lr 1093entry: 1094 %0 = zext i16 %p to i32 1095 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1096 %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 0, <8 x i1> %1) 1097 ret <8 x i16> %2 1098} 1099 1100define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_n_s32(<4 x i32> %a, i32 %b, i16 
zeroext %p) { 1101; CHECK-LABEL: test_vrshlq_m_n_s32: 1102; CHECK: @ %bb.0: @ %entry 1103; CHECK-NEXT: vmsr p0, r1 1104; CHECK-NEXT: vpst 1105; CHECK-NEXT: vrshlt.s32 q0, r0 1106; CHECK-NEXT: bx lr 1107entry: 1108 %0 = zext i16 %p to i32 1109 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1110 %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 0, <4 x i1> %1) 1111 ret <4 x i32> %2 1112} 1113 1114define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_n_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) { 1115; CHECK-LABEL: test_vrshlq_m_n_u8: 1116; CHECK: @ %bb.0: @ %entry 1117; CHECK-NEXT: vmsr p0, r1 1118; CHECK-NEXT: vpst 1119; CHECK-NEXT: vrshlt.u8 q0, r0 1120; CHECK-NEXT: bx lr 1121entry: 1122 %0 = zext i16 %p to i32 1123 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1124 %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 1, <16 x i1> %1) 1125 ret <16 x i8> %2 1126} 1127 1128define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_n_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) { 1129; CHECK-LABEL: test_vrshlq_m_n_u16: 1130; CHECK: @ %bb.0: @ %entry 1131; CHECK-NEXT: vmsr p0, r1 1132; CHECK-NEXT: vpst 1133; CHECK-NEXT: vrshlt.u16 q0, r0 1134; CHECK-NEXT: bx lr 1135entry: 1136 %0 = zext i16 %p to i32 1137 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1138 %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 1, <8 x i1> %1) 1139 ret <8 x i16> %2 1140} 1141 1142define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_n_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) { 1143; CHECK-LABEL: test_vrshlq_m_n_u32: 1144; CHECK: @ %bb.0: @ %entry 1145; CHECK-NEXT: vmsr p0, r1 1146; CHECK-NEXT: vpst 1147; CHECK-NEXT: vrshlt.u32 q0, r0 1148; CHECK-NEXT: bx lr 1149entry: 1150 %0 = zext i16 %p to i32 1151 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1152 %2 = call <4 x i32> 
@llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 1, <4 x i1> %1) 1153 ret <4 x i32> %2 1154} 1155 1156define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1157; CHECK-LABEL: test_vqrshlq_m_s8: 1158; CHECK: @ %bb.0: @ %entry 1159; CHECK-NEXT: vmsr p0, r0 1160; CHECK-NEXT: vpst 1161; CHECK-NEXT: vqrshlt.s8 q0, q1, q2 1162; CHECK-NEXT: bx lr 1163entry: 1164 %0 = zext i16 %p to i32 1165 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1166 %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0, <16 x i1> %1, <16 x i8> %inactive) 1167 ret <16 x i8> %2 1168} 1169 1170define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1171; CHECK-LABEL: test_vqrshlq_m_s16: 1172; CHECK: @ %bb.0: @ %entry 1173; CHECK-NEXT: vmsr p0, r0 1174; CHECK-NEXT: vpst 1175; CHECK-NEXT: vqrshlt.s16 q0, q1, q2 1176; CHECK-NEXT: bx lr 1177entry: 1178 %0 = zext i16 %p to i32 1179 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1180 %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive) 1181 ret <8 x i16> %2 1182} 1183 1184define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1185; CHECK-LABEL: test_vqrshlq_m_s32: 1186; CHECK: @ %bb.0: @ %entry 1187; CHECK-NEXT: vmsr p0, r0 1188; CHECK-NEXT: vpst 1189; CHECK-NEXT: vqrshlt.s32 q0, q1, q2 1190; CHECK-NEXT: bx lr 1191entry: 1192 %0 = zext i16 %p to i32 1193 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1194 %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive) 1195 ret <4 x i32> %2 1196} 1197 1198define arm_aapcs_vfpcc <16 x i8> 
@test_vqrshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1199; CHECK-LABEL: test_vqrshlq_m_u8: 1200; CHECK: @ %bb.0: @ %entry 1201; CHECK-NEXT: vmsr p0, r0 1202; CHECK-NEXT: vpst 1203; CHECK-NEXT: vqrshlt.u8 q0, q1, q2 1204; CHECK-NEXT: bx lr 1205entry: 1206 %0 = zext i16 %p to i32 1207 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1208 %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1, <16 x i1> %1, <16 x i8> %inactive) 1209 ret <16 x i8> %2 1210} 1211 1212define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1213; CHECK-LABEL: test_vqrshlq_m_u16: 1214; CHECK: @ %bb.0: @ %entry 1215; CHECK-NEXT: vmsr p0, r0 1216; CHECK-NEXT: vpst 1217; CHECK-NEXT: vqrshlt.u16 q0, q1, q2 1218; CHECK-NEXT: bx lr 1219entry: 1220 %0 = zext i16 %p to i32 1221 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1222 %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) 1223 ret <8 x i16> %2 1224} 1225 1226define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1227; CHECK-LABEL: test_vqrshlq_m_u32: 1228; CHECK: @ %bb.0: @ %entry 1229; CHECK-NEXT: vmsr p0, r0 1230; CHECK-NEXT: vpst 1231; CHECK-NEXT: vqrshlt.u32 q0, q1, q2 1232; CHECK-NEXT: bx lr 1233entry: 1234 %0 = zext i16 %p to i32 1235 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1236 %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) 1237 ret <4 x i32> %2 1238} 1239 1240define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_n_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) { 1241; CHECK-LABEL: test_vqrshlq_m_n_s8: 1242; CHECK: @ %bb.0: @ %entry 1243; CHECK-NEXT: vmsr p0, r1 1244; 
CHECK-NEXT: vpst 1245; CHECK-NEXT: vqrshlt.s8 q0, r0 1246; CHECK-NEXT: bx lr 1247entry: 1248 %0 = zext i16 %p to i32 1249 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1250 %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 0, <16 x i1> %1) 1251 ret <16 x i8> %2 1252} 1253 1254define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_n_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) { 1255; CHECK-LABEL: test_vqrshlq_m_n_s16: 1256; CHECK: @ %bb.0: @ %entry 1257; CHECK-NEXT: vmsr p0, r1 1258; CHECK-NEXT: vpst 1259; CHECK-NEXT: vqrshlt.s16 q0, r0 1260; CHECK-NEXT: bx lr 1261entry: 1262 %0 = zext i16 %p to i32 1263 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1264 %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 0, <8 x i1> %1) 1265 ret <8 x i16> %2 1266} 1267 1268define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_n_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) { 1269; CHECK-LABEL: test_vqrshlq_m_n_s32: 1270; CHECK: @ %bb.0: @ %entry 1271; CHECK-NEXT: vmsr p0, r1 1272; CHECK-NEXT: vpst 1273; CHECK-NEXT: vqrshlt.s32 q0, r0 1274; CHECK-NEXT: bx lr 1275entry: 1276 %0 = zext i16 %p to i32 1277 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1278 %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 0, <4 x i1> %1) 1279 ret <4 x i32> %2 1280} 1281 1282define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_n_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) { 1283; CHECK-LABEL: test_vqrshlq_m_n_u8: 1284; CHECK: @ %bb.0: @ %entry 1285; CHECK-NEXT: vmsr p0, r1 1286; CHECK-NEXT: vpst 1287; CHECK-NEXT: vqrshlt.u8 q0, r0 1288; CHECK-NEXT: bx lr 1289entry: 1290 %0 = zext i16 %p to i32 1291 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1292 %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 1, <16 x i1> %1) 1293 ret <16 x i8> %2 1294} 1295 1296define 
arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_n_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) { 1297; CHECK-LABEL: test_vqrshlq_m_n_u16: 1298; CHECK: @ %bb.0: @ %entry 1299; CHECK-NEXT: vmsr p0, r1 1300; CHECK-NEXT: vpst 1301; CHECK-NEXT: vqrshlt.u16 q0, r0 1302; CHECK-NEXT: bx lr 1303entry: 1304 %0 = zext i16 %p to i32 1305 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1306 %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 1, <8 x i1> %1) 1307 ret <8 x i16> %2 1308} 1309 1310define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_n_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) { 1311; CHECK-LABEL: test_vqrshlq_m_n_u32: 1312; CHECK: @ %bb.0: @ %entry 1313; CHECK-NEXT: vmsr p0, r1 1314; CHECK-NEXT: vpst 1315; CHECK-NEXT: vqrshlt.u32 q0, r0 1316; CHECK-NEXT: bx lr 1317entry: 1318 %0 = zext i16 %p to i32 1319 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1320 %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 1, <4 x i1> %1) 1321 ret <4 x i32> %2 1322} 1323 1324declare <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8>, <16 x i8>, i32, i32, i32) 1325declare <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16>, <8 x i16>, i32, i32, i32) 1326declare <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) 1327declare <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8>, i32, i32, i32, i32) 1328declare <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16>, i32, i32, i32, i32) 1329declare <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32>, i32, i32, i32, i32) 1330declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) 1331declare <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, i32, i32, <16 x i1>, <16 x i8>) 1332declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) 1333declare <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, i32, i32, <8 x i1>, 
<8 x i16>) 1334declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) 1335declare <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, i32, <4 x i1>, <4 x i32>) 1336declare <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8>, i32, i32, i32, i32, <16 x i1>) 1337declare <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16>, i32, i32, i32, i32, <8 x i1>) 1338declare <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32>, i32, i32, i32, i32, <4 x i1>) 1339