; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev16q_m_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev16t.8 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev32q_m_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev32t.8 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev32q_m_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev32t.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev32q_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev32t.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev64q_m_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev64t.8 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev64q_m_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev64t.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev64q_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev64t.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev64q_m_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev64t.32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, i32 64, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrev64q_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrev64t.32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive)
  ret <4 x float> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>)
declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>)
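
; Descriptive summary of the pattern exercised above (a reader's note, not
; part of the autogenerated checks): @llvm.arm.mve.pred.i2v.* converts the
; scalar predicate %p (moved into p0 via vmsr) into a vector of i1 lanes, and
; @llvm.arm.mve.vrev.predicated.* takes (source vector, reversal width in
; bits, predicate vector, inactive vector). Lanes whose predicate bit is set
; receive the element-reversed source; the remaining lanes keep the
; corresponding lane of %inactive, which is why each test checks a vrevNNt
; instruction inside a vpst block writing q0, the register holding %inactive.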