; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s

; Code-generation tests for the llvm.arm.mve.vqdmull intrinsics and their
; predicated counterparts.
;
; Every test calls one of the intrinsics declared below and pins the exact
; instruction sequence llc is expected to emit. The tests vary along four axes:
;   * bottom/top : the i32 immediate operand is 0 in the "vqdmullbq" tests
;                  (expected mnemonic vqdmullb) and 1 in the "vqdmulltq" tests
;                  (expected mnemonic vqdmullt).
;   * s16/s32    : <8 x i16> inputs produce <4 x i32>; <4 x i32> inputs
;                  produce <2 x i64>.
;   * _n         : the second operand is a scalar splatted across the vector
;                  with insertelement + shufflevector; the expected assembly
;                  uses the scalar register (r0) directly as that operand.
;   * _m         : predicated form; the i16 %p argument is zero-extended,
;                  converted to <4 x i1> with pred.i2v, and passed together
;                  with an %inactive vector. The expected assembly is
;                  vmsr p0 / vpst followed by the "t"-suffixed instruction.

; Converts an i32 mask into a <4 x i1> predicate vector.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; Unpredicated forms: (lhs, rhs, i32 immediate).
declare <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32)
declare <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32>, <4 x i32>, i32)
; Predicated forms: (lhs, rhs, i32 immediate, predicate, inactive).
declare <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>)

; ---------------------------------------------------------------------------
; Bottom variants (immediate operand 0).
; ---------------------------------------------------------------------------

; Vector x vector, 16-bit inputs: result is written straight into q0.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmullbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
  ret <4 x i32> %0
}

; Vector x vector, 32-bit inputs: the expected code uses a separate destination
; register and then copies it into q0.
; NOTE(review): presumably the .s32 form may not overlap destination and
; source registers - confirm against the architecture reference.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmullbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s32 q2, q0, q1
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  ret <2 x i64> %0
}

; Predicated vector x vector, 16-bit inputs; %inactive arrives in q0, which is
; also the destination of the predicated instruction.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

; Predicated vector x vector, 32-bit inputs.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; Vector x splatted scalar, 16-bit inputs: the splat is expected to be folded
; into the scalar-register operand (r0).
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_n_s16(<8 x i16> %a, i16 signext %b) {
; CHECK-LABEL: test_vqdmullbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 0)
  ret <4 x i32> %0
}

; Vector x splatted scalar, 32-bit inputs: separate destination register plus
; a copy into q0, as in the vector x vector .s32 case.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmullbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 0)
  ret <2 x i64> %0
}

; Predicated vector x splatted scalar, 16-bit inputs: the scalar is in r0, so
; the predicate mask is read from r1.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

; Predicated vector x splatted scalar, 32-bit inputs.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %.splat, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; ---------------------------------------------------------------------------
; Top variants (immediate operand 1). These mirror the bottom variants above
; one-for-one; only the immediate and the expected mnemonic differ.
; ---------------------------------------------------------------------------

; Vector x vector, 16-bit inputs.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmulltq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
  ret <4 x i32> %0
}

; Vector x vector, 32-bit inputs: separate destination register plus a copy
; into q0.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmulltq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s32 q2, q0, q1
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
  ret <2 x i64> %0
}

; Predicated vector x vector, 16-bit inputs.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

; Predicated vector x vector, 32-bit inputs.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; Vector x splatted scalar, 16-bit inputs.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_n_s16(<8 x i16> %a, i16 signext %b) {
; CHECK-LABEL: test_vqdmulltq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 1)
  ret <4 x i32> %0
}

; Vector x splatted scalar, 32-bit inputs: separate destination register plus
; a copy into q0.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmulltq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 1)
  ret <2 x i64> %0
}

; Predicated vector x splatted scalar, 16-bit inputs.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

; Predicated vector x splatted scalar, 32-bit inputs.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %.splat, i32 1, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}