; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Code generation tests for MVE vector subtraction. Covered forms:
;   * plain IR sub/fsub on full vectors,
;   * the predicated intrinsic llvm.arm.mve.sub.predicated with an explicit
;     %inactive operand (_m_) and with an undef inactive operand (_x_),
;   * the same three forms with the second operand built as a scalar splat (_n_).
; NOTE(review): attribute groups #0, #1 and #2 are referenced below but no
; definition for them is visible in this file - confirm that is intentional.

; Unpredicated integer subtract of two <4 x i32> vectors; a single vsub.i32 is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vsubq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vsubq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <4 x i32> %a, %b
  ret <4 x i32> %0
}

; Unpredicated floating-point subtract of two <8 x half> vectors; a single vsub.f16 is expected.
define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vsubq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = fsub <8 x half> %a, %b
  ret <8 x half> %0
}

; Predicated <16 x i8> subtract: %p is zero-extended, converted to <16 x i1> by
; pred.i2v, and %inactive is passed as the last intrinsic operand. The expected
; output moves the predicate into p0 and issues a VPST-predicated vsubt.i8.
define arm_aapcs_vfpcc <16 x i8> @test_vsubq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vsubq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2

declare <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2

; Predicated <4 x float> subtract with an explicit %inactive operand; a
; VPST-predicated vsubt.f32 is expected.
define arm_aapcs_vfpcc <4 x float> @test_vsubq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vsubq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive)
  ret <4 x float> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2

declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2

; Predicated <8 x i16> subtract whose inactive operand is undef; the expected
; vsubt.i16 reuses q0 as both destination and first source.
define arm_aapcs_vfpcc <8 x i16> @test_vsubq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vsubq_x_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2

declare <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2

; Predicated <8 x half> subtract whose inactive operand is undef; a
; VPST-predicated vsubt.f16 is expected.
define arm_aapcs_vfpcc <8 x half> @test_vsubq_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vsubq_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> undef)
  ret <8 x half> %2
}

declare <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2

; Vector-minus-scalar: %b is splatted with insertelement + shufflevector and
; subtracted from %a. The expected vsub.i32 takes the scalar directly from r0.
define arm_aapcs_vfpcc <4 x i32> @test_vsubq_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vsubq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = sub <4 x i32> %a, %.splat
  ret <4 x i32> %0
}

; Vector-minus-scalar for half: the scalar arrives as float %b.coerce and is
; narrowed to half via bitcast/trunc/bitcast before being splatted. The expected
; output moves s4 into r0 and uses the scalar form of vsub.f16.
define arm_aapcs_vfpcc <8 x half> @test_vsubq_n_f16(<8 x half> %a, float %b.coerce) {
; CHECK-LABEL: test_vsubq_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vsub.f16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %b.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
  %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
  %2 = fsub <8 x half> %a, %.splat
  ret <8 x half> %2
}

; Predicated vector-minus-scalar on <16 x i8> with an explicit %inactive
; operand; the expected vsubt.i8 takes the scalar from r0 and the predicate from r1.
define arm_aapcs_vfpcc <16 x i8> @test_vsubq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i8 signext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsubq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <16 x i8> undef, i8 %b, i32 0
  %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %.splat, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

; Predicated vector-minus-scalar on <4 x float> with an explicit %inactive
; operand; the expected output moves the float scalar from s8 into r1 first.
define arm_aapcs_vfpcc <4 x float> @test_vsubq_m_n_f32(<4 x float> %inactive, <4 x float> %a, float %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsubq_m_n_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s8
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q1, r1
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x float> undef, float %b, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %.splat, <4 x i1> %1, <4 x float> %inactive)
  ret <4 x float> %2
}

; Predicated vector-minus-scalar on <8 x i16> with an undef inactive operand;
; the expected vsubt.i16 takes the scalar from r0 and the predicate from r1.
define arm_aapcs_vfpcc <8 x i16> @test_vsubq_x_n_u16(<8 x i16> %a, i16 zeroext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsubq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %.splat, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}

; Predicated vector-minus-scalar on <8 x half> with an undef inactive operand;
; the half scalar is recovered from float %b.coerce via bitcast/trunc/bitcast,
; and the expected output moves s4 into r1 before the predicated vsubt.f16.
define arm_aapcs_vfpcc <8 x half> @test_vsubq_x_n_f16(<8 x half> %a, float %b.coerce, i16 zeroext %p) {
; CHECK-LABEL: test_vsubq_x_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %b.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
  %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
  %2 = zext i16 %p to i32
  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  %4 = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %.splat, <8 x i1> %3, <8 x half> undef)
  ret <8 x half> %4
}