; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Code-generation checks for the MVE vcmlaq intrinsics on f16 and f32 vectors.
; The leading i32 immediate of each intrinsic call is the rotation selector:
; the checks below expect 0, 1, 2 and 3 to be printed as #0, #90, #180 and
; #270 respectively.

; Converts an i32 predicate value into a vector of i1 lanes.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; Unpredicated forms: (rotation, a, b, c).
declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)

; Predicated forms: (rotation, a, b, c, lane predicate).
declare <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)

;------------------------------------------------------------------------------
; Unpredicated variants: each call is expected to become one vcmla instruction.
;------------------------------------------------------------------------------

; Rotation selector 0, f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

; Rotation selector 0, f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

; Rotation selector 1 (#90), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot90_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 1, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

; Rotation selector 1 (#90), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot90_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

; Rotation selector 2 (#180), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot180_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot180_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

; Rotation selector 2 (#180), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot180_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot180_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 2, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

; Rotation selector 3 (#270), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot270_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vcmlaq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

; Rotation selector 3 (#270), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot270_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vcmlaq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 3, <4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

;------------------------------------------------------------------------------
; Predicated (_m) variants: the i16 predicate %p is zero-extended to i32,
; converted to an i1 vector with pred.i2v, and passed as the last operand.
; The checks expect a vmsr to p0, a vpst, and a vcmlat instruction.
;------------------------------------------------------------------------------

; Predicated, rotation selector 0, f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 0, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %pred)
  ret <8 x half> %res
}

; Predicated, rotation selector 0, f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %pred)
  ret <4 x float> %res
}

; Predicated, rotation selector 1 (#90), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot90_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 1, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %pred)
  ret <8 x half> %res
}

; Predicated, rotation selector 1 (#90), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot90_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %pred)
  ret <4 x float> %res
}

; Predicated, rotation selector 2 (#180), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot180_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot180_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 2, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %pred)
  ret <8 x half> %res
}

; Predicated, rotation selector 2 (#180), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot180_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot180_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 2, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %pred)
  ret <4 x float> %res
}

; Predicated, rotation selector 3 (#270), f16 lanes.
define arm_aapcs_vfpcc <8 x half> @test_vcmlaq_rot270_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 3, <8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %pred)
  ret <8 x half> %res
}

; Predicated, rotation selector 3 (#270), f32 lanes.
define arm_aapcs_vfpcc <4 x float> @test_vcmlaq_rot270_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vcmlaq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmlat.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 3, <4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %pred)
  ret <4 x float> %res
}