; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; Check generated fused MAC and MLS.
;
; Layout of this file:
;   1. Scalar fmul + fadd/fsub patterns that fast contraction should fuse.
;   2. NEON vector fmul + fadd/fsub patterns.
;   3. Direct uses of the llvm.fma.* intrinsics, including negated operands.
;   4. Constant folding / operand canonicalization of llvm.fma.
;   5. Splitting of a wider-than-legal vector fma.

;------------------------------------------------------------------------------
; 1. Scalar patterns: a separate multiply feeding an add/sub.
;------------------------------------------------------------------------------

; d1 * d2 + d3
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
; CHECK-LABEL: fusedMACTest1:
; CHECK: vfma.f64
  %1 = fmul double %d1, %d2
  %2 = fadd double %1, %d3
  ret double %2
}

; f1 * f2 + f3
define float @fusedMACTest2(float %f1, float %f2, float %f3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK: vfma.f32
  %1 = fmul float %f1, %f2
  %2 = fadd float %1, %f3
  ret float %2
}

; d1 - d2 * d3
define double @fusedMACTest3(double %d1, double %d2, double %d3) {
; CHECK-LABEL: fusedMACTest3:
; CHECK: vfms.f64
  %1 = fmul double %d2, %d3
  %2 = fsub double %d1, %1
  ret double %2
}

; f1 - f2 * f3
define float @fusedMACTest4(float %f1, float %f2, float %f3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK: vfms.f32
  %1 = fmul float %f2, %f3
  %2 = fsub float %f1, %1
  ret float %2
}

; -(d1 * d2) - d3
define double @fusedMACTest5(double %d1, double %d2, double %d3) {
; CHECK-LABEL: fusedMACTest5:
; CHECK: vfnma.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double -0.0, %1
  %3 = fsub double %2, %d3
  ret double %3
}

; -(f1 * f2) - f3
define float @fusedMACTest6(float %f1, float %f2, float %f3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK: vfnma.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float -0.0, %1
  %3 = fsub float %2, %f3
  ret float %3
}

; d1 * d2 - d3
define double @fusedMACTest7(double %d1, double %d2, double %d3) {
; CHECK-LABEL: fusedMACTest7:
; CHECK: vfnms.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double %1, %d3
  ret double %2
}

; f1 * f2 - f3
define float @fusedMACTest8(float %f1, float %f2, float %f3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK: vfnms.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float %1, %f3
  ret float %2
}

;------------------------------------------------------------------------------
; 2. NEON vector patterns (<2 x float> and <4 x float>).
;------------------------------------------------------------------------------

; a * b + a
define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: fusedMACTest9:
; CHECK: vfma.f32
  %mul = fmul <2 x float> %a, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}

; a - a * b
define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: fusedMACTest10:
; CHECK: vfms.f32
  %mul = fmul <2 x float> %a, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}

; a * b + a
define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fusedMACTest11:
; CHECK: vfma.f32
  %mul = fmul <4 x float> %a, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

; a - a * b
define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fusedMACTest12:
; CHECK: vfms.f32
  %mul = fmul <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}

;------------------------------------------------------------------------------
; 3. Direct llvm.fma.* intrinsic calls.
;------------------------------------------------------------------------------

; fma(a, b, c)
define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_f32:
; CHECK: vfma.f32
  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
  ret float %tmp1
}

; fma(a, b, c)
define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_f64:
; CHECK: vfma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  ret double %tmp1
}

; fma(a, b, c) on a 64-bit vector
define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_v2f32:
; CHECK: vfma.f32
  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %tmp1
}

; fma(-a, b, c): negation on the first multiplicand
define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fms_f64:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  ret double %tmp2
}

; fma(a, -b, c): negation on the second multiplicand
define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fms_f64_2:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  ret double %tmp2
}

; fma(a, b, -(*c)): the addend is loaded from memory and negated.
; This function is not marked readnone because it reads through %c.
define float @test_fnms_f32(float %a, float %b, float* %c) nounwind ssp {
; CHECK-LABEL: test_fnms_f32:
; CHECK: vfnms.f32
  %tmp1 = load float, float* %c, align 4
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
  ret float %tmp3
}

; -fma(-a, b, c)
define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnms_f64:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

; -fma(a, -b, c)
define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnms_f64_2:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

; -fma(a, b, c)
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnma_f64:
; CHECK: vfnma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  %tmp2 = fsub double -0.0, %tmp1
  ret double %tmp2
}

; fma(-a, b, -c)
define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnma_f64_2:
; CHECK: vfnma.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = fsub double -0.0, %c
  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
  ret double %tmp3
}

;------------------------------------------------------------------------------
; 4. Folding and canonicalization of llvm.fma.
;------------------------------------------------------------------------------

; fma(a, 1.0, b): expected to become a plain add, with no multiply left over.
define float @test_fma_const_fold(float %a, float %b) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK-NOT: vfma
; CHECK-NOT: vmul
; CHECK: vadd
  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
  ret float %ret
}

; fma(2.0, a, b): the materialized constant is expected to be the last source
; operand of the fused instruction.
define float @test_fma_canonicalize(float %a, float %b) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
  ret float %ret
}

;------------------------------------------------------------------------------
; 5. Wide vector splitting.
;------------------------------------------------------------------------------

; Check that very wide vector fma's can be split into legal fma's.
; This function is not marked readnone because it stores the result through %p.
define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind ssp {
; CHECK-LABEL: test_fma_v8f32:
; CHECK: vfma.f32
; CHECK: vfma.f32
entry:
  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
  store <8 x float> %call, <8 x float>* %p, align 16
  ret void
}


declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone