; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Scalar fused multiply-add / multiply-subtract where one multiplicand is a
; lane extracted from a vector. Every function below extracts a lane with
; extractelement, feeds it to the llvm.fma intrinsic, and the checks expect
; the by-element form of fmla / fmls that names the vector lane directly.
;
; Label directives end in ':' so they anchor on the function's asm label and
; cannot match a longer function name that merely shares the same prefix
; (for example the "_swap" variants).

declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)

; fma(%b, %v[3], %a) with a <4 x float> source vector.
define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmla_ss4S:
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

; Same as above, but the extracted lane is the first multiplicand.
; NOTE(review): %a is passed as both the second multiplicand and the addend
; and %b is unused, whereas test_fmla_dd2D_swap uses %b as the multiplicand.
; Confirm whether this is intentional before changing it.
define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmla_ss4S_swap:
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
  ret float %tmp2
}

; fma(%b, %v[1], %a) with a <2 x float> source vector.
define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
  ; CHECK-LABEL: test_fmla_ss2S:
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %tmp2
}

; fma(%b, %v[0], %a) with a <1 x double> source vector. Either the by-element
; fmla on lane 0 or a plain scalar fmadd is accepted.
define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
  ; CHECK-LABEL: test_fmla_ddD:
  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+\.d\[0\]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %tmp2
}

; fma(%b, %v[1], %a) with a <2 x double> source vector.
define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmla_dd2D:
  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %tmp2
}

; Same as above, but the extracted lane is the first multiplicand.
define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmla_dd2D_swap:
  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
  ret double %tmp2
}

; The fmls tests negate the extracted lane (fsub from -0.0) and multiply the
; negated value by the lane itself, i.e. fma(-%v[i], %v[i], %a). %b is not
; used by any of them.

; Negated operand first, <4 x float> source vector, lane 3.
define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmls_ss4S:
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  ret float %tmp3
}

; Negated operand second, <4 x float> source vector, lane 3.
define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmls_ss4S_swap:
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %tmp1 = extractelement <4 x float> %v, i32 3
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
  ret float %tmp3
}

; Negated operand first, <2 x float> source vector, lane 1.
define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
  ; CHECK-LABEL: test_fmls_ss2S:
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp1 = extractelement <2 x float> %v, i32 1
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  ret float %tmp3
}

; Negated operand first, <1 x double> source vector. Either the by-element
; fmls on lane 0 or a plain scalar fmsub is accepted.
define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
  ; CHECK-LABEL: test_fmls_ddD:
  ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+\.d\[0\]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  %tmp1 = extractelement <1 x double> %v, i32 0
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
  ret double %tmp3
}

; Negated operand first, <2 x double> source vector, lane 1.
define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmls_dd2D:
  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
  ret double %tmp3
}

; Negated operand second, <2 x double> source vector, lane 1.
define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmls_dd2D_swap:
  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp1 = extractelement <2 x double> %v, i32 1
  %tmp2 = fsub double -0.0, %tmp1
  %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
  ret double %tmp3
}