; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX

define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; ALL-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a2
  %res = fsub double %x, %a1
  ret double %res
}

define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %b2 = fadd <16 x float> %b1, %a2
  ret <16 x float> %b2
}

define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, %a2
  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b2
}

;mask (a*c+b , a)
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT:    retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
  ret <16 x float> %res
}

;mask (a*c+b , b)
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd231ps {{.*#+}} zmm1 {%k1} = (zmm0 * mem) + zmm1
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd231ps {{.*#+}} zmm1 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}

;mask (b*a+c , b)
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd213ps {{.*#+}} zmm1 {%k1} = (zmm0 * zmm1) + mem
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd213ps {{.*#+}} zmm1 {%k1} = (zmm0 * zmm1) + mem
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a1, %a0
  %y = fadd <16 x float> %x, %a2
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}