; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; Every function below spells a multiply followed by an add/subtract as two
; separate IR instructions. Both llc invocations pass -fp-contract=fast, and
; the expected assembly for each function is a single fused instruction.

; 512-bit single precision: (a0 * a1) + a2.
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %sum = fadd <16 x float> %prod, %a2
  ret <16 x float> %sum
}

; 512-bit single precision: (a0 * a1) - a2.
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %prod, %a2
  ret <16 x float> %diff
}

; 512-bit single precision: a2 - (a0 * a1).
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %a2, %prod
  ret <16 x float> %diff
}

; 512-bit single precision: (-0.0 - (a0 * a1)) - a2. The product is negated by
; subtracting it from a splat of -0.0 before a2 is subtracted.
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %negprod = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod
  %diff = fsub <16 x float> %negprod, %a2
  ret <16 x float> %diff
}

; 512-bit double precision: (a0 * a1) + a2.
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %sum = fadd <8 x double> %prod, %a2
  ret <8 x double> %sum
}

; 512-bit double precision: (a0 * a1) - a2.
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %diff = fsub <8 x double> %prod, %a2
  ret <8 x double> %diff
}

; Scalar double, all operands in registers: (a0 * a1) - a2.
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
; ALL-NEXT:    retq
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %a2
  ret double %diff
}

; Scalar double with the subtrahend loaded from memory: (a0 * a1) - [a2_ptr].
; The expected instruction takes the load as its memory operand.
define double @test_x86_fmsub_213_m(double %a0, double %a1, double* %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd (%rdi), %xmm1, %xmm0
; ALL-NEXT:    retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %mem
  ret double %diff
}

; Scalar double with a multiplicand loaded from memory: (a0 * [a2_ptr]) - a1.
; Despite the "231" in the function name, the expected instruction is the 132
; form with the load as its memory operand.
define double @test_x86_fmsub_231_m(double %a0, double %a1, double* %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub132sd (%rdi), %xmm1, %xmm0
; ALL-NEXT:    retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %mem
  %diff = fsub double %prod, %a1
  ret double %diff
}

; Splat constant used as a multiplicand: (a1 * splat) + a2. The constant is
; expected to be consumed as a {1to16} embedded-broadcast memory operand.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd132ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; ALL-NEXT:    retq
  %scaled = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %sum = fadd <16 x float> %scaled, %a2
  ret <16 x float> %sum
}

; Splat constant used as the addend: (a1 * a2) + splat. The constant is
; expected to be consumed as a {1to16} embedded-broadcast memory operand.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; ALL-NEXT:    retq
  %prod = fmul <16 x float> %a1, %a2
  %sum = fadd <16 x float> %prod, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %sum
}

; Masked select of (a0 * [mem] + a1) against a0: lanes whose mask bit is clear
; keep the register multiplicand %a0.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %fma = fadd <16 x float> %prod, %a1
  %blend = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a0
  ret <16 x float> %blend
}

; Masked select of (a0 * [mem] + a1) against a1: lanes whose mask bit is clear
; keep the addend %a1.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %fma = fadd <16 x float> %prod, %a1
  %blend = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a1
  ret <16 x float> %blend
}

; Masked select of (a1 * a0 + [mem]) against a1: lanes whose mask bit is clear
; keep the multiplicand %a1.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float>* %a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a1, %a0
  %fma = fadd <16 x float> %prod, %mem
  %blend = select <16 x i1> %mask, <16 x float> %fma, <16 x float> %a1
  ret <16 x float> %blend
}