; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; fmul+fadd on <16 x float> contracts to a single vfmadd213ps under -fp-contract=fast.
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

; fmul+fsub on <16 x float> contracts to a single vfmsub213ps.
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

; a2 - (a0*a1) contracts to vfnmadd213ps (negated product plus addend).
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

; (-0.0 - a0*a1) - a2 contracts to vfnmsub213ps; the -0.0 splat vector is the
; IR idiom for fneg of the product.
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

; fmul+fadd on <8 x double> contracts to a single vfmadd213pd.
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

; fmul+fsub on <8 x double> contracts to a single vfmsub213pd.
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

; Scalar double fmul+fsub contracts to vfmsub213sd with all-register operands.
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; ALL-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

; Same as test_x86_fmsub_213, but the subtrahend comes from memory; the load
; folds into the FMA's mem operand.
define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

; Multiplicand loaded from memory: despite the _231 name, isel picks the 132
; form so the folded load supplies the multiplier.
define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a2
  %res = fsub double %x, %a1
  ret double %res
}

; Multiply by a splat constant: folds as a broadcast mem operand of
; vfmadd132ps.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %b2 = fadd <16 x float> %b1, %a2
  ret <16 x float> %b2
}

; Add a splat constant to a product: folds as the mem addend of vfmadd213ps.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL:       ## %bb.0:
; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, %a2
  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b2
}

; mask(a0*a2 + a1, a0): masked-merge into the a0 register selects the 132 form
; with {%k1}. KNL builds the mask via sign-extend/shift/vptestmd; SKX uses
; vpsllw+vpmovb2m.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
  ret <16 x float> %res
}

; mask(a0*a2 + a1, a1): masked-merge into the addend register selects the 231
; form, writing zmm1 under {%k1} and then moving the result to zmm0.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd231ps {{.*#+}} zmm1 {%k1} = (zmm0 * mem) + zmm1
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd231ps {{.*#+}} zmm1 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}

; mask(a1*a0 + a2, a1): masked-merge into a multiplicand register selects the
; 213 form with the loaded addend folded as the mem operand.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd213ps {{.*#+}} zmm1 {%k1} = (zmm0 * zmm1) + mem
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd213ps {{.*#+}} zmm1 {%k1} = (zmm0 * zmm1) + mem
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a1, %a0
  %y = fadd <16 x float> %x, %a2
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}