• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
4
; Computes (%a0 * %a1) + %a2 on <16 x float> using a separate fmul and fadd.
; The assertions expect both run configurations to emit a single
; vfmadd213ps on zmm registers (the run lines pass -fp-contract=fast).
5define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
6; ALL-LABEL: test_x86_fmadd_ps_z:
7; ALL:       ## %bb.0:
8; ALL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
9; ALL-NEXT:    retq
10  %x = fmul <16 x float> %a0, %a1
11  %res = fadd <16 x float> %x, %a2
12  ret <16 x float> %res
13}
14
; Computes (%a0 * %a1) - %a2 on <16 x float> using a separate fmul and fsub.
; The assertions expect both run configurations to emit a single
; vfmsub213ps on zmm registers.
15define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
16; ALL-LABEL: test_x86_fmsub_ps_z:
17; ALL:       ## %bb.0:
18; ALL-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
19; ALL-NEXT:    retq
20  %x = fmul <16 x float> %a0, %a1
21  %res = fsub <16 x float> %x, %a2
22  ret <16 x float> %res
23}
24
; Computes %a2 - (%a0 * %a1) on <16 x float>, i.e. the product is the
; subtrahend. The assertions expect both run configurations to emit a
; single vfnmadd213ps on zmm registers.
25define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
26; ALL-LABEL: test_x86_fnmadd_ps_z:
27; ALL:       ## %bb.0:
28; ALL-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
29; ALL-NEXT:    retq
30  %x = fmul <16 x float> %a0, %a1
31  %res = fsub <16 x float> %a2, %x
32  ret <16 x float> %res
33}
34
; Computes (-(%a0 * %a1)) - %a2 on <16 x float>. The negation of the product
; is written as an fsub from a vector whose 16 lanes are all -0.0, followed
; by a second fsub of %a2. The assertions expect both run configurations to
; emit a single vfnmsub213ps on zmm registers.
35define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
36; ALL-LABEL: test_x86_fnmsub_ps_z:
37; ALL:       ## %bb.0:
38; ALL-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
39; ALL-NEXT:    retq
40  %x = fmul <16 x float> %a0, %a1
41  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
42                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
43                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
44                          float -0.000000e+00>, %x
45  %res = fsub <16 x float> %y, %a2
46  ret <16 x float> %res
47}
48
; Double-precision variant: computes (%a0 * %a1) + %a2 on <8 x double>.
; The assertions expect both run configurations to emit a single
; vfmadd213pd on zmm registers.
49define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
50; ALL-LABEL: test_x86_fmadd_pd_z:
51; ALL:       ## %bb.0:
52; ALL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
53; ALL-NEXT:    retq
54  %x = fmul <8 x double> %a0, %a1
55  %res = fadd <8 x double> %x, %a2
56  ret <8 x double> %res
57}
58
; Double-precision variant: computes (%a0 * %a1) - %a2 on <8 x double>.
; The assertions expect both run configurations to emit a single
; vfmsub213pd on zmm registers.
59define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
60; ALL-LABEL: test_x86_fmsub_pd_z:
61; ALL:       ## %bb.0:
62; ALL-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
63; ALL-NEXT:    retq
64  %x = fmul <8 x double> %a0, %a1
65  %res = fsub <8 x double> %x, %a2
66  ret <8 x double> %res
67}
68
; Scalar case: computes (%a0 * %a1) - %a2 on double, with all three operands
; passed by value. The assertions expect both run configurations to emit a
; single vfmsub213sd on xmm registers.
69define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
70; ALL-LABEL: test_x86_fmsub_213:
71; ALL:       ## %bb.0:
72; ALL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
73; ALL-NEXT:    retq
74  %x = fmul double %a0, %a1
75  %res = fsub double %x, %a2
76  ret double %res
77}
78
; Scalar case with the subtrahend loaded from memory: computes
; (%a0 * %a1) - load(%a2_ptr). The assertions expect the load to appear as
; the memory operand (%rdi) of a single vfmsub213sd, with no separate load
; instruction.
79define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
80; ALL-LABEL: test_x86_fmsub_213_m:
81; ALL:       ## %bb.0:
82; ALL-NEXT:    vfmsub213sd (%rdi), %xmm1, %xmm0
83; ALL-NEXT:    retq
84  %a2 = load double , double *%a2_ptr
85  %x = fmul double %a0, %a1
86  %res = fsub double %x, %a2
87  ret double %res
88}
89
; Scalar case where the loaded value is a multiplicand: computes
; (%a0 * load(%a2_ptr)) - %a1. The assertions expect the load to appear as
; the memory operand (%rdi) of a single FMA instruction.
; NOTE(review): the function name says "231" but the recorded assertion is
; the 132 form (vfmsub132sd); the name does not describe the expected opcode.
90define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
91; ALL-LABEL: test_x86_fmsub_231_m:
92; ALL:       ## %bb.0:
93; ALL-NEXT:    vfmsub132sd (%rdi), %xmm1, %xmm0
94; ALL-NEXT:    retq
95  %a2 = load double , double *%a2_ptr
96  %x = fmul double %a0, %a2
97  %res = fsub double %x, %a1
98  ret double %res
99}
100
; Broadcast-constant multiplicand: multiplies %a1 by a <16 x float> constant
; whose lanes are all 0x3FB99999A0000000, then adds %a2. The assertions
; expect the constant to be used as a {1to16} broadcast memory operand
; addressed relative to %rip, inside a single FMA instruction.
; NOTE(review): the function name says "231" but the recorded assertion is
; the 132 form (vfmadd132ps).
101define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
102; ALL-LABEL: test231_br:
103; ALL:       ## %bb.0:
104; ALL-NEXT:    vfmadd132ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
105; ALL-NEXT:    retq
106  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
107  %b2 = fadd <16 x float> %b1, %a2
108  ret <16 x float> %b2
109}
110
; Broadcast-constant addend: multiplies %a1 by %a2, then adds a <16 x float>
; constant whose lanes are all 0x3FB99999A0000000. The assertions expect the
; constant to be used as a {1to16} broadcast memory operand addressed
; relative to %rip, inside a single vfmadd213ps.
111define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
112; ALL-LABEL: test213_br:
113; ALL:       ## %bb.0:
114; ALL-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
115; ALL-NEXT:    retq
116  %b1 = fmul <16 x float> %a1, %a2
117  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
118  ret <16 x float> %b2
119}
120
121;mask (a*c+b , a)
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded from
; %a2_ptrt (declared align 1). Per lane, the result is (%a0 * c) + %a1 where
; %mask is true and the original %a0 where it is false.
; The assertions expect a single masked vfmadd132ps with the load as its
; memory operand and {%k1} as the write mask. The two run configurations
; differ only in how the <16 x i1> argument is turned into %k1:
;   - the avx512f-only run uses vpmovsxbd + vpslld $31 + vptestmd
;   - the skx run uses vpsllw $7 + vpmovb2m
122define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
123; KNL-LABEL: test_x86_fmadd132_ps:
124; KNL:       ## %bb.0:
125; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
126; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
127; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
128; KNL-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
129; KNL-NEXT:    retq
130;
131; SKX-LABEL: test_x86_fmadd132_ps:
132; SKX:       ## %bb.0:
133; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
134; SKX-NEXT:    vpmovb2m %xmm2, %k1
135; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
136; SKX-NEXT:    retq
137  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
138  %x = fmul <16 x float> %a0, %a2
139  %y = fadd <16 x float> %x, %a1
140  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
141  ret <16 x float> %res
142}
143
144;mask (a*c+b , b)
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded from
; %a2_ptrt (declared align 1). Per lane, the result is (%a0 * c) + %a1 where
; %mask is true and the original addend %a1 where it is false.
; The assertions expect a single masked vfmadd231ps that accumulates into
; zmm1 under {%k1}, followed by a vmovaps copying zmm1 to zmm0. Mask
; materialisation differs between the two run configurations:
;   - the avx512f-only run uses vpmovsxbd + vpslld $31 + vptestmd
;   - the skx run uses vpsllw $7 + vpmovb2m
145define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
146; KNL-LABEL: test_x86_fmadd231_ps:
147; KNL:       ## %bb.0:
148; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
149; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
150; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
151; KNL-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
152; KNL-NEXT:    vmovaps %zmm1, %zmm0
153; KNL-NEXT:    retq
154;
155; SKX-LABEL: test_x86_fmadd231_ps:
156; SKX:       ## %bb.0:
157; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
158; SKX-NEXT:    vpmovb2m %xmm2, %k1
159; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
160; SKX-NEXT:    vmovaps %zmm1, %zmm0
161; SKX-NEXT:    retq
162  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
163  %x = fmul <16 x float> %a0, %a2
164  %y = fadd <16 x float> %x, %a1
165  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
166  ret <16 x float> %res
167}
168
169;mask (b*a+c , b)
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded from
; %a2_ptrt (declared align 1). Per lane, the result is (%a1 * %a0) + c where
; %mask is true and the original multiplicand %a1 where it is false.
; The assertions expect a single masked vfmadd213ps with the load as its
; memory operand, writing zmm1 under {%k1}, followed by a vmovaps copying
; zmm1 to zmm0. Mask materialisation differs between the two run
; configurations:
;   - the avx512f-only run uses vpmovsxbd + vpslld $31 + vptestmd
;   - the skx run uses vpsllw $7 + vpmovb2m
170define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
171; KNL-LABEL: test_x86_fmadd213_ps:
172; KNL:       ## %bb.0:
173; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
174; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
175; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
176; KNL-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
177; KNL-NEXT:    vmovaps %zmm1, %zmm0
178; KNL-NEXT:    retq
179;
180; SKX-LABEL: test_x86_fmadd213_ps:
181; SKX:       ## %bb.0:
182; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
183; SKX-NEXT:    vpmovb2m %xmm2, %k1
184; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
185; SKX-NEXT:    vmovaps %zmm1, %zmm0
186; SKX-NEXT:    retq
187  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
188  %x = fmul <16 x float> %a1, %a0
189  %y = fadd <16 x float> %x, %a2
190  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
191  ret <16 x float> %res
192}
193
194