• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
4
; Packed single-precision multiply-add on 512-bit vectors.
; The IR computes (%a0 * %a1) + %a2 as a separate fmul and fadd. Both llc
; invocations are expected to emit a single vfmadd213ps followed by the return.
5define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
6; ALL-LABEL: test_x86_fmadd_ps_z:
7; ALL:       ## BB#0:
8; ALL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
9; ALL-NEXT:    retq
10  %x = fmul <16 x float> %a0, %a1
11  %res = fadd <16 x float> %x, %a2
12  ret <16 x float> %res
13}
14
; Packed single-precision multiply-subtract on 512-bit vectors.
; The IR computes (%a0 * %a1) - %a2. Both llc invocations are expected to emit
; a single vfmsub213ps followed by the return.
15define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
16; ALL-LABEL: test_x86_fmsub_ps_z:
17; ALL:       ## BB#0:
18; ALL-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
19; ALL-NEXT:    retq
20  %x = fmul <16 x float> %a0, %a1
21  %res = fsub <16 x float> %x, %a2
22  ret <16 x float> %res
23}
24
; Packed single-precision negated multiply-add on 512-bit vectors.
; The IR computes %a2 - (%a0 * %a1): the product is the subtrahend, so it is
; the product (not the addend) that ends up negated. Both llc invocations are
; expected to emit a single vfnmadd213ps followed by the return.
25define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
26; ALL-LABEL: test_x86_fnmadd_ps_z:
27; ALL:       ## BB#0:
28; ALL-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
29; ALL-NEXT:    retq
30  %x = fmul <16 x float> %a0, %a1
31  %res = fsub <16 x float> %a2, %x
32  ret <16 x float> %res
33}
34
; Packed single-precision negated multiply-subtract on 512-bit vectors.
; The IR computes (-0.0 - (%a0 * %a1)) - %a2, i.e. the negated product minus
; %a2. Both llc invocations are expected to emit a single vfnmsub213ps followed
; by the return.
35define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
36; ALL-LABEL: test_x86_fnmsub_ps_z:
37; ALL:       ## BB#0:
38; ALL-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
39; ALL-NEXT:    retq
40  %x = fmul <16 x float> %a0, %a1
; Subtracting the product from a vector of sixteen -0.0 lanes negates it.
41  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
42                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
43                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
44                          float -0.000000e+00>, %x
45  %res = fsub <16 x float> %y, %a2
46  ret <16 x float> %res
47}
48
; Packed double-precision multiply-add on 512-bit vectors.
; The IR computes (%a0 * %a1) + %a2 on <8 x double>. Both llc invocations are
; expected to emit a single vfmadd213pd followed by the return.
49define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
50; ALL-LABEL: test_x86_fmadd_pd_z:
51; ALL:       ## BB#0:
52; ALL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
53; ALL-NEXT:    retq
54  %x = fmul <8 x double> %a0, %a1
55  %res = fadd <8 x double> %x, %a2
56  ret <8 x double> %res
57}
58
; Packed double-precision multiply-subtract on 512-bit vectors.
; The IR computes (%a0 * %a1) - %a2 on <8 x double>. Both llc invocations are
; expected to emit a single vfmsub213pd followed by the return.
59define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
60; ALL-LABEL: test_x86_fmsub_pd_z:
61; ALL:       ## BB#0:
62; ALL-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
63; ALL-NEXT:    retq
64  %x = fmul <8 x double> %a0, %a1
65  %res = fsub <8 x double> %x, %a2
66  ret <8 x double> %res
67}
68
; Scalar double-precision multiply-subtract with all operands in registers.
; The IR computes (%a0 * %a1) - %a2. The expected output is one vfmsub213sd
; that writes its result to %xmm1, followed by a vmovaps that copies it into
; %zmm0 before the return.
69define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
70; ALL-LABEL: test_x86_fmsub_213:
71; ALL:       ## BB#0:
72; ALL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
73; ALL-NEXT:    vmovaps %zmm1, %zmm0
74; ALL-NEXT:    retq
75  %x = fmul double %a0, %a1
76  %res = fsub double %x, %a2
77  ret double %res
78}
79
; Scalar double-precision multiply-subtract where the subtrahend comes from
; memory. The IR loads %a2 through %a2_ptr and computes (%a0 * %a1) - %a2.
; The expected output folds the load into vfmsub213sd as the (%rdi) operand,
; then copies the result from %zmm1 into %zmm0 before the return.
80define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
81; ALL-LABEL: test_x86_fmsub_213_m:
82; ALL:       ## BB#0:
83; ALL-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
84; ALL-NEXT:    vmovaps %zmm1, %zmm0
85; ALL-NEXT:    retq
86  %a2 = load double , double *%a2_ptr
87  %x = fmul double %a0, %a1
88  %res = fsub double %x, %a2
89  ret double %res
90}
91
; Scalar double-precision multiply-subtract where one multiplicand comes from
; memory. The IR loads %a2 through %a2_ptr and computes (%a0 * %a2) - %a1, so
; here the loaded value feeds the multiply and %a1 is the subtrahend.
; The expected output folds the load into vfmsub231sd as the (%rdi) operand,
; then copies the result from %zmm1 into %zmm0 before the return.
92define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
93; ALL-LABEL: test_x86_fmsub_231_m:
94; ALL:       ## BB#0:
95; ALL-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
96; ALL-NEXT:    vmovaps %zmm1, %zmm0
97; ALL-NEXT:    retq
98  %a2 = load double , double *%a2_ptr
99  %x = fmul double %a0, %a2
100  %res = fsub double %x, %a1
101  ret double %res
102}
103
; Multiply-add where one multiplicand is a constant splat.
; The IR multiplies %a1 by a <16 x float> constant whose sixteen lanes all hold
; the same value, then adds %a2. The expected output is a vfmadd231ps that
; reads the constant as a %rip-relative {1to16} broadcast memory operand and
; accumulates into %zmm1, followed by a vmovaps copy into %zmm0.
; NOTE(review): the "_br" suffix presumably stands for "broadcast" - confirm.
104define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
105; ALL-LABEL: test231_br:
106; ALL:       ## BB#0:
107; ALL-NEXT:    vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
108; ALL-NEXT:    vmovaps %zmm1, %zmm0
109; ALL-NEXT:    retq
110  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
111  %b2 = fadd <16 x float> %b1, %a2
112  ret <16 x float> %b2
113}
114
; Multiply-add where the addend is a constant splat.
; The IR multiplies %a1 by %a2, then adds a <16 x float> constant whose sixteen
; lanes all hold the same value. The expected output is a vfmadd213ps that
; reads the constant as a %rip-relative {1to16} broadcast memory operand and
; leaves the result in %zmm0, so no extra register copy is expected.
; NOTE(review): the "_br" suffix presumably stands for "broadcast" - confirm.
115define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
116; ALL-LABEL: test213_br:
117; ALL:       ## BB#0:
118; ALL-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
119; ALL-NEXT:    retq
120  %b1 = fmul <16 x float> %a1, %a2
121  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
122  ret <16 x float> %b2
123}
124
125;mask (a*c+b , a)
; Masked multiply-add whose pass-through value is the first multiplicand.
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded through
; %a2_ptrt. The IR computes (%a0 * %a2) + %a1 and then selects, per lane,
; that sum where %mask is set and %a0 where it is clear.
; Both llc invocations are expected to fold the load and the select into one
; vfmadd132ps with a (%rdi) memory operand and {%k1} masking on %zmm0. They
; differ only in the instructions that turn the <16 x i1> mask held in %xmm2
; into %k1 (sign-extend/shift/test for -mattr=+avx512f; shift/vpmovb2m for
; -mcpu=skx).
126define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
127; KNL-LABEL: test_x86_fmadd132_ps:
128; KNL:       ## BB#0:
129; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
130; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
131; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
132; KNL-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
133; KNL-NEXT:    retq
134;
135; SKX-LABEL: test_x86_fmadd132_ps:
136; SKX:       ## BB#0:
137; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
138; SKX-NEXT:    vpmovb2m %xmm2, %k1
139; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
140; SKX-NEXT:    retq
; The load is declared with align 1, i.e. no stronger alignment is promised.
141  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
142  %x = fmul <16 x float> %a0, %a2
143  %y = fadd <16 x float> %x, %a1
144  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
145  ret <16 x float> %res
146}
147
148;mask (a*c+b , b)
; Masked multiply-add whose pass-through value is the addend.
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded through
; %a2_ptrt. The IR computes (%a0 * %a2) + %a1 and then selects, per lane,
; that sum where %mask is set and %a1 where it is clear.
; Both llc invocations are expected to fold the load and the select into one
; vfmadd231ps with a (%rdi) memory operand and {%k1} masking on %zmm1, then
; copy %zmm1 into %zmm0. They differ only in the instructions that turn the
; <16 x i1> mask held in %xmm2 into %k1.
149define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
150; KNL-LABEL: test_x86_fmadd231_ps:
151; KNL:       ## BB#0:
152; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
153; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
154; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
155; KNL-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
156; KNL-NEXT:    vmovaps %zmm1, %zmm0
157; KNL-NEXT:    retq
158;
159; SKX-LABEL: test_x86_fmadd231_ps:
160; SKX:       ## BB#0:
161; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
162; SKX-NEXT:    vpmovb2m %xmm2, %k1
163; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
164; SKX-NEXT:    vmovaps %zmm1, %zmm0
165; SKX-NEXT:    retq
; The load is declared with align 1, i.e. no stronger alignment is promised.
166  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
167  %x = fmul <16 x float> %a0, %a2
168  %y = fadd <16 x float> %x, %a1
169  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
170  ret <16 x float> %res
171}
172
173;mask (b*a+c , b)
; Masked multiply-add whose pass-through value is a multiplicand, with the
; addend coming from memory.
; In the shorthand above, a = %a0, b = %a1 and c = the vector loaded through
; %a2_ptrt. The IR computes (%a1 * %a0) + %a2 and then selects, per lane,
; that sum where %mask is set and %a1 where it is clear.
; Both llc invocations are expected to fold the load and the select into one
; vfmadd213ps with a (%rdi) memory operand and {%k1} masking on %zmm1, then
; copy %zmm1 into %zmm0. They differ only in the instructions that turn the
; <16 x i1> mask held in %xmm2 into %k1.
174define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
175; KNL-LABEL: test_x86_fmadd213_ps:
176; KNL:       ## BB#0:
177; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
178; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
179; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
180; KNL-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
181; KNL-NEXT:    vmovaps %zmm1, %zmm0
182; KNL-NEXT:    retq
183;
184; SKX-LABEL: test_x86_fmadd213_ps:
185; SKX:       ## BB#0:
186; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
187; SKX-NEXT:    vpmovb2m %xmm2, %k1
188; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
189; SKX-NEXT:    vmovaps %zmm1, %zmm0
190; SKX-NEXT:    retq
; The load is declared with align 1, i.e. no stronger alignment is promised.
191  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
192  %x = fmul <16 x float> %a1, %a0
193  %y = fadd <16 x float> %x, %a2
194  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
195  ret <16 x float> %res
196}
197
198