• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq  | FileCheck %s  --check-prefix=CHECK --check-prefix=SKX
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
4
5; This test checks combinations of FNEG and FMA intrinsics on AVX-512 targets.
6; PR28892
7
; test1: the addend %c is negated (fsub -0.0, %c) before being passed to the
; 512-bit packed-float vfmadd intrinsic with rounding operand i32 4. The
; expected output is a single vfmsub213ps with no separate negation instruction.
8define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
9; CHECK-LABEL: test1:
10; CHECK:       # %bb.0: # %entry
11; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
12; CHECK-NEXT:    retq
13entry:
14  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
15  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4) #2
16  ret <16 x float> %0
17}
18
; Declarations of the 512-bit packed-float FMA intrinsics called by the tests
; in this file. Callers pass the i16 operand of the mask.* forms either as -1
; or as a function argument named %mask. The trailing i32 operand appears to
; select the rounding behaviour: callers passing i32 2 expect {ru-sae} in the
; assembly, while callers passing i32 4 expect no rounding annotation.
19declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
20declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
21declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
22
23
; test2: the result of the 512-bit packed-float vfmadd intrinsic is negated
; (fsub -0.0, result). The expected output is a single vfnmsub213ps with no
; separate negation instruction.
24define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
25; CHECK-LABEL: test2:
26; CHECK:       # %bb.0: # %entry
27; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
28; CHECK-NEXT:    retq
29entry:
30  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4) #2
31  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
32  ret <16 x float> %sub.i
33}
34
; test3: the result of the mask.vfnmadd intrinsic (called with an all-ones
; mask, i16 -1) is negated. The expected output is a single vfmsub213ps.
35define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
36; CHECK-LABEL: test3:
37; CHECK:       # %bb.0: # %entry
38; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
39; CHECK-NEXT:    retq
40entry:
41  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
42  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
43  ret <16 x float> %sub.i
44}
45
; test4: the result of the mask.vfnmsub intrinsic (called with an all-ones
; mask, i16 -1) is negated. The expected output is a single vfmadd213ps.
46define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
47; CHECK-LABEL: test4:
48; CHECK:       # %bb.0: # %entry
49; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
50; CHECK-NEXT:    retq
51entry:
52  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
53  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
54  ret <16 x float> %sub.i
55}
56
; test5: the addend %c is negated before the 512-bit packed-float vfmadd
; intrinsic, which is called with rounding operand i32 2. The expected output
; is a single vfmsub213ps that keeps the {ru-sae} rounding annotation.
57define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
58; CHECK-LABEL: test5:
59; CHECK:       # %bb.0: # %entry
60; CHECK-NEXT:    vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
61; CHECK-NEXT:    retq
62entry:
63  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
64  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 2) #2
65  ret <16 x float> %0
66}
67
; test6: the result of the mask.vfnmsub intrinsic (all-ones mask, rounding
; operand i32 2) is negated. The expected output is a single vfmadd213ps that
; keeps the {ru-sae} rounding annotation.
68define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
69; CHECK-LABEL: test6:
70; CHECK:       # %bb.0: # %entry
71; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
72; CHECK-NEXT:    retq
73entry:
74  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
75  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
76  ret <16 x float> %sub.i
77}
78
79
; test7: 256-bit case. The result of the fma.vfmsub.ps.256 intrinsic is
; negated. The expected output is a single vfnmadd213ps computing
; -(ymm1 * ymm0) + ymm2.
80define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
81; CHECK-LABEL: test7:
82; CHECK:       # %bb.0: # %entry
83; CHECK-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
84; CHECK-NEXT:    retq
85entry:
86  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
87  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
88  ret <8 x float> %sub.i
89}
90
; test8: 256-bit case. The third operand %c is negated before being passed to
; the fma.vfmsub.ps.256 intrinsic. The expected output is a single
; vfmadd213ps computing (ymm1 * ymm0) + ymm2.
91define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
92; CHECK-LABEL: test8:
93; CHECK:       # %bb.0: # %entry
94; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
95; CHECK-NEXT:    retq
96entry:
97  %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
98  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
99  ret <8 x float> %0
100}
101
; 256-bit packed-float fused multiply-subtract intrinsic called by test7 and
; test8; it takes three <8 x float> operands and no mask or rounding operand.
102declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
103
104
; test9: packed-double case. The result of the 512-bit vfmadd.pd intrinsic is
; negated. The expected output is a single vfnmsub213pd.
105define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
106; CHECK-LABEL: test9:
107; CHECK:       # %bb.0: # %entry
108; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
109; CHECK-NEXT:    retq
110entry:
111  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
112  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
113  ret <8 x double> %sub.i
114}
115
; 512-bit packed-double vfmadd intrinsic called by test9 and test12. The
; callers in this file always pass i32 4 as the trailing operand.
116declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32)
117
; test10: the <2 x double> result of the scalar mask.vfmadd.sd intrinsic
; (mask i8 -1) is negated as a whole vector. Unlike the packed tests, the
; expected output keeps the negation as a separate vxorpd after vfmadd213sd.
; NOTE(review): presumably the fold is not done because the negation covers
; both elements while the scalar FMA is a single-element operation - confirm.
118define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
119; CHECK-LABEL: test10:
120; CHECK:       # %bb.0: # %entry
121; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
122; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
123; CHECK-NEXT:    retq
124entry:
125  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
126  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
127  ret <2 x double> %sub.i
128}
129
; Masked scalar-double vfmadd intrinsic called by test10 and test13. Callers
; pass the i8 operand as -1 or as %mask, and always pass i32 4 last.
130declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)
131
; test11: the third operand %c is negated before the scalar mask3.vfmadd.ss
; intrinsic, which is called with a variable mask. In the expected output the
; negated %c is still materialised with vxorps into xmm3, an unmasked
; vfmsub213ss is computed, and a masked vmovss blends the result into xmm3.
; The two run configurations differ in how the sign-bit constant is obtained
; (broadcast memory operand versus a separate vbroadcastss) and in the
; mask-move instruction (kmovd versus kmovw).
132define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
133; SKX-LABEL: test11:
134; SKX:       # %bb.0: # %entry
135; SKX-NEXT:    vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm3
136; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
137; SKX-NEXT:    kmovd %edi, %k1
138; SKX-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
139; SKX-NEXT:    vmovaps %xmm3, %xmm0
140; SKX-NEXT:    retq
141;
142; KNL-LABEL: test11:
143; KNL:       # %bb.0: # %entry
144; KNL-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
145; KNL-NEXT:    vxorps %xmm3, %xmm2, %xmm3
146; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
147; KNL-NEXT:    kmovw %edi, %k1
148; KNL-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
149; KNL-NEXT:    vmovaps %xmm3, %xmm0
150; KNL-NEXT:    retq
151entry:
152  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
153  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
154  ret <4 x float> %0
155}
156
; Scalar-float mask3.vfmadd intrinsic called by test11, test20 and test23.
; Callers pass %mask as the i8 operand and i32 4 or i32 8 as the last operand.
157declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
158
; test11b: the third operand %c is negated before the scalar mask.vfmadd.ss
; intrinsic, which is called with a variable mask. The expected output is a
; single masked vfmsub213ss; the two run configurations differ only in the
; mask-move instruction (kmovd versus kmovw).
159define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
160; SKX-LABEL: test11b:
161; SKX:       # %bb.0: # %entry
162; SKX-NEXT:    kmovd %edi, %k1
163; SKX-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
164; SKX-NEXT:    retq
165;
166; KNL-LABEL: test11b:
167; KNL:       # %bb.0: # %entry
168; KNL-NEXT:    kmovw %edi, %k1
169; KNL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
170; KNL-NEXT:    retq
171entry:
172  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
173  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
174  ret <4 x float> %0
175}
176
; Masked scalar-float vfmadd intrinsic called by test11b, test18, test19,
; test21, test22 and test24. Callers pass %mask as the i8 operand and i32 4 or
; i32 8 as the last operand.
177declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
178
; test12: the 512-bit vfmadd.pd result is blended with %a through a select on
; the bitcast mask, and the blended value is then negated. In the expected
; output the negation is not folded into the FMA: a masked vfmadd132pd is
; followed by a separate sign-flip (vxorpd in one run configuration, vpxorq
; in the other).
179define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
180; SKX-LABEL: test12:
181; SKX:       # %bb.0: # %entry
182; SKX-NEXT:    kmovd %edi, %k1
183; SKX-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
184; SKX-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
185; SKX-NEXT:    retq
186;
187; KNL-LABEL: test12:
188; KNL:       # %bb.0: # %entry
189; KNL-NEXT:    kmovw %edi, %k1
190; KNL-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
191; KNL-NEXT:    vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
192; KNL-NEXT:    retq
193entry:
194  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
195  %bc = bitcast i8 %mask to <8 x i1>
196  %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a
197  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %sel
198  ret <8 x double> %sub.i
199}
200
; test13: the first operand %a is negated before the scalar mask.vfmadd.sd
; intrinsic, which is called with a variable mask. In the expected output the
; negated %a is materialised with vxorpd into xmm3, an unmasked vfnmadd213sd
; is computed, and a masked vmovsd blends the result into xmm3. The two run
; configurations differ only in the mask-move instruction.
201define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
202; SKX-LABEL: test13:
203; SKX:       # %bb.0: # %entry
204; SKX-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
205; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
206; SKX-NEXT:    kmovd %edi, %k1
207; SKX-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
208; SKX-NEXT:    vmovapd %xmm3, %xmm0
209; SKX-NEXT:    retq
210;
211; KNL-LABEL: test13:
212; KNL:       # %bb.0: # %entry
213; KNL-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
214; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
215; KNL-NEXT:    kmovw %edi, %k1
216; KNL-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
217; KNL-NEXT:    vmovapd %xmm3, %xmm0
218; KNL-NEXT:    retq
219
220entry:
221  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
222  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
223  ret <2 x double> %0
224}
225
; test14: the result of the mask.vfnmsub intrinsic (variable mask, rounding
; operand i32 2) is negated. In the expected output the negation is not
; folded: a masked vfnmsub132ps with {ru-sae} is followed by a separate
; sign-flip (vxorps in one run configuration, vpxord in the other).
226define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
227; SKX-LABEL: test14:
228; SKX:       # %bb.0: # %entry
229; SKX-NEXT:    kmovd %edi, %k1
230; SKX-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
231; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
232; SKX-NEXT:    retq
233;
234; KNL-LABEL: test14:
235; KNL:       # %bb.0: # %entry
236; KNL-NEXT:    kmovw %edi, %k1
237; KNL-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
238; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
239; KNL-NEXT:    retq
240entry:
241  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
242  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
243  ret <16 x float> %sub.i
244}
245
; test15: two chained 512-bit vfmadd.ps calls share one negated input (-%a)
; and one select mask. The first call uses -%a as the first multiplicand with
; rounding operand i32 2, and its result is blended with -%a. The second call
; uses that blend as the first multiplicand and -%a as the second, with
; rounding operand i32 1, and is blended again with the same mask.
; In the expected output the negated %a is still materialised with a
; sign-flip into zmm3 because it is also the select fall-through value. The
; first FMA becomes an unmasked vfnmadd213ps {ru-sae} followed by a masked
; vmovaps, and the second becomes a masked vfnmadd132ps {rd-sae}.
246define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask)  {
247; SKX-LABEL: test15:
248; SKX:       # %bb.0: # %entry
249; SKX-NEXT:    kmovd %edi, %k1
250; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
251; SKX-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
252; SKX-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
253; SKX-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
254; SKX-NEXT:    vmovaps %zmm3, %zmm0
255; SKX-NEXT:    retq
256;
257; KNL-LABEL: test15:
258; KNL:       # %bb.0: # %entry
259; KNL-NEXT:    kmovw %edi, %k1
260; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
261; KNL-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
262; KNL-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
263; KNL-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
264; KNL-NEXT:    vmovaps %zmm3, %zmm0
265; KNL-NEXT:    retq
266entry:
267  %bc = bitcast i16 %mask to <16 x i1>
268  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
269  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 2)
270  %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
271  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 1)
272  %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
273  ret <16 x float> %sel2
274}
275
; test16: the third operand %c is negated before the 512-bit vfmaddsub.ps
; intrinsic (rounding operand i32 1), and the result is blended with %a
; through a select on the bitcast mask. The expected output is a single
; masked vfmsubadd132ps with {rd-sae}. This function has no named entry
; block, so the expected block header carries no block name.
276define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
277; SKX-LABEL: test16:
278; SKX:       # %bb.0:
279; SKX-NEXT:    kmovd %edi, %k1
280; SKX-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
281; SKX-NEXT:    retq
282;
283; KNL-LABEL: test16:
284; KNL:       # %bb.0:
285; KNL-NEXT:    kmovw %edi, %k1
286; KNL-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
287; KNL-NEXT:    retq
288  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
289  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 1)
290  %bc = bitcast i16 %mask to <16 x i1>
291  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
292  ret <16 x float> %sel
293}
; 512-bit packed-float vfmaddsub intrinsic called by test16, which passes
; i32 1 as the trailing operand.
294declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
295
; test17: packed-double case. The third operand %c is negated before the
; 512-bit vfmaddsub.pd intrinsic (rounding operand i32 4), and the result is
; blended with %a through a select on the bitcast mask. The expected output is
; a single masked vfmsubadd132pd with no rounding annotation.
296define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
297; SKX-LABEL: test17:
298; SKX:       # %bb.0:
299; SKX-NEXT:    kmovd %edi, %k1
300; SKX-NEXT:    vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
301; SKX-NEXT:    retq
302;
303; KNL-LABEL: test17:
304; KNL:       # %bb.0:
305; KNL-NEXT:    kmovw %edi, %k1
306; KNL-NEXT:    vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
307; KNL-NEXT:    retq
308  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
309  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4)
310  %bc = bitcast i8 %mask to <8 x i1>
311  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a
312  ret <8 x double> %sel
313}
; 512-bit packed-double vfmaddsub intrinsic called by test17, which passes
; i32 4 as the trailing operand.
314declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)
315
; test18: the second multiplicand %b is negated before the scalar
; mask.vfmadd.ss intrinsic (variable mask, rounding operand i32 4). The
; expected output is a single masked vfnmadd213ss.
316define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
317; SKX-LABEL: test18:
318; SKX:       # %bb.0: # %entry
319; SKX-NEXT:    kmovd %edi, %k1
320; SKX-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
321; SKX-NEXT:    retq
322;
323; KNL-LABEL: test18:
324; KNL:       # %bb.0: # %entry
325; KNL-NEXT:    kmovw %edi, %k1
326; KNL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
327; KNL-NEXT:    retq
328entry:
329  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
330  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
331  ret <4 x float> %0
332}
333
; test19: both the second multiplicand %b and the addend %c are negated before
; the scalar mask.vfmadd.ss intrinsic (variable mask, rounding operand i32 4).
; The expected output is a single masked vfnmsub213ss.
334define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
335; SKX-LABEL: test19:
336; SKX:       # %bb.0: # %entry
337; SKX-NEXT:    kmovd %edi, %k1
338; SKX-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
339; SKX-NEXT:    retq
340;
341; KNL-LABEL: test19:
342; KNL:       # %bb.0: # %entry
343; KNL-NEXT:    kmovw %edi, %k1
344; KNL-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
345; KNL-NEXT:    retq
346entry:
347  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
348  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
349  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
350  ret <4 x float> %0
351}
352
; test20: the second multiplicand %b is negated before the scalar
; mask3.vfmadd.ss intrinsic (variable mask, rounding operand i32 4). The
; expected output is a masked vfnmadd231ss that accumulates into xmm2,
; followed by a vmovaps copying xmm2 to xmm0.
353define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
354; SKX-LABEL: test20:
355; SKX:       # %bb.0: # %entry
356; SKX-NEXT:    kmovd %edi, %k1
357; SKX-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
358; SKX-NEXT:    vmovaps %xmm2, %xmm0
359; SKX-NEXT:    retq
360;
361; KNL-LABEL: test20:
362; KNL:       # %bb.0: # %entry
363; KNL-NEXT:    kmovw %edi, %k1
364; KNL-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
365; KNL-NEXT:    vmovaps %xmm2, %xmm0
366; KNL-NEXT:    retq
367entry:
368  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
369  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
370  ret <4 x float> %0
371}
372
; test21: the second multiplicand %b is negated before the scalar
; mask.vfmadd.ss intrinsic, which is called with a variable mask and rounding
; operand i32 8. The expected output is a single masked vfnmadd213ss that
; keeps the {rn-sae} rounding annotation.
373define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
374; SKX-LABEL: test21:
375; SKX:       # %bb.0: # %entry
376; SKX-NEXT:    kmovd %edi, %k1
377; SKX-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
378; SKX-NEXT:    retq
379;
380; KNL-LABEL: test21:
381; KNL:       # %bb.0: # %entry
382; KNL-NEXT:    kmovw %edi, %k1
383; KNL-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
384; KNL-NEXT:    retq
385entry:
386  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
387  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
388  ret <4 x float> %0
389}
390
; test22: both the second multiplicand %b and the addend %c are negated before
; the scalar mask.vfmadd.ss intrinsic, which is called with a variable mask
; and rounding operand i32 8. The expected output is a single masked
; vfnmsub213ss that keeps the {rn-sae} rounding annotation.
391define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
392; SKX-LABEL: test22:
393; SKX:       # %bb.0: # %entry
394; SKX-NEXT:    kmovd %edi, %k1
395; SKX-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
396; SKX-NEXT:    retq
397;
398; KNL-LABEL: test22:
399; KNL:       # %bb.0: # %entry
400; KNL-NEXT:    kmovw %edi, %k1
401; KNL-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
402; KNL-NEXT:    retq
403entry:
404  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
405  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
406  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
407  ret <4 x float> %0
408}
409
; test23: the second multiplicand %b is negated before the scalar
; mask3.vfmadd.ss intrinsic, which is called with a variable mask and rounding
; operand i32 8. The expected output is a masked vfnmadd231ss with {rn-sae}
; that accumulates into xmm2, followed by a vmovaps copying xmm2 to xmm0.
410define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
411; SKX-LABEL: test23:
412; SKX:       # %bb.0: # %entry
413; SKX-NEXT:    kmovd %edi, %k1
414; SKX-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
415; SKX-NEXT:    vmovaps %xmm2, %xmm0
416; SKX-NEXT:    retq
417;
418; KNL-LABEL: test23:
419; KNL:       # %bb.0: # %entry
420; KNL-NEXT:    kmovw %edi, %k1
421; KNL-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
422; KNL-NEXT:    vmovaps %xmm2, %xmm0
423; KNL-NEXT:    retq
424entry:
425  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
426  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
427  ret <4 x float> %0
428}
429
; test24: the addend %c is negated before the scalar mask.vfmadd.ss intrinsic,
; which is called with a variable mask and rounding operand i32 8. The
; expected output is a single masked vfmsub213ss that keeps the {rn-sae}
; rounding annotation.
430define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
431; SKX-LABEL: test24:
432; SKX:       # %bb.0: # %entry
433; SKX-NEXT:    kmovd %edi, %k1
434; SKX-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
435; SKX-NEXT:    retq
436;
437; KNL-LABEL: test24:
438; KNL:       # %bb.0: # %entry
439; KNL-NEXT:    kmovw %edi, %k1
440; KNL-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
441; KNL-NEXT:    retq
442entry:
443  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
444  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
445  ret <4 x float> %0
446}
447
; test25: both the second multiplicand %b and the addend %c are negated before
; the 512-bit packed-float vfmadd intrinsic, which is called with rounding
; operand i32 8. The expected output is a single vfnmsub213ps that keeps the
; {rn-sae} rounding annotation.
448define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
449; CHECK-LABEL: test25:
450; CHECK:       # %bb.0: # %entry
451; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
452; CHECK-NEXT:    retq
453entry:
454  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
455  %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
456  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2
457  ret <16 x float> %0
458}
459